diff --git a/hpvm/projects/onnx/.gitignore b/hpvm/projects/onnx/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..2f4db75f1d4127e9537f7efdf6ed6556b5c8d93b
--- /dev/null
+++ b/hpvm/projects/onnx/.gitignore
@@ -0,0 +1,4 @@
+build/
+dist/
+onnx2hpvm.egg-info/
+.ipynb_checkpoints/
diff --git a/hpvm/projects/onnx/README.md b/hpvm/projects/onnx/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..eba07cd9d4102283362bbd021f28ff59b3755b0e
--- /dev/null
+++ b/hpvm/projects/onnx/README.md
@@ -0,0 +1,13 @@
+## Importing the Conda Environment
+
+conda env create -f onnx_environment.yml
+
+## Activating the Conda Environment
+
+conda activate onnx_frontend
+
+## Building and Installing the Frontend for ONNX
+
+python setup.py build
+
+python setup.py install
diff --git a/hpvm/projects/onnx/TODO.md b/hpvm/projects/onnx/TODO.md
new file mode 100644
index 0000000000000000000000000000000000000000..1ebb883f79b65b177baa5459b426bb3610f64ac0
--- /dev/null
+++ b/hpvm/projects/onnx/TODO.md
@@ -0,0 +1,43 @@
+# Which models should we use?
+ResNet-50 -> as a start?
+BERT
+Mask R-CNN
+
+# Questions from Sudipta
+Graph optimization: operator fusion, data layout transformations
+Vectorization/tensorization
+Automatic scheduling
+Handling dynamic control flow in the model graph
+Automatic differentiation
+
+# Operators used in the BERT model available in the ONNX model zoo, organized by type and number of occurrences
+
+https://github.com/onnx/models/tree/master/text/machine_comprehension/bert-squad
+(Opset 10 versions)
+
+DATA MANIPULATION
+Unsqueeze 191
+Reshape 71
+Cast 70
+Transpose 62
+Concat 56
+Identity 28
+Squeeze 7
+Slice
+Shape 5
+ConstantOfShape 1
+Gather 1
+OneHot 1
+Split 1
+
+COMPUTATION
+Mul 186
+Add 185
+MatMul 98
+Sub 62
+ReduceMean 50
+Sqrt 25
+Reciprocal 25
+Softmax 12
+Pow 12
+Tanh 12
diff --git a/hpvm/projects/onnx/env.yaml b/hpvm/projects/onnx/env.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f05a9304a4a1145abb7311a83b16d84153288fbb
--- /dev/null
+++ b/hpvm/projects/onnx/env.yaml
@@ -0,0 +1,8 @@
+name: onnxfront
+channels:
+  - defaults
+dependencies:
+  - pip
+  - python=3.8.5=h7579374_1
+  - pip:
+    - onnx==1.8.0
diff --git a/hpvm/projects/onnx/frontend/README.md b/hpvm/projects/onnx/frontend/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..69ffe81f5ad6b1a57dda1a6410d4348ff616148a
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/README.md
@@ -0,0 +1,10 @@
+### How to Run
+```
+python main.py <onnx_file> <output_dir> <input_size> [-p PREFIX] [-b BATCH_SIZE] [--opset OPSET] [-c {tensor,hpvmc}]
+```
+All configuration, e.g. the ONNX model location, input size, and emit directory for the generated source code, is given on the command line; run `python main.py -h` for details.
+
+### Resources
+1. [ONNX overview](https://github.com/onnx/onnx/blob/master/docs/IR.md)
+2. [ONNX operator specs](https://github.com/onnx/onnx/blob/master/docs/Operators.md)
+3. [Conversion between models - available adapters](https://github.com/onnx/onnx/blob/master/onnx/version_converter.py#L21)
\ No newline at end of file
diff --git a/hpvm/projects/onnx/frontend/__init__.py b/hpvm/projects/onnx/frontend/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/hpvm/projects/onnx/frontend/codegen_hpvm.py b/hpvm/projects/onnx/frontend/codegen_hpvm.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd9eac233071787baa6a1224ebb00b2eae837a1b
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/codegen_hpvm.py
@@ -0,0 +1,146 @@
+from os import PathLike
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple, Union
+
+import jinja2
+
+from graph_builder import DFG
+from graph_ir import DFGNode, TensorNode, WeightTensor
+
+TEMPLATE_FILE = "template_hpvm.cpp"
+loader = jinja2.FileSystemLoader(searchpath=Path(__file__).parent)
+template_env = jinja2.Environment(loader=loader, trim_blocks=True)
+template = template_env.get_template(TEMPLATE_FILE)
+
+
+class CodeGen:
+    def __init__(
+        self,
+        dfg: DFG,
+        output_dir: PathLike,
+        input_size: int,
+        batch_size: Optional[int] = None,
+        prefix: Optional[str] = None,
+    ):
+        self.dfg = dfg
+        self.var_count = 0
+        self.output_dir = Path(output_dir)
+        self.prefix = prefix
+        # Parse information about the (unique) model input
+        assert len(self.dfg.inputs) == 1
+        input_tensor = self.dfg.inputs[0]
+        self.input_name = input_tensor.name
+        self.input_shape = input_tensor.shape[1:]
+        self.input_size = input_size
+        self.batch_size = batch_size or input_size
+        # self.variables is a "node to our name" map.
+        # Each value is (varname, bool), and the bool indicates
+        # whether this is a root-node input or not.
+        IdenT = Union[str, int]
+        self.root_args = sorted(
+            [n for n in dfg.traverse_order if isinstance(n, TensorNode)],
+            key=lambda n: n.name,
+        )
+        self.weights = [n for n in self.root_args if isinstance(n, WeightTensor)]
+        self.variables: Dict[DFGNode, Tuple[IdenT, bool]] = {
+            f_name: (index, True) for index, f_name in enumerate(self.root_args)
+        }
+
+    ################################################
+    # Aux functions
+    ################################################
+
+    def _allocate_varname(self) -> str:
+        varname = f"var_{self.var_count}"
+        self.var_count += 1
+        return varname
+
+    @classmethod
+    def emit_weights(cls, weights: List[WeightTensor]) -> List[dict]:
+        ret = []
+        for weight in weights:
+            name = cls.make_c_identifier(weight.name)
+            file_path = f"{weight.new_name}_path.bin"
+            ret.append({"name": name, "shape": weight.shape, "filename": file_path})
+        return ret
+
+    @staticmethod
+    def make_c_identifier(name: str) -> str:
+        name = name.replace(".", "_")
+        if name[0].isnumeric():
+            name = "_" + name
+        return name
+
+
+class HpvmCodeGen(CodeGen):
+    def _emit_hpvm_node_edges(self, input_vars: List[DFGNode]) -> List[dict]:
+        ret = []
+        for it, node in enumerate(input_vars):
+            hpvm_var_name, is_root_input = self.variables[node]
+            if is_root_input:
+                assert isinstance(hpvm_var_name, int)
+                ret.append(
+                    {"is_bindin": True, "input_idx": hpvm_var_name, "edge_idx": it}
+                )
+            else:
+                ret.append(
+                    {"is_bindin": False, "input_node": hpvm_var_name, "edge_idx": it}
+                )
+        return ret
+
+    def emit_hpvm_node_structures(self) -> List[dict]:
+        node_envs = []
+        for node in self.dfg.traverse_order:
+            if isinstance(node, TensorNode):
+                continue
+            inputs = self.dfg.node_args(node)
+            func_name, extra_args = node.hpvm_codegen()
+            if func_name == "":  # No code generation
+                # Node must have a single input; we equate the output to
+                # the input and skip code generation.
+                assert len(inputs) == 1
+                self.variables[node] = self.variables[inputs[0]]
+                continue
+            varname = self._allocate_varname()
+            self.variables[node] = varname, False  # not a root-node arg
+            node_envs.append(
+                {
+                    "name": varname,
+                    "input_size": len(inputs),
+                    "edges": self._emit_hpvm_node_edges(inputs),
+                    "call_name": func_name,
+                    "call_args": extra_args,
+                }
+            )
+        return node_envs
+
+    def emit_root_io(self) -> Tuple[List[str], str]:
+        input_args = [
+            self.make_c_identifier(node.name)
+            for node, (_, is_root) in self.variables.items()
+            if is_root
+        ]
+        output_arg = self.variables[self.dfg.output][0]
+        return input_args, output_arg
+
+    def compile(self) -> None:
+        nodes = self.emit_hpvm_node_structures()
+        inputs, output = self.emit_root_io()
+        weights = self.emit_weights(self.weights)
+        prefix = self.prefix or self.output_dir
+        with open(self.output_dir / "hpvm_src.cc", "w") as f:
+            f.write(
+                template.render(
+                    nodes=nodes,
+                    input_name=self.input_name,
+                    input_size=self.input_size,
+                    batch_size=self.batch_size,
+                    input_shape=self.input_shape,
+                    root_inputs=inputs,
+                    root_output=output,
+                    weights=weights,
+                    prefix=prefix,
+                )
+            )
diff --git a/hpvm/projects/onnx/frontend/codegen_tensor.py b/hpvm/projects/onnx/frontend/codegen_tensor.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0a73106054c41f1bbfcf6692a6e8398fd4de3b4
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/codegen_tensor.py
@@ -0,0 +1,65 @@
+from pathlib import Path
+from typing import Dict, List
+
+import jinja2
+
+from codegen_hpvm import CodeGen
+from graph_ir import DFGNode, TensorNode
+
+TEMPLATE_FILE = "template_tensor.cpp"
+loader = jinja2.FileSystemLoader(searchpath=Path(__file__).parent)
+template_env = jinja2.Environment(loader=loader, trim_blocks=True)
+template = template_env.get_template(TEMPLATE_FILE)
+
+
+class TensorCodeGen(CodeGen):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.variables: Dict[DFGNode, str] = {
+            n: self.make_c_identifier(n.name) for n in self.root_args
+        }
+
+    ################################################
+    # CodeGen functions
+    ################################################
+
+    def emit_graph(self) -> List[dict]:
+        graph_code = []
+        for node in self.dfg.traverse_order:
+            if isinstance(node, TensorNode):
+                continue
+            inputs = self.dfg.node_args(node)
+            func_name, extra_args = node.codegen()
+            if func_name == "":  # No code generation
+                # Node must have a single input; we equate the output to
+                # the input and skip code generation.
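+                # (This is the case for ops like Flatten, Pad and Identity,
+                # whose codegen() returns an empty function name.)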
+                assert len(inputs) == 1
+                self.variables[node] = self.variables[inputs[0]]
+                continue
+            varname = self._allocate_varname()
+            self.variables[node] = varname
+            input_args = [self.variables[n] for n in inputs] + extra_args
+            graph_code.append(
+                {"output": varname, "inputs": input_args, "function": func_name}
+            )
+        return graph_code
+
+    ################################################
+    # compile() is the top-level function that compiles an ONNX model into a
+    # C/C++ program using the HPVM Tensor Runtime
+    ################################################
+
+    def compile(self):
+        graph_code = self.emit_graph()
+        output_arg = self.variables[self.dfg.output]
+        with open(self.output_dir / "src.cc", "w") as f:
+            f.write(
+                template.render(
+                    input=self.input_name,
+                    input_shape=self.input_shape,
+                    # Pass the dataset/batch sizes; the template's batching
+                    # loop needs them (input_shape excludes the batch dim).
+                    input_size=self.input_size,
+                    batch_size=self.batch_size,
+                    output=output_arg,
+                    graph_code=graph_code,
+                    weights=self.emit_weights(self.weights),
+                    output_dir=self.output_dir,
+                )
+            )
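For reference, here is a sketch of what `emit_graph` produces, assuming a hypothetical model with a single 3x3 Conv (all pads 1, strides [1, 1]) whose ONNX values are named `input` and `weight_0`:

```
# One graph_code entry produced by TensorCodeGen.emit_graph():
entry = {
    "output": "var_0",                            # from _allocate_varname()
    "inputs": ["input", "weight_0", 1, 1, 1, 1],  # operand vars + Conv2DNode.codegen() args
    "function": "tensorConvolution",
}
# template_tensor.cpp renders each entry as:
#   auto var_0 = tensorConvolution(input, weight_0, 1, 1, 1, 1);
```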
diff --git a/hpvm/projects/onnx/frontend/graph_builder.py b/hpvm/projects/onnx/frontend/graph_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..3996b947d2e51b0eea515bdacf2ed1bb625e35ad
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/graph_builder.py
@@ -0,0 +1,349 @@
+from collections import defaultdict
+from os import PathLike
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Tuple, Union
+
+import networkx as nx
+import onnx
+
+import graph_ir as g
+from onnx_attr import node_attr_to_dict
+
+GraphT = onnx.GraphProto
+NodeT = onnx.NodeProto
+NodeT.__hash__ = lambda self: id(self)
+NodeT.__repr__ = NodeT.__str__ = lambda self: self.name
+
+
+class MarkedSubGraph:
+    """A subgraph with information on how it should replace a node in a super graph.
+
+    subgraph: an nx.DiGraph subgraph
+    entry_edges: a list of edges from nodes "outside" to nodes in self.subgraph
+    exit: the exit node of the subgraph.
+      When this subgraph replaces a node `n`, self.exit will be connected to
+      whatever `n` is connected to.
+    """
+
+    def __init__(self, subgraph: nx.DiGraph, entry_edges, exit) -> None:
+        assert all(to in subgraph for _, to, _ in entry_edges)
+        assert exit in subgraph
+        self.subgraph, self.exit = subgraph, exit
+        self.entry_edges = [(f, t, {"index": i}) for f, t, i in entry_edges]
+
+    @classmethod
+    def idiomatic_1to2(cls, node1, node2, predecessors):
+        """Create an idiomatic replacement as follows:
+
+        node(arg1, arg2, arg3) -> node2(node1(arg1, arg2), arg3)"""
+        p0, p1, p2 = predecessors
+        graph = nx.DiGraph()
+        graph.add_edge(node1, node2, index=0)
+        return cls(graph, [(p0, node1, 0), (p1, node1, 1), (p2, node2, 1)], node2)
+
+
+EmitNodeT = Union[MarkedSubGraph, g.DFGNode]
+
+
+class DFG(object):
+    """ONNX model translated into a DFG of `DFGNode`s.
+
+    This class has a DFG, input/output information, and a clear traverse order
+    (think dominator tree), and is easier for CodeGen classes to work with.
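+
+    Typical use (a sketch; `model` is a loaded onnx.ModelProto):
+
+        dfg = DFG(model.graph)
+        for node in dfg.traverse_order:      # DFGNodes in topological order
+            args = dfg.node_args(node)       # positional inputs of `node`
+        dfg.dump_weights(output_dir)         # write WeightTensor data as .bin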
+    """
+
+    def __init__(self, graph: GraphT):
+        self._check_model(graph)
+        self._var_count = 0
+        # Build explicit DFG with ONNX nodes
+        onnx_graph = self._build_onnx_dfg(graph)
+        # Convert ONNX dfg into DFGNode DFG
+        self.graph = self._build_dfg(onnx_graph)
+        # Find the input nodes and the (unique) output node,
+        # removing dead nodes along the way if any
+        self.inputs, self.output = self._dce_get_io_info()
+
+    ################ Interfaces:
+
+    @property
+    def traverse_order(self) -> List[g.DFGNode]:
+        """Get topological order of computational graph by use-def relation."""
+        return list(nx.topological_sort(self.graph))
+
+    def node_args(self, node: g.DFGNode):
+        """Get input arguments of node."""
+        sorted_edges = sorted(self.graph.in_edges(node, "index"), key=lambda p: p[2])
+        return [e[0] for e in sorted_edges]
+
+    def dump_weights(self, output_dir: PathLike) -> None:
+        """Dump `WeightTensor`s into output_dir."""
+        output_dir = Path(output_dir)
+        for node in self.graph.nodes:
+            if not isinstance(node, g.WeightTensor):
+                continue
+            node.dump_weight(output_dir / (node.new_name + "_path.bin"))
+
+    ################ Internal methods (high-level):
+
+    @staticmethod
+    def _check_model(onnx_graph: GraphT):
+        """Check model validity and single output (which is our limitation)."""
+
+        import warnings
+        from onnx import checker, onnx_cpp2py_export
+
+        # try onnx's own model checker before converting any model
+        try:
+            checker.check_graph(onnx_graph)
+        except onnx_cpp2py_export.checker.ValidationError as e:
+            warnings.warn(str(e))
+        if any(len(n.output) > 1 for n in onnx_graph.node):
+            raise ValueError("All nodes must have a single output")
+        if len(onnx_graph.output) > 1:
+            raise ValueError("Graph must have a single output")
+
+    @staticmethod
+    def _build_onnx_dfg(graph: GraphT) -> nx.DiGraph:
+        """Creates a DiGraph (by use-def relation) of onnx nodes from onnx GraphProto.
+
+        A DiGraph is easier to use as a graph than GraphProto, where use-def is implicit."""
+
+        ret_graph = nx.DiGraph()
+        onnx_defs, onnx_uses = def_use(graph.node)
+        tensors = extract_tensors_from_graph(graph)
+        ret_graph.add_nodes_from(graph.node)
+        for onnx_value_name, use_nodes in onnx_uses.items():
+            def_node = onnx_defs.get(onnx_value_name)
+            if def_node is None:
+                def_node = tensors[onnx_value_name]
+            for use_node, used_at_narg in use_nodes:
+                ret_graph.add_edge(def_node, use_node, index=used_at_narg)
+        return ret_graph
+
+    def _build_dfg(self, onnx_graph: nx.DiGraph) -> nx.DiGraph:
+        """Translate _build_onnx_dfg output into a DFGNode DFG.
+
+        First run some passes to process subgraphs that need to be
+        handled together; then each unprocessed node is generated into
+        1 or more of our nodes."""
+
+        # Remove subgraphs that can be a single Flatten instead
+        onnx_graph = detect_flatten(onnx_graph)
+        # Remove subgraphs that look like padding but do nothing
+        onnx_graph = remove_no_padding(onnx_graph)
+        # For each onnx node, generate our nodes
+        node_to_nodes, error_nodes = {}, []
+        for onnx_node in nx.topological_sort(onnx_graph):
+            our_nodes = self._emit_node(onnx_graph, onnx_node)
+            if our_nodes is None:
+                error_nodes.append(onnx_node)
+            else:
+                node_to_nodes[onnx_node] = our_nodes
+        if error_nodes:
+            error_repr = [f"{n.name}({n.op_type})" for n in error_nodes]
+            if len(error_nodes) > 10:  # Magic number
+                raise ValueError(f"Unsupported operators (first 10): {error_repr[:10]}")
+            else:
+                raise ValueError(f"Unsupported operators: {error_repr}")
+        # Apply node_to_nodes replacement on onnx_graph to create a new DFG
+        return build_graph_with_mapping(onnx_graph, node_to_nodes)
+
+    def _dce_get_io_info(self):
+        inputs = [n for n in self.graph if isinstance(n, g.InputTensor)]
+        inputs_set = set(inputs)
+        reachables = set()
+        for component in nx.connected_components(self.graph.to_undirected()):
+            # If any input goes into this subgraph, it's alive.
+            if set(component).intersection(inputs_set):
+                reachables.update(component)
+        unreachables = set(self.graph) - reachables
+        # Remove nodes unreachable from the input
+        self.graph.remove_nodes_from(unreachables)
+        # Then outputs are nodes with out_degree = 0
+        outputs = [n for n in self.graph if self.graph.out_degree[n] == 0]
+        assert len(outputs) == 1
+        return inputs, outputs[0]
+
+    @staticmethod
+    def _emit_node(in_graph: nx.DiGraph, node: NodeT) -> Optional[EmitNodeT]:
+        predec = sorted_inputs(in_graph, node)
+        if isinstance(node, g.DFGNode):
+            # Directly add node into return graph.
+            return node
+
+        attrs = node_attr_to_dict(node)
+        if node.op_type == "Conv":
+            weight_tensor = predec[1]
+            assert isinstance(weight_tensor, g.WeightTensor)
+            if len(weight_tensor.shape) != 4:
+                return None  # Only supports 2D conv
+            conv_node = g.Conv2DNode(node.name, attrs)
+            if len(predec) == 2:
+                return conv_node
+            # Split into conv followed by an addition
+            bias_node = g.BiasAddNode(f"Bias_{node.name.split('_')[-1]}")
+            return MarkedSubGraph.idiomatic_1to2(conv_node, bias_node, predec)
+        elif node.op_type in ("MatMul", "Gemm"):
+            mul_node = g.MatMulNode(node.name, attrs)
+            if node.op_type == "Gemm":
+                mul_node.gemm_transpose(node, predec)
+            if len(predec) == 2:
+                return mul_node
+            # Split into mul followed by an addition
+            bias_node = g.BiasAddNode(f"Bias_{node.name.split('_')[-1]}")
+            return MarkedSubGraph.idiomatic_1to2(mul_node, bias_node, predec)
+        one_to_one_nodes = {
+            "MaxPool": g.MaxPool2DNode,
+            "AveragePool": g.AveragePool2DNode,
+            "Add": g.AddNode,
+            "Softmax": g.SoftMaxNode,
+            "Relu": g.ReluNode,
+            "Tanh": g.TanhNode,
+            "BatchNormalization": g.BatchNormalizationNode,
+            "Pad": g.PadNode,
+            "Identity": g.IdentityNode,
+            "Flatten": g.FlattenNode,
+        }
+        if node.op_type in one_to_one_nodes:
+            return one_to_one_nodes[node.op_type](node.name, attrs)
+        return None
+
+
+def def_use(nodes: Iterable) -> Tuple[dict, dict]:
+    """Computes def/use relation from a list of nodes.
+
+    This method is duck-typed and operates on any node defining .input and .output.
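+
+    For example, if node A has output "x" and node B consumes "x" as its
+    first input, then defs["x"] is A and uses["x"] contains (B, 0).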
+ """ + defs, uses = {}, defaultdict(list) + for n in nodes: + for i, input_ in enumerate(n.input): + uses[input_].append((n, i)) + for output in n.output: + defs[output] = n + return defs, uses + + +def remove_no_padding(graph: nx.DiGraph) -> nx.DiGraph: + """Remove subgraphs that look like padding but does nothing.""" + for node in list(graph.nodes): + if node.op_type != "Pad": + continue + input_args = sorted_inputs(graph, node) + # Find the second input argument to Pad (will be a Constant node) + # and take that away as well. + nct = input_args[1] + padding = node_attr_to_dict(nct)["value"] + if any(p != 0 for p in padding): + continue + # Connect input of Pad to where output of Pad goes + succ = graph.out_edges(node, "index") + for _, to, index in succ: + graph.add_edge(input_args[0], to, index=index) + # Remove nodes + graph.remove_nodes_from([node, nct]) + return graph + + +def detect_flatten(graph: nx.DiGraph) -> nx.DiGraph: + """Look for a shape-gather-unsqueeze-concat-reshape chain and replace that with flatten.""" + + for node in list(graph.nodes): + if node.op_type != "Shape": + continue + ng = get_next_in_chain(graph, "Gather", node) + # Find the second input argument to Gather (will be a Constant node) + # and take that away as well. + nct = sorted_inputs(graph, ng)[1] + nu = get_next_in_chain(graph, "Unsqueeze", ng) + nc = get_next_in_chain(graph, "Concat", nu) + nr = get_next_in_chain(graph, "Reshape", nc) + if nr is None: + continue + _, suffix = node.name.split("_") + gen_node = g.FlattenNode(f"Flatten_{suffix}") + replace_chain_with_node_(graph, [node, ng, nct, nu, nc, nr], gen_node) + return graph + + +def get_next_in_chain( + graph: nx.DiGraph, type_: str, node: Optional[NodeT] +) -> Optional[NodeT]: + """ + Get a unique user node of the unique output of Node `node`, + and return it if it has Type `type_`. + """ + if node is None or len(node.output) != 1: + return None # Propagates None; Unique output + users = list(graph.neighbors(node)) + if len(users) != 1 or users[0].op_type != type_: + return None # Unique user of the output; Correct type + return users[0] + + +def replace_chain_with_node_(graph: nx.DiGraph, chain: list, node) -> nx.DiGraph: + inputs = sorted_inputs(graph, chain[0]) + succ = graph.out_edges(chain[-1], "index") + for i, n in enumerate(inputs): + graph.add_edge(n, node, index=i) + for _, to, index in succ: + graph.add_edge(node, to, index=index) + graph.remove_nodes_from(chain) + return graph + + +def build_graph_with_mapping( + graph: nx.DiGraph, node_mapping: Dict[NodeT, EmitNodeT] +) -> nx.DiGraph: + graph = graph.copy() + single_node, multi_node = {}, {} + for replace_node, by_node in node_mapping.items(): + if isinstance(by_node, g.DFGNode): + single_node[replace_node] = by_node + else: + multi_node[replace_node] = by_node + # We do one-to-many replacements first + # because their predecessors are specified as onnx nodes. + for replace_node, subgraph in multi_node.items(): + # Add subgraph itself + graph = nx.compose(graph, subgraph.subgraph) + # Add in edges + graph.add_edges_from(subgraph.entry_edges) + # Add out edges + succ = graph.out_edges(replace_node, "index") + for _, to, index in succ: + graph.add_edge(subgraph.exit, to, index=index) + # Remove old node + graph.remove_node(replace_node) + # Then do all one-to-one replacements. 
+    graph = nx.relabel_nodes(graph, single_node)
+    return graph
+
+
+def extract_tensors_from_graph(onnx_graph: GraphT) -> Dict[str, g.TensorNode]:
+    tensors = {}
+    # parse weights
+    weight_cnt = 0
+    for weight_tensor in onnx_graph.initializer:
+        tensors[weight_tensor.name] = g.WeightTensor(
+            weight_tensor, f"weight_{weight_cnt}"
+        )
+        weight_cnt += 1
+    # parse inputs
+    input_cnt = 0
+    for input_ in onnx_graph.input:
+        if input_.name in tensors:
+            continue
+        tensors[input_.name] = g.InputTensor(input_, f"input_{input_cnt}")
+        input_cnt += 1
+    return tensors
+
+
+def sorted_inputs(graph: nx.DiGraph, node):
+    sorted_edges = sorted(graph.in_edges(node, "index"), key=lambda p: p[2])
+    return [e[0] for e in sorted_edges]
+
+
+def draw_graph(graph: nx.DiGraph, output_to):
+    from networkx.drawing.nx_agraph import to_agraph
+
+    agraph = to_agraph(graph)
+    agraph.layout("dot")
+    agraph.draw(output_to)
diff --git a/hpvm/projects/onnx/frontend/graph_ir.py b/hpvm/projects/onnx/frontend/graph_ir.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd42617f600e8ec8e6c9012fbd3adfa56ada76ec
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/graph_ir.py
@@ -0,0 +1,306 @@
+import abc
+from os import PathLike
+from typing import List, Tuple
+
+import onnx
+
+from onnx_attr import node_attr_to_dict
+
+
+class DFGNode(abc.ABC):
+    """Abstract node that represents 1 instruction in HPVM.
+
+    op_type should be overridden in subclasses for readability.
+    """
+
+    op_type = ""
+
+    def __init__(self, name: str, attrs: dict = {}):
+        self.name, self.attrs = name, attrs
+
+    def codegen(self) -> Tuple[str, list]:
+        return "", []
+
+    def hpvm_codegen(self) -> Tuple[str, list]:
+        return "", []
+
+    def __repr__(self) -> str:
+        return f"{self.op_type}({self.name})"
+
+
+class TensorNode(DFGNode, abc.ABC):
+    """An abstract node for a value that exists without an instruction.
+
+    This is akin to the Value class in LLVM, but in a different place on the
+    inheritance tree."""
+
+    def __init__(self, proto: onnx.TensorProto, new_name: str):
+        if not proto.name.strip():
+            raise ValueError("Tensor's name is required.")
+        super().__init__(proto.name, {})
+        self.new_name = new_name
+
+    def __str__(self):
+        return f"{self.__class__.__name__}: {self.name}"
+
+    __repr__ = __str__
+
+
+class InputTensor(TensorNode):
+    """Input to the computation graph.
+
+    This is basically only used for its information about the ONNX input;
+    it doesn't itself emit an instruction or any interesting thing.
+    """
+
+    op_type = "InputTensor"
+
+    def __init__(self, input_proto: onnx.TensorProto, new_name: str):
+        super().__init__(input_proto, new_name)
+        # get type of input tensor
+        tensor_type = input_proto.type.tensor_type
+        # check if it has a shape:
+        shape = tensor_type.shape
+        self.shape: List[int] = [d.dim_value for d in shape.dim]
+
+
+class WeightTensor(TensorNode):
+    """An initialized parameter in the ONNX graph.
+
+    This is any parameter that has an initializer value in the ONNX model
+    (as opposed to InputTensor, which doesn't have any value).
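+
+    For example, a 1-D bias initializer of 64 elements becomes a WeightTensor
+    of shape [1, 64, 1, 1]; __init__ normalizes all weight shapes to 4-D.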
+ """ + + op_type = "WeightTensor" + + def __init__(self, weight_proto: onnx.TensorProto, new_name: str): + from onnx import numpy_helper + + super().__init__(weight_proto, new_name) + self.shape = [] + self.input_data = numpy_helper.to_array(weight_proto) + sh = self.input_data.shape + if len(sh) == 1: + self.shape = [1, sh[0], 1, 1] + elif len(sh) == 2: + self.shape = [1, 1, sh[0], sh[1]] + elif len(sh) == 4: + self.shape = [sh[0], sh[1], sh[2], sh[3]] + else: + self.shape = [1] * 4 + + def dump_weight(self, file_name: PathLike): + self.input_data.tofile(file_name) + + def transpose_(self): + if len(self.input_data.shape) != 2: + raise ValueError("Can only transpose 2D array") + self.input_data = self.input_data.T + self.shape[3], self.shape[2] = self.shape[2:] + + +class Conv2DNode(DFGNode): + op_type = "Conv2D" + + def __init__(self, name: str, attrs: dict): + super().__init__(name, attrs) + padding = self.attrs["pads"] + assert len(padding) == 4, "2D convolution must have 4 padding values" + if any(p != padding[0] for p in padding[1:]): + raise ValueError("Convolution with different padding is unsupported") + self.padding = padding[0] + self.strides = self.attrs["strides"] + + def codegen(self): + return ( + "tensorConvolution", + [self.padding, self.padding, self.strides[0], self.strides[1]], + ) + + def hpvm_codegen(self): + return ( + "__visc__tensor_convolution", + [self.padding, self.padding, self.strides[0], self.strides[1]], + ) + + +class _Pool2DNode(DFGNode, abc.ABC): + """Common super class of Average pooling and Max pooling.""" + + pool_type = "0" + + def __init__(self, name: str, attrs: dict): + super().__init__(name, attrs) + self.strides = self.attrs["strides"] + self.pool_size = self.attrs["kernel_shape"] + self.padding = 0 + + def codegen(self): + return ( + "tensorPooling", + [ + self.pool_type, + *self.pool_size, + self.padding, + self.padding, + *self.strides, + ], + ) + + def hpvm_codegen(self): + return ( + "__visc__tensor_pool_max", + [*self.pool_size, self.padding, self.padding, *self.strides], + ) + + +class MaxPool2DNode(_Pool2DNode): + pool_type = "0" + op_type = "MaxPool2D" + + +class AveragePool2DNode(_Pool2DNode): + pool_type = "1" + op_type = "AveragePool2D" + + +class BiasAddNode(DFGNode): + op_type = "BiasAdd" + + def codegen(self): + return "tensorAdd", [] + + def hpvm_codegen(self): + return "__visc__tensor_add", [] + + +class MatMulNode(DFGNode): + op_type = "MatMul" + + def codegen(self): + return "tensorGemmGPU", [] + + def hpvm_codegen(self): + return "__visc__tensor_mul", [] + + @staticmethod + def gemm_transpose(onnx_gemm_node, predec): + """Find and transpose weights of the onnx gemm node. + + This way we transpose the constant weight instead of exporting + a transpose node (which doesn't yet exist in HPVM). 
+ """ + + def _transpose(weight): + if not isinstance(weight, WeightTensor): + raise ValueError( + f"Cannot transpose non-const {weight} (transpose op needed)" + ) + weight.transpose_() + + # Some tensors may need transposing + attrs = node_attr_to_dict(onnx_gemm_node) + if attrs.get("transA", False): + _transpose(predec[0]) + if attrs.get("transB", False): + _transpose(predec[1]) + + +class SoftMaxNode(DFGNode): + op_type = "SoftMax" + + def codegen(self): + return "tensorSoftmax", [] + + def hpvm_codegen(self): + return "__visc__tensor_softmax", [] + + +class AddNode(DFGNode): + op_type = "Add" + + def codegen(self): + return "tensorAdd", [] + + def hpvm_codegen(self): + return "__visc__tensor_add", [] + + +class ReluNode(DFGNode): + op_type = "ReLU" + + def codegen(self): + return "tensorRelu", [] + + def hpvm_codegen(self): + return "__visc__tensor_relu", [] + + +class TanhNode(DFGNode): + op_type = "Tanh" + + def codegen(self): + return "tensorTanh", [] + + def hpvm_codegen(self): + return "__visc__tensor_tanh", [] + + +class BatchNormalizationNode(DFGNode): + op_type = "BN" + + def __init__(self, name: str, attrs: dict): + super().__init__(name, attrs) + self.epsilon = self.attrs["epsilon"] + + def codegen(self): + return "tensorBatchNorm", [self.epsilon] + + def hpvm_codegen(self): + return "__visc__tensor_batchnorm", [self.epsilon] + + +class FlattenNode(DFGNode): + op_type = "Flatten" + + +class ActivationNode(DFGNode): + """ + Element wise operators that is for activation function + e.g. HardSigmoid, LeakyRelu, PRelu, Pow, Reciprocal, + Relu, Selu, Sigmoid, Softplus, Sqrt, ThresholdedRelu, + Abs, Ceil, Elu, Floor, Neg + """ + + pass + + +class LogicalOpNode(DFGNode): + """ + Element wise operators that is not for activation function. + In other words, they are logical comparison operators + e.g. 
+    And, Equal, Greater, GreaterOrEqual, Less, LessOrEqual,
+    Or, Xor.
+    """
+
+    pass
+
+
+class ZeroPadding2DNode(DFGNode):
+    pass
+
+
+class DepthwiseConv2DNode(DFGNode):
+    pass
+
+
+class DenseNode(DFGNode):
+    pass
+
+
+class PadNode(DFGNode):
+    pass
+
+
+class IdentityNode(DFGNode):
+    pass
diff --git a/hpvm/projects/onnx/frontend/main.py b/hpvm/projects/onnx/frontend/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3037224ac6a44535eabc8cf9f8e96ecf48021c9
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/main.py
@@ -0,0 +1,106 @@
+from pathlib import Path
+from typing import Optional
+
+import onnx
+
+
+def check_version(model, new_version):
+    try:
+        opset = model.opset_import[0].version if model.opset_import else 1
+    except AttributeError:
+        opset = 1  # default opset version set to 1 if not specified
+    if opset != new_version:
+        from onnx import version_converter
+
+        try:
+            converted_model = version_converter.convert_version(model, new_version)
+            return converted_model
+        except RuntimeError as e:
+            raise RuntimeError(
+                f"Current version {opset} of ONNX model not supported!\n"
+                f"Conversion failed with message below: \n{e}"
+            )
+    return model
+
+
+def compile(
+    onnx_file: Path,
+    output_dir: Path,
+    input_size: int,
+    prefix: Optional[str],
+    batch_size: Optional[int],
+    opset: Optional[int],
+    hpvmc: bool,
+):
+    from graph_builder import DFG
+    from codegen_tensor import TensorCodeGen
+    from codegen_hpvm import HpvmCodeGen
+
+    model = onnx.load(onnx_file)
+    if opset is not None:
+        model = check_version(model, opset)
+    dfg = DFG(model.graph)
+    # Pick a code generator (without shadowing the imported class names)
+    if hpvmc:
+        codegen = HpvmCodeGen(dfg, output_dir, input_size, batch_size, prefix)
+    else:
+        codegen = TensorCodeGen(dfg, output_dir, input_size)
+    codegen.compile()
+    dfg.dump_weights(output_dir)
+
+
+def parse_args():
+    import argparse
+
+    parser = argparse.ArgumentParser(description="ONNX to HPVM-C")
+    parser.add_argument("onnx_file", type=Path, help="Path to input ONNX file")
+    parser.add_argument(
+        "output_dir",
+        type=Path,
+        help="Output folder where source file and weight files are generated",
+    )
+    parser.add_argument(
+        "input_size", type=int, help="Size of input dataset",
+    )
+    parser.add_argument(
+        "-p",
+        "--prefix",
+        type=str,
+        help="Prefix in generated code; will be attached before name of weight/input files. "
+        "Defaults to output_dir.",
+    )
+    parser.add_argument(
+        "-b",
+        "--batch-size",
+        type=int,
+        help="Batch size to be used in the generated code. "
+        "Defaults to input size (i.e., not using batch).",
+    )
+    parser.add_argument("--opset", type=int, help="ONNX opset version (enforced)")
+    parser.add_argument(
+        "-c",
+        "--compile-mode",
+        type=str,
+        choices=["tensor", "hpvmc"],
+        default="hpvmc",
+        help="""Output mode.
+tensor: HPVM Tensor Runtime;
+hpvmc: HPVM C Interface. Default value is hpvmc.""",
+    )
+
+    args = parser.parse_args()
+    args.hpvmc = args.compile_mode == "hpvmc"
+    delattr(args, "compile_mode")
+    return args
+
+
+def main():
+    import os
+
+    args = parse_args()
+    os.makedirs(args.output_dir, exist_ok=True)
+    compile(**vars(args))
+
+
+if __name__ == "__main__":
+    main()
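Equivalently, the frontend can be driven programmatically (a sketch; the model path and sizes are placeholders):

```
from pathlib import Path

from main import compile as compile_onnx

# Same effect as: python main.py lenet.onnx out 5000 -b 100 --opset 11
compile_onnx(
    onnx_file=Path("lenet.onnx"),  # placeholder model path
    output_dir=Path("out"),        # must already exist (main() creates it for the CLI)
    input_size=5000,
    prefix=None,                   # generated code then uses output_dir as prefix
    batch_size=100,
    opset=11,                      # convert the model to opset 11 before compiling
    hpvmc=True,                    # True: HpvmCodeGen; False: TensorCodeGen
)
```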
diff --git a/hpvm/projects/onnx/frontend/onnx_attr.py b/hpvm/projects/onnx/frontend/onnx_attr.py
new file mode 100644
index 0000000000000000000000000000000000000000..7434dccbbb9f579af7e3656ad73560332c88bc74
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/onnx_attr.py
@@ -0,0 +1,83 @@
+import numpy as np
+from onnx import AttributeProto, NodeProto, TensorProto
+
+
+def throw_ctor(ty):
+    def _throw_ctor(x):
+        raise ValueError(f"Cannot construct type {ty} from value {x}")
+
+    return _throw_ctor
+
+
+def composite_ctor(singular_ctor):
+    def _composite_ctor(xs):
+        return [singular_ctor(x) for x in xs]
+
+    return _composite_ctor
+
+
+def tensor_ctor(x: TensorProto) -> np.ndarray:
+    tensor_typenames_to_numpy_ty = {
+        "BOOL": np.bool,
+        "UINT8": np.uint8,
+        "INT8": np.int8,
+        "UINT16": np.uint16,
+        "INT16": np.int16,
+        "INT32": np.int32,
+        "UINT32": np.uint32,
+        "INT64": np.int64,
+        "UINT64": np.uint64,
+        "STRING": np.str,
+        "FLOAT16": np.float16,
+        "FLOAT": np.float32,
+        "DOUBLE": np.float64,
+        # 'BFLOAT16' -- unsupported
+        "COMPLEX64": np.complex64,
+        "COMPLEX128": np.complex128,
+    }
+
+    get_tensor_typename = TensorProto.DataType.Name
+    tensor_typename = get_tensor_typename(x.data_type)
+    if tensor_typename not in tensor_typenames_to_numpy_ty:
+        raise ValueError(f"Tensor with type {tensor_typename} cannot be processed")
+    numpy_dtype = tensor_typenames_to_numpy_ty[tensor_typename]
+    numpy_arr = np.frombuffer(x.raw_data, dtype=numpy_dtype).reshape(x.dims).copy()
+    return numpy_arr
+
+
+def parse_node_attr(onnx_attr: AttributeProto) -> object:
+    """Parse a single ONNX attribute into a plain Python value."""
+    from collections import namedtuple
+
+    AttrMeta = namedtuple("AttrMeta", ["ctor", "data_name"])
+    attr_typenames_to_meta = {
+        "FLOAT": AttrMeta(float, "f"),
+        "INT": AttrMeta(int, "i"),
+        "STRING": AttrMeta(str, "s"),
+        "TENSOR": AttrMeta(tensor_ctor, "t"),
+        "GRAPH": AttrMeta(throw_ctor("GRAPH"), "g"),
+        "SPARSE_TENSOR": AttrMeta(throw_ctor("SPARSE_TENSOR"), "sparse_tensor"),
+        "FLOATS": AttrMeta(composite_ctor(float), "floats"),
+        "INTS": AttrMeta(composite_ctor(int), "ints"),
+        "STRINGS": AttrMeta(composite_ctor(str), "strings"),
+        "TENSORS": AttrMeta(composite_ctor(tensor_ctor), "tensors"),
+        "GRAPHS": AttrMeta(throw_ctor("GRAPHS"), "graphs"),
+        "SPARSE_TENSORS": AttrMeta(throw_ctor("SPARSE_TENSORS"), "sparse_tensors"),
+    }
+
+    get_attr_typename = AttributeProto.AttributeType.Name
+    typename = get_attr_typename(onnx_attr.type)
+    assert (
+        typename in attr_typenames_to_meta
+    ), f"ONNX attribute contains non-ONNX type {typename}"
+    attr_meta = attr_typenames_to_meta[typename]
+    data = getattr(onnx_attr, attr_meta.data_name)
+    parsed_data = attr_meta.ctor(data)
+    return parsed_data
+
+
+def node_attr_to_dict(onnx_node: NodeProto):
+    return {attr.name: parse_node_attr(attr) for attr in onnx_node.attribute}
diff --git a/hpvm/projects/onnx/frontend/template_hpvm.cpp b/hpvm/projects/onnx/frontend/template_hpvm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e4d13c84ece354d561b141693a3b4ec40a68974a
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/template_hpvm.cpp
@@ -0,0 +1,98 @@
+#include <string>
+#include <visc.h>
+#include <tensorTypes.h>
+#include
<tensorUtils.h> + +{% for node in nodes %} +void {{node.name}}_node( +{%- for n in range(node.input_size) -%} +void *t{{n}}, size_t bytes_t{{n}}{{", " if not loop.last}} +{%- endfor %}) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes({{node.input_size}}, {% for n in range(node.input_size) -%} +t{{n}}{{", " if not loop.last}} +{%- endfor %}, 0); + void *r = {{node.call_name}}({% for n in range(node.input_size) -%} +t{{n}}{{", " if not loop.last}} +{%- endfor %}{{", " if node.call_args}}{{node.call_args|join(", ")}}); + __visc__return(2, r, (size_t) 0); +} + +{% endfor -%} + +void root(void *__output, {%- for n in root_inputs -%} +void *{{n}}, size_t {{n}}_bytes{{", " if not loop.last}} +{%- endfor %}) { + __visc__hint(visc::CPU_TARGET); + __visc__attributes({{root_inputs|length}}, {% for n in root_inputs -%} +{{n}}{{", " if not loop.last}} +{%- endfor %}, 0); + +{% for node in nodes %} + void* {{node.name}} = __visc__createNodeND(0, {{node.name}}_node); +{% for edge in node.edges %} +{% if edge.is_bindin %} + __visc__bindIn({{node.name}}, {{edge.input_idx * 2 + 1}}, {{edge.edge_idx * 2}}, 0); + __visc__bindIn({{node.name}}, {{edge.input_idx * 2 + 2}}, {{edge.edge_idx * 2 + 1}}, 0); +{% else %} + __visc__edge({{edge.input_node}}, {{node.name}}, 1, 0, {{edge.edge_idx * 2}}, 0); + __visc__edge({{edge.input_node}}, {{node.name}}, 1, 1, {{edge.edge_idx * 2 + 1}}, 0); +{% endif %} +{% endfor %} + +{% endfor %} + __visc__bindOut({{root_output}}, 0, 0, 0); + __visc__bindOut({{root_output}}, 1, 1, 0); +} + +struct ret_t { + void* tensor; + size_t bytes; +}; + +typedef struct __attribute__((__packed__)) { + void *__output; +{% for n in root_inputs %} + void *{{n}}; + size_t {{n}}_bytes; +{% endfor %} + struct ret_t r; +} RootIn; + + +const int batch_size = {{batch_size}}, input_size = {{input_size}}, batch_count = input_size / batch_size; + +int main(){ + std::string dir_prefix = "{{prefix}}/"; + std::string input_path = dir_prefix + "input.bin"; + std::string labels_path = dir_prefix + "labels32.bin"; +{% for w in weights %} + std::string {{w.name}}_path = dir_prefix + "{{w.filename}}"; + void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}}); +{% endfor %} + + RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); + void* {{input_name}} = create4DTensor(0, nchw, batch_size, {{input_shape|join(', ')}}); +{% for n in root_inputs %} + args->{{n}} = {{n}}; + args->{{n}}_bytes = 0; +{% endfor %} + + __visc__init(); + startMemTracking(); + for (int i = 0; i < batch_count; i++){ + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, {{input_shape|join(', ')}}, {{input_name}}); + + void* dfg = __visc__launch(0, root, (void*) args); + __visc__wait(dfg); + void *result = static_cast<RootIn*>(args)->__output; + hpvm_request_tensor(result, 0); + + uint32_t* labels = readLabelsBatch3(labels_path.c_str(), start, end); + computeAccuracy3(labels, result); + freeBatchMemory(); + } + __visc__cleanup(); + return 0; +} diff --git a/hpvm/projects/onnx/frontend/template_tensor.cpp b/hpvm/projects/onnx/frontend/template_tensor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ae3060836451069ed6aee9321d714db1b66c1723 --- /dev/null +++ b/hpvm/projects/onnx/frontend/template_tensor.cpp @@ -0,0 +1,50 @@ +#include <chrono> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include 
"../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" + +int main() { + std::string dir_prefix = "{{output_dir}}"; + std::string input_path = dir_prefix + "input.bin"; + std::string labels_path = dir_prefix + "labels.bin"; +{% for w in weights %} + std::string {{w.name}}_path = dir_prefix + std::string("{{w.filename}}"); + void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}}); +{% endfor %} + + llvm_hpvm_initTensorRt(0); + startMemTracking(); + + int test_input_size = {{input_shape[0]}}; + int batch_size = {{input_shape[0]}}; + // # FIXME: Ceiling for batch_count + int batch_count = test_input_size / batch_size; + float final_accuracy = 0.0; + + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size; + int end = (i + 1) * batch_size; + void *{{input}} = readInputBatch(input_path.c_str(), 0, start, end, {{input_shape|join(', ')}}); + +{% for code in graph_code %} + auto {{code.output}} = {{code.function}}({{code.inputs|join(', ')}}); +{% endfor %} + + uint32_t* labels = readLabelsBatch3(labels_path.c_str(), start, end); + float accuracy = computeAccuracy3(labels, {{output}}); + final_accuracy += accuracy; + freeBatchMemory(); + } + + final_accuracy = final_accuracy / batch_count; + dumpFinalAccuracy(final_accuracy); + llvm_hpvm_cleanupTensorRt(); + return 0; +} \ No newline at end of file diff --git a/hpvm/projects/onnx/models/alexnet/alexnet.onnx b/hpvm/projects/onnx/models/alexnet/alexnet.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d87fa5fe75eddf8ae2823bd0ace120bbacd50cdf Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/alexnet.onnx differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_0.npz b/hpvm/projects/onnx/models/alexnet/test_data_0.npz new file mode 100644 index 0000000000000000000000000000000000000000..0089f591dc76a98182e0384c70874fca701d3469 Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_0.npz differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_1.npz b/hpvm/projects/onnx/models/alexnet/test_data_1.npz new file mode 100644 index 0000000000000000000000000000000000000000..30b9d311981525cff5ef71ad588a763eb403c1b4 Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_1.npz differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_2.npz b/hpvm/projects/onnx/models/alexnet/test_data_2.npz new file mode 100644 index 0000000000000000000000000000000000000000..f73145dc9154aacf13455de9a865889d54efe263 Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_2.npz differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_0/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..0dbdb3ecde433c70d5201cce0343b396261ca9e0 Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_0/input_0.pb differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_0/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..c9a100142c98c684ce9358966c6ae4b99e62521d Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_0/output_0.pb differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_1/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_1/input_0.pb new file mode 100644 index 
0000000000000000000000000000000000000000..968e6125f1ab45d6d3a1c2067309f05a2aab689a Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_1/input_0.pb differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_1/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_1/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..c841190b7499f8eded7a484e691254a5aafbc4db Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_1/output_0.pb differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_2/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_2/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..189e616b49d2fa21fae9a32425e24cd263ca0203 Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_2/input_0.pb differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_2/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_2/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..ec17185491fa73d344cb46415185422bc7f416bc Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_2/output_0.pb differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_3/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_3/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..3d91313b14fb444c268f57e8bb05f8c38421107d Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_3/input_0.pb differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_3/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_3/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..49e4e4f83c8384886ceb08283a6b46d2430ac1b9 Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_3/output_0.pb differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_4/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_4/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..72d611c195d20032b4953562f4722b4e1ddedff8 Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_4/input_0.pb differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_4/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_4/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..dfe5b65c8260d684171cc66e3759cb66e02a1ed7 Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_4/output_0.pb differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_5/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_5/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..f737585597ce2b56253e37f75f3defbbfce30cec Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_5/input_0.pb differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_5/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_5/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..6374baf7869b08c98c3b43aebbe58e3d15e99774 Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_5/output_0.pb differ diff --git a/hpvm/projects/onnx/models/keras/alexnet.onnx b/hpvm/projects/onnx/models/keras/alexnet.onnx new file mode 100644 index 0000000000000000000000000000000000000000..452d9f846fad21b5fd89321e86b8a7945c625d2d Binary files /dev/null and 
b/hpvm/projects/onnx/models/keras/alexnet.onnx differ diff --git a/hpvm/projects/onnx/models/keras/alexnet2.onnx b/hpvm/projects/onnx/models/keras/alexnet2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e1147a8e589243004411b2b4f958a2240f3df94d Binary files /dev/null and b/hpvm/projects/onnx/models/keras/alexnet2.onnx differ diff --git a/hpvm/projects/onnx/models/keras/alexnet8k.onnx b/hpvm/projects/onnx/models/keras/alexnet8k.onnx new file mode 100644 index 0000000000000000000000000000000000000000..49f74d5f2f4dfb74f266b174a5a72d5c7add0cc1 Binary files /dev/null and b/hpvm/projects/onnx/models/keras/alexnet8k.onnx differ diff --git a/hpvm/projects/onnx/models/keras/alexnet_last.onnx b/hpvm/projects/onnx/models/keras/alexnet_last.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f9f82e3cbfd79df383afe3c61540fdfded45a976 Binary files /dev/null and b/hpvm/projects/onnx/models/keras/alexnet_last.onnx differ diff --git a/hpvm/projects/onnx/models/keras/lenet.onnx b/hpvm/projects/onnx/models/keras/lenet.onnx new file mode 100644 index 0000000000000000000000000000000000000000..659764f2f46d05dad62c2187c13c665839fe3058 Binary files /dev/null and b/hpvm/projects/onnx/models/keras/lenet.onnx differ diff --git a/hpvm/projects/onnx/models/keras/mobilenet.onnx b/hpvm/projects/onnx/models/keras/mobilenet.onnx new file mode 100644 index 0000000000000000000000000000000000000000..947ab70daf9653af5f7d84349f92aca79d31c131 Binary files /dev/null and b/hpvm/projects/onnx/models/keras/mobilenet.onnx differ diff --git a/hpvm/projects/onnx/models/keras/mobilenet_cifar10.onnx b/hpvm/projects/onnx/models/keras/mobilenet_cifar10.onnx new file mode 100644 index 0000000000000000000000000000000000000000..998d7f05b540b97e0fa36daab280aebbb3574eff Binary files /dev/null and b/hpvm/projects/onnx/models/keras/mobilenet_cifar10.onnx differ diff --git a/hpvm/projects/onnx/models/keras/resnet.onnx b/hpvm/projects/onnx/models/keras/resnet.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b9ad6bc843e3167151e74ad1a720130ef8421b4e Binary files /dev/null and b/hpvm/projects/onnx/models/keras/resnet.onnx differ diff --git a/hpvm/projects/onnx/models/keras/vgg16-cifar10.onnx b/hpvm/projects/onnx/models/keras/vgg16-cifar10.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2395a9770aaf0df87d8e2acdad0935a8058fed3b Binary files /dev/null and b/hpvm/projects/onnx/models/keras/vgg16-cifar10.onnx differ diff --git a/hpvm/projects/onnx/models/keras/vgg16_cifar10.onnx b/hpvm/projects/onnx/models/keras/vgg16_cifar10.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a96ddf4afffca4528d27b4f4fd482aeddc8ae165 Binary files /dev/null and b/hpvm/projects/onnx/models/keras/vgg16_cifar10.onnx differ diff --git a/hpvm/projects/onnx/models/mnist/mnist.onnx b/hpvm/projects/onnx/models/mnist/mnist.onnx new file mode 100644 index 0000000000000000000000000000000000000000..fc1a3f733c6e6243dd23dacb125b7a372de55a50 Binary files /dev/null and b/hpvm/projects/onnx/models/mnist/mnist.onnx differ diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_0/input_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..f0072d51a480af615e92312608f75993be9f1136 Binary files /dev/null and b/hpvm/projects/onnx/models/mnist/test_data_set_0/input_0.pb differ diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_0/output_0.pb 
b/hpvm/projects/onnx/models/mnist/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..a6f4cdf92e27aaab21e44098b5b28e16048098e2 --- /dev/null +++ b/hpvm/projects/onnx/models/mnist/test_data_set_0/output_0.pb @@ -0,0 +1,2 @@ + +J(ãêsDU®ÄŒtÍEÚ'DWQeÄYôÐÄQôÄ3vÂNKBÄñ³Ä \ No newline at end of file diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_1/input_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_1/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..b40ca9538b7b03111268892ca8d7c71a5e376d06 Binary files /dev/null and b/hpvm/projects/onnx/models/mnist/test_data_set_1/input_0.pb differ diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_1/output_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_1/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..786b006694a575f64fa212cb60d60d962fc042b0 --- /dev/null +++ b/hpvm/projects/onnx/models/mnist/test_data_set_1/output_0.pb @@ -0,0 +1,2 @@ + +J(E_ÅÓ;ùÒÄXê“Äy›Ä„*_DHԺÓ!‘Ã9ZÞ \ No newline at end of file diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_2/input_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_2/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..d84a2064893ba68ad633391af74744dfc991a7cd Binary files /dev/null and b/hpvm/projects/onnx/models/mnist/test_data_set_2/input_0.pb differ diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_2/output_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_2/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..cb22e7b4ff7c12b9ec73ab832e34c3a566a51077 --- /dev/null +++ b/hpvm/projects/onnx/models/mnist/test_data_set_2/output_0.pb @@ -0,0 +1,2 @@ + +J(láÅ4‹Ä’†DMWÄ‘ÆDË¿Ä>á'Åìa¤&•B6hE \ No newline at end of file diff --git a/hpvm/projects/onnx/models/resnet50/resnet50.onnx b/hpvm/projects/onnx/models/resnet50/resnet50.onnx new file mode 100644 index 0000000000000000000000000000000000000000..09992ccb764246e182874eda716d535c83a82123 Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/resnet50.onnx differ diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_0/input_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..846b581f7683fe007535309dd7bba5e356e524f6 Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_0/input_0.pb differ diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_0/output_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..5c357e69d8f002e509525cbf8bbb470fa6bc575e Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_0/output_0.pb differ diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_1/input_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_1/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..0454c0cd3e58115a8805a1f770ea5406b5a72dbb Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_1/input_0.pb differ diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_1/output_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_1/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..631cf2cad41b3d582b4d792ef480f8fcdec261df Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_1/output_0.pb differ diff --git 
a/hpvm/projects/onnx/models/resnet50/test_data_set_2/input_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_2/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..665be39c8ee7136b9341f5ecdb370433d8afc910 Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_2/input_0.pb differ diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_2/output_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_2/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..ac41dadf34359eb8ea9d185ad181138b32e09cec Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_2/output_0.pb differ diff --git a/hpvm/projects/onnx/profile/alexnet_keras.py b/hpvm/projects/onnx/profile/alexnet_keras.py new file mode 100644 index 0000000000000000000000000000000000000000..bcbf35f0c632eb27d300a94fde5882761701c53b --- /dev/null +++ b/hpvm/projects/onnx/profile/alexnet_keras.py @@ -0,0 +1,123 @@ +import numpy as np +from keras.datasets import cifar10 +from keras.models import Sequential +from keras.layers.core import Dense, Dropout, Flatten, Activation +from keras.layers.convolutional import Conv2D +from keras.optimizers import Adam +from keras.layers.pooling import MaxPooling2D +from keras.utils.np_utils import to_categorical +from keras.preprocessing.image import ImageDataGenerator +from keras import backend as K +from keras import regularizers +from keras.callbacks import LearningRateScheduler +import sys +import struct +import keras +import numpy as np +import os +os.environ['CUDA_VISIBLE_DEVICES'] = '1' + +def buildModel2(): + + activation_type = "tanh" + weight_decay = 1e-4 + + model = Sequential() + model.add(Conv2D(64, kernel_size=(11, 11), activation=activation_type, + input_shape=(3, 32, 32), padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) + model.add(Dropout(0.2)) + model.add(Conv2D(192, kernel_size=(5, 5), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay))) + model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) + model.add(Dropout(0.3)) + + model.add(Conv2D(384, kernel_size=(3, 3), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) + model.add(Dropout(0.4)) + + model.add(Flatten()) + #model.add(Flatten()) + #model.add(Dense(256)) + model.add(Dense(10)) + model.add(Activation('softmax')) + + return model + +def lr_schedule(epoch): + lrate = 0.001 + if epoch > 20: + lrate = 0.0005 + if epoch > 40: + lrate = 0.0003 + if epoch > 60: + lrate = 0.0001 + if epoch > 80: + lrate = 0.00005 + + return lrate + +def trainModel(model): + #dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/" + + #opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6) + # Compile the model + model.compile(loss='categorical_crossentropy', + optimizer=Adam(lr=0.0001, decay=1e-6), + #optimizer = opt_rms, + metrics=['accuracy']) + + #print to_categorical(Y_train, 10) + print (to_categorical(Y_train)) + + + datagen = ImageDataGenerator( + rotation_range=15, + 
width_shift_range=0.1, + height_shift_range=0.1, + horizontal_flip=True, + ) + datagen.fit(X_train) + + + model.fit(X_train, to_categorical(Y_train, 10), + batch_size=128, + shuffle=True, + epochs = 1, + #epochs=100, + validation_data=(X_test, to_categorical(Y_test, 10)), callbacks=[LearningRateScheduler(lr_schedule)]) + + # Evaluate the model + scores = model.evaluate(X_test.astype('float32'), to_categorical(Y_test, 10)) + + print('Loss: %.3f' % scores[0]) + print('Accuracy: %.3f' % scores[1]) + + print ("*** TRAINED MODEL ****\n") + +K.set_image_data_format('channels_first') +(X_train, Y_train), (X_test, Y_test) = cifar10.load_data() +test_labels = Y_test +train_labels = Y_train + +#X_train = X_train.astype('float32') +#X_test = X_test.astype('float32') +X_train = X_train / 255.0 +X_test = X_test / 255.0 + +mean = np.mean(X_train,axis=(0,1,2,3)) +std = np.std(X_train,axis=(0,1,2,3)) +X_train = (X_train-mean)/(std+1e-7) +X_test = (X_test-mean)/(std+1e-7) +model = buildModel2() +trainModel(model) +import time +start = time.time() +keras_result = model.predict(X_test[:8000]) +print("time:", time.time() - start) \ No newline at end of file diff --git a/hpvm/projects/onnx/profile/alexnet_onnx.py b/hpvm/projects/onnx/profile/alexnet_onnx.py new file mode 100644 index 0000000000000000000000000000000000000000..1bde4eaab39d901ef131f7cda59810721d701e1b --- /dev/null +++ b/hpvm/projects/onnx/profile/alexnet_onnx.py @@ -0,0 +1,45 @@ +import numpy as np +from keras.datasets import cifar10 +from keras import backend as K +import numpy as np +import os +import keras2onnx +import onnx +import onnxruntime +import time +os.environ['CUDA_VISIBLE_DEVICES'] = '1' + +K.set_image_data_format('channels_first') +(X_train, Y_train), (X_test, Y_test) = cifar10.load_data() +test_labels = Y_test +train_labels = Y_train + +#X_train = X_train.astype('float32') +#X_test = X_test.astype('float32') +X_train = X_train / 255.0 +X_test = X_test / 255.0 + +mean = np.mean(X_train,axis=(0,1,2,3)) +std = np.std(X_train,axis=(0,1,2,3)) +X_train = (X_train-mean)/(std+1e-7) +X_test = (X_test-mean)/(std+1e-7) + +sess = onnxruntime.InferenceSession("../models/keras/alexnet.onnx") + +input_name = sess.get_inputs()[0].name +print("Input name :", input_name) +input_shape = sess.get_inputs()[0].shape +print("Input shape :", input_shape) +input_type = sess.get_inputs()[0].type +print("Input type :", input_type) + +output_name = sess.get_outputs()[0].name +print("Output name :", output_name) +output_shape = sess.get_outputs()[0].shape +print("Output shape :", output_shape) +output_type = sess.get_outputs()[0].type +print("Output type :", output_type) + +start_time = time.time() +ort_result = sess.run([output_name], {input_name: X_test.astype('float32')[:8000]}) +print("time: ", time.time() - start_time) \ No newline at end of file diff --git a/hpvm/projects/onnx/profile/alexnet_tvm.py b/hpvm/projects/onnx/profile/alexnet_tvm.py new file mode 100644 index 0000000000000000000000000000000000000000..5a3847335f43e0d9af9a5a51fd1ba90582f22489 --- /dev/null +++ b/hpvm/projects/onnx/profile/alexnet_tvm.py @@ -0,0 +1,45 @@ +import numpy as np +from keras.datasets import cifar10 +from keras import backend as K +import sys +import struct +import numpy as np +import os +import tvm +import tvm.relay as relay +from tvm.contrib import graph_runtime +os.environ['CUDA_VISIBLE_DEVICES'] = '1' + +K.set_image_data_format('channels_last') +(X_train, Y_train), (X_test, Y_test) = cifar10.load_data() +X_test = X_test / 255.0 +mean = 
np.mean(X_train,axis=(0,1,2,3))
+std = np.std(X_train,axis=(0,1,2,3))
+X_test = (X_test-mean)/(std+1e-7)
+
+import onnx
+onnx_model = onnx.load("../models/keras/alexnet_last.onnx")
+
+input_name = 'conv2d_8_input'
+input_size = 8000
+shape_dict = {input_name: X_test[:input_size].shape}
+mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
+target = 'cuda -libs=cudnn,cublas'
+with relay.build_config(opt_level=3):
+    graph, lib, params = relay.build(mod, target, params=params)
+
+import time
+ctx = tvm.gpu()
+#data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
+# create module
+module = graph_runtime.create(graph, lib, ctx)
+# set input and parameters
+module.set_input("conv2d_8_input", X_test[:input_size].astype("float32"))
+module.set_input(**params)
+# run
+start_time = time.time()
+module.run()
+out_shape = (input_size, 10)
+# get output
+out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()
+print("Time:", time.time() - start_time)
\ No newline at end of file
diff --git a/hpvm/projects/onnx/profile/prof.nvvp b/hpvm/projects/onnx/profile/prof.nvvp
new file mode 100644
index 0000000000000000000000000000000000000000..be5cbd996b6980e9e4bad773fe9f3abea1ccb0f6
Binary files /dev/null and b/hpvm/projects/onnx/profile/prof.nvvp differ
diff --git a/hpvm/projects/onnx/setup.py b/hpvm/projects/onnx/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bcdce7a1d126e0c792bd3bf49026cb61b39f5c1
--- /dev/null
+++ b/hpvm/projects/onnx/setup.py
@@ -0,0 +1,12 @@
+
+from setuptools import setup
+
+setup(
+    name='frontend',
+    version='1.0',
+    description='HPVM frontend modules',
+    author='Yuanjing Shi',
+    author_email='ys26@illinois.edu',
+    packages=['frontend'],
+    install_requires=[],
+)
diff --git a/hpvm/projects/onnx/specs.dump b/hpvm/projects/onnx/specs.dump
new file mode 100644
index 0000000000000000000000000000000000000000..3016325b9bb2f94c3d66f0185408cccfa7cf9a43
--- /dev/null
+++ b/hpvm/projects/onnx/specs.dump
@@ -0,0 +1,1051 @@
+
+Computer
+********
+
+
+Summary
+-------
+
+-Computer-
+Processor : 40x Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz
+Memory : 65830MB (28703MB used)
+Operating System : Ubuntu 16.04.6 LTS
+User Name : ys26 (Yuanjing Shi)
+Date/Time : Tue 31 Mar 2020 07:33:16 PM CDT
+-Display-
+Resolution : 0x0 pixels
+OpenGL Renderer : Unknown
+X11 Vendor : (null)
+-Multimedia-
+Audio Adapter : HDA-Intel - HDA Intel PCH
+Audio Adapter : HDA-Intel - HDA NVidia
+Audio Adapter : HDA-Intel - HDA NVidia
+-Input Devices-
+ Power Button
+ Power Button
+ HDA Intel PCH Front Mic
+ HDA Intel PCH Rear Mic
+ HDA Intel PCH Line
+ HDA Intel PCH Line Out Front
+ HDA Intel PCH Line Out Surround
+ HDA Intel PCH Line Out CLFE
+ HDA Intel PCH Front Headphone
+ HDA NVidia HDMI/DP,pcm : 3=
+ HDA NVidia HDMI/DP,pcm : 7=
+ HDA NVidia HDMI/DP,pcm : 8=
+ HDA NVidia HDMI/DP,pcm : 9=
+ HDA NVidia HDMI/DP,pcm : 3=
+ HDA NVidia HDMI/DP,pcm : 7=
+ HDA NVidia HDMI/DP,pcm : 8=
+ HDA NVidia HDMI/DP,pcm : 9=
+ Dell Dell USB Keyboard
+ Logitech USB-PS/2 Optical Mouse
+-Printers-
+No printers found
+-SCSI Disks-
+ATA Samsung SSD 850
+ATA Samsung SSD 850
+Generic Ultra HS-SD/MMC
+
+Operating System
+----------------
+
+-Version-
+Kernel : Linux 4.15.0-66-generic (x86_64)
+Compiled : #75~16.04.1-Ubuntu SMP Tue Oct 1 14:01:08 UTC 2019
+C Library : Unknown
+Default C Compiler : GNU C Compiler version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.11)
+Distribution : Ubuntu 16.04.6 LTS
+-Current Session-
+Computer Name : tyler
+User Name : ys26 (Yuanjing Shi)
+Home Directory : 
/home/ys26 +Desktop Environment : Terminal +-Misc- +Uptime : 67 days, 16 hours and 1 minute +Load Average : 19.85, 20.81, 20.51 + +Kernel Modules +-------------- + +-Loaded Modules- +nvidia_uvm +nfsv3 +nfs_acl +nfs +lockd : NFS file locking service version 0.5. +grace +fscache : FS Cache Manager +nvram +video : ACPI Video Driver +msr : x86 generic MSR driver +xt_tcpudp : Xtables: TCP, UDP and UDP-Lite match +iptable_filter : iptables filter table +ip_tables : IPv4 packet filter +x_tables : {ip,ip6,arp,eb}_tables backend module +input_leds : Input -> LEDs Bridge +snd_hda_codec_hdmi : HDMI HD-audio codec +binfmt_misc +xfs : SGI XFS with ACLs, security attributes, realtime, no debug enabled +nvidia_drm +nvidia_modeset +nvidia +intel_rapl : Driver for Intel RAPL (Running Average Power Limit) +sb_edac : MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers - Ver: 1.1.2 +x86_pkg_temp_thermal : X86 PKG TEMP Thermal Driver +intel_powerclamp : Package Level C-state Idle Injection for Intel CPUs +coretemp : Intel Core temperature monitor +snd_hda_codec_realtek : Realtek HD-audio codec +kvm_intel +snd_hda_codec_generic : Generic HD-audio codec parser +kvm +snd_hda_intel : Intel HDA driver +snd_hda_codec : HDA codec core +snd_hda_core : HD-audio bus +snd_hwdep : Hardware dependent layer +irqbypass : IRQ bypass manager utility module +snd_pcm : Midlevel PCM code for ALSA. +intel_wmi_thunderbolt : Intel WMI Thunderbolt force power driver +intel_cstate +snd_seq_midi : Advanced Linux Sound Architecture sequencer MIDI synth. +intel_rapl_perf +snd_seq_midi_event : MIDI byte <-> sequencer event coder +drm_kms_helper : DRM KMS helper +snd_rawmidi : Midlevel RawMidi code for ALSA. +drm : DRM shared core routines +snd_seq : Advanced Linux Sound Architecture sequencer. +snd_seq_device : ALSA sequencer device management +ipmi_devintf : Linux device interface for the IPMI message handler. +snd_timer : ALSA timer interface +ipmi_msghandler : Incoming and outgoing message routing for an IPMI interface. +fb_sys_fops : Generic file read (fb in system RAM) +snd : Advanced Linux Sound Architecture driver for soundcards. 
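The module list above (nvidia, nvidia_uvm, etc.) and the rest of specs.dump record the machine the profiling numbers in this diff were taken on. A minimal sketch of capturing a comparable snapshot from Python at profile time; the helper name is made up, and it assumes `nvidia-smi` is on PATH:

```
import platform
import subprocess

def snapshot_environment():
    # Hypothetical helper: record basic host info next to profiling results.
    info = {
        "os": platform.platform(),           # e.g. Linux-4.15.0-66-generic-x86_64
        "python": platform.python_version(),
        "cpu": platform.processor(),
    }
    try:
        # Query GPU name and driver version; assumes nvidia-smi is installed.
        out = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=name,driver_version",
             "--format=csv,noheader"],
            text=True,
        )
        info["gpus"] = out.strip().splitlines()
    except (OSError, subprocess.CalledProcessError):
        info["gpus"] = []
    return info

print(snapshot_environment())
```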
+syscopyarea : Generic copyarea (sys-to-sys) +sysfillrect : Generic fill rectangle (sys-to-sys) +sysimgblt : 1-bit/8-bit to 1-32 bit color expansion (sys-to-sys) +lpc_ich : LPC interface for Intel ICH +soundcore : Core sound module +ioatdma +shpchp : Standard Hot Plug PCI Controller Driver +mac_hid +ib_iser : iSER (iSCSI Extensions for RDMA) Datamover +rdma_cm : Generic RDMA CM Agent +iw_cm : iWARP CM +ib_cm : InfiniBand CM +ib_core : core kernel InfiniBand API +iscsi_tcp : iSCSI/TCP data-path +libiscsi_tcp : iSCSI/TCP data-path +libiscsi : iSCSI library functions +scsi_transport_iscsi : iSCSI Transport Interface +parport_pc : PC-style parallel port driver +ppdev +lp +parport +sunrpc +autofs4 +btrfs +zstd_compress : Zstd Compressor +raid10 : RAID10 (striped mirror) personality for MD +raid456 : RAID4/5/6 (striping with parity) personality for MD +async_raid6_recov : asynchronous RAID-6 recovery api +async_memcpy : asynchronous memcpy api +async_pq : asynchronous raid6 syndrome generation/validation +async_xor : asynchronous xor/xor-zero-sum api +async_tx : Asynchronous Bulk Memory Transactions API +xor +raid6_pq : RAID6 Q-syndrome calculations +libcrc32c : CRC32c (Castagnoli) calculations +raid1 : RAID1 (mirroring) personality for MD +raid0 : RAID0 (striping) personality for MD +multipath : simple multi-path personality for MD +linear : Linear device concatenation personality for MD +uas +usb_storage : USB Mass Storage driver for Linux +hid_generic : HID generic driver +usbhid : USB HID core driver +hid +mxm_wmi : MXM WMI Driver +crct10dif_pclmul : T10 DIF CRC calculation accelerated with PCLMULQDQ. +crc32_pclmul +ghash_clmulni_intel : GHASH Message Digest Algorithm, acclerated by PCLMULQDQ-NI +pcbc : PCBC block cipher algorithm +igb : Intel(R) Gigabit Ethernet Network Driver +aesni_intel : Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized +dca +i2c_algo_bit : I2C-Bus bit-banging algorithm +aes_x86_64 : Rijndael (AES) Cipher Algorithm, asm optimized +ahci : AHCI SATA low-level driver +crypto_simd +ptp : PTP clocks support +glue_helper +cryptd : Software async crypto daemon +pps_core : LinuxPPS support (RFC 2783) - ver. 
5.3.6 +libahci : Common AHCI SATA low-level routines +wmi : ACPI-WMI Mapping Driver + +Boots +----- + +-Boots- +Fri Jan 24 02:31 : 44..15.0-66-generi|still +Mon Nov 18 18:04 : 44..15.0-66-generi|still +Fri Nov 1 1:52 : 44..15.0-66-generi|- +Fri Nov 1 1:25 : 44..15.0-66-generi|- + +Languages +--------- + +-Available Languages- +en_US.utf8 : English locale for the USA + +Filesystems +----------- + +-Mounted File Systems- +udev /dev 0.00 % (31.4 GiB of 31.4 GiB) +tmpfs /run 7.15 % (5.8 GiB of 6.3 GiB) +/dev/mapper/tyler--vg-root / 5.81 % (1720.1 GiB of 1826.1 GiB) +tmpfs /dev/shm 0.00 % (31.4 GiB of 31.4 GiB) +tmpfs /run/lock 0.08 % (5.0 MiB of 5.0 MiB) +tmpfs /sys/fs/cgroup 0.00 % (31.4 GiB of 31.4 GiB) +/dev/sdb1 /srv/local 9.99 % (1676.1 GiB of 1862.1 GiB) +/dev/sdb1 /nix 9.99 % (1676.1 GiB of 1862.1 GiB) +/dev/sda2 /boot 11.01 % (3.3 GiB of 3.7 GiB) +vadve-file-01.cs.illinois.edu:/srv/home/deepanv2 /home/deepanv2 84.56 % (1580.7 GiB of 10238.0 GiB) +tmpfs /run/user/242233 0.00 % (6.3 GiB of 6.3 GiB) +vadve-file-01.cs.illinois.edu:/srv/home/nz11 /home/nz11 84.56 % (1580.7 GiB of 10238.0 GiB) +tmpfs /run/user/1091162 0.00 % (6.3 GiB of 6.3 GiB) +vadve-file-01.cs.illinois.edu:/srv/home/hsharif3 /home/hsharif3 84.56 % (1580.7 GiB of 10238.0 GiB) +tmpfs /run/user/641600 0.00 % (6.3 GiB of 6.3 GiB) +vadve-file-01.cs.illinois.edu:/srv/home/bjschre2 /home/bjschre2 84.56 % (1580.7 GiB of 10238.0 GiB) +tmpfs /run/user/373498 0.00 % (6.3 GiB of 6.3 GiB) +/etc/autofs/cs_vadve/auto.direct /shared 84.56 % (1580.7 GiB of 10238.0 GiB) +engr-linux-siebl1.engr.illinois.edu:/srv/software/megacli-8.07 /software/megacli-8.07 95.35 % (92.6 GiB of 1992.1 GiB) +vadve-file-01.cs.illinois.edu:/srv/shared /shared 84.56 % (1580.7 GiB of 10238.0 GiB) +engr-linux-siebl1.engr.illinois.edu:/srv/software/cuda-9.1 /software/cuda-9.1 95.35 % (92.6 GiB of 1992.1 GiB) +vadve-file-01.cs.illinois.edu:/srv/home/yifanz16 /home/yifanz16 84.56 % (1580.7 GiB of 10238.0 GiB) +tmpfs /run/user/1086025 0.00 % (6.3 GiB of 6.3 GiB) +vadve-file-01.cs.illinois.edu:/srv/home/ys26 /home/ys26 84.56 % (1580.7 GiB of 10238.0 GiB) +tmpfs /run/user/1055463 0.00 % (6.3 GiB of 6.3 GiB) + +Display +------- + +-Display- +Resolution : 0x0 pixels +Vendor : (null) +Version : (null) +-Monitors- +-Extensions- + +Environment Variables +--------------------- + +-Environment Variables- +MANPATH : /usr/share/lmod/lmod/share/man:: +NIX_PROFILES : /nix/var/nix/profiles/default /home/ys26/.nix-profile +XDG_SESSION_ID : 14482 +TERM : xterm-256color +SHELL : /bin/bash +DISTARCH : Linux-x86_64 +MODULEPATH_ROOT : /etc/modulefiles/software +SSH_CLIENT : 10.251.9.206 64268 22 +CONDA_SHLVL : 1 +CONDA_PROMPT_MODIFIER : (base) +LMOD_PKG : /usr/share/lmod/lmod +LMOD_VERSION : 7.7.14 +GTK_MODULES : gail:atk-bridge +SSH_TTY : /dev/pts/7 +USER : ys26 +LMOD_sys : Linux +CONDA_EXE : /home/ys26/anaconda3/bin/conda +_CE_CONDA +MAIL : /var/mail/ys26 +PATH : /home/ys26/anaconda3/bin:/home/ys26/anaconda3/condabin:/home/ys26/.nix-profile/bin:/nix/var/nix/profiles/default/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/ys26/.local/bin:/home/ys26/bin +CONDA_PREFIX : /home/ys26/anaconda3 +NIX_PATH : nixpkgs=/nix/var/nix/profiles/per-user/root/channels/nixpkgs:/nix/var/nix/profiles/per-user/root/channels +PWD : /home/ys26 +LANG : en_US.UTF-8 +MODULEPATH : /etc/modulefiles/software/Linux:/etc/modulefiles/software/Core:/usr/share/lmod/lmod/modulefiles/Core:/etc/modulefiles/env:/etc/modulefiles/class:/etc/modulefiles/software 
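The CONDA_* and PATH entries above show the Anaconda setup the notebooks later in this diff run under. The export-and-verify loop those notebooks implement — convert the trained Keras model with keras2onnx, run it under onnxruntime, and compare against the Keras prediction — can be sketched end to end; the toy model and file name here are placeholders:

```
import numpy as np
import keras2onnx
import onnx
import onnxruntime
from keras.models import Sequential
from keras.layers.core import Dense

# Toy stand-in for the trained AlexNet model (placeholder only).
model = Sequential([Dense(10, activation="softmax", input_shape=(32,))])
x = np.random.rand(4, 32).astype("float32")

# Export to ONNX, matching the opset used in the notebooks.
onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)
onnx.save(onnx_model, "toy.onnx")

# Run the exported model under onnxruntime.
sess = onnxruntime.InferenceSession("toy.onnx")
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name
ort_out = sess.run([output_name], {input_name: x})[0]

# The notebooks accept agreement to about 3 decimal places.
np.testing.assert_almost_equal(model.predict(x), ort_out, decimal=3)
```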
+NIX_SSL_CERT_FILE : /etc/ssl/certs/ca-certificates.crt +LMOD_CMD : /usr/share/lmod/lmod/libexec/lmod +_CE_M +KRB5CCNAME : FILE:/tmp/krb5cc_1055463_itHNAb +SHLVL : 1 +HOME : /home/ys26 +LANGUAGE : en_US: +SHOST : tyler +BASH_ENV : /usr/share/lmod/lmod/init/bash +CONDA_PYTHON_EXE : /home/ys26/anaconda3/bin/python +LMOD_arch : x86_64 +LOGNAME : ys26 +XDG_DATA_DIRS : /usr/local/share:/usr/share:/var/lib/snapd/desktop +SOFTPATH : /software +SSH_CONNECTION : 10.251.9.206 64268 130.126.136.179 22 +MODULESHOME : /usr/share/lmod/lmod +CONDA_DEFAULT_ENV : base +LMOD_SETTARG_FULL_SUPPORT : no +ARCH : x86_64 +XDG_RUNTIME_DIR : /run/user/1055463 +LMOD_DIR : /usr/share/lmod/lmod/libexec +NIX_USER_PROFILE_DIR : /nix/var/nix/profiles/per-user/ys26 +BASH_FUNC_module%% : () { eval $($LMOD_CMD bash "$@") && eval $(${LMOD_SETTARG_CMD:-:} -s sh) + +Users +----- + +-Users- +root : root +daemon : daemon +bin : bin +sys : sys +sync : sync +games : games +man : man +lp : lp +mail : mail +news : news +uucp : uucp +proxy : proxy +www-data : www-data +backup : backup +list : Mailing List Manager +irc : ircd +gnats : Gnats Bug-Reporting System (admin) +nobody : nobody +systemd-timesync : systemd Time Synchronization +systemd-network : systemd Network Management +systemd-resolve : systemd Resolver +systemd-bus-proxy : systemd Bus Proxy +syslog +_apt +messagebus +lxd +dnsmasq : dnsmasq +sshd +statd +postfix +colord : colord colour management daemon +ntp +clamav +sensu : Sensu Monitoring Framework +telegraf +lightdm : Light Display Manager +whoopsie +avahi-autoipd : Avahi autoip daemon +avahi : Avahi mDNS daemon +speech-dispatcher : Speech Dispatcher +hplip : HPLIP system user +kernoops : Kernel Oops Tracking Daemon +pulse : PulseAudio daemon +rtkit : RealtimeKit +saned +usbmux : usbmux daemon +nixbld1 : Nix build user 1 +nixbld2 : Nix build user 2 +nixbld3 : Nix build user 3 +nixbld4 : Nix build user 4 +nixbld5 : Nix build user 5 +nixbld6 : Nix build user 6 +nixbld7 : Nix build user 7 +nixbld8 : Nix build user 8 +nixbld9 : Nix build user 9 +nixbld10 : Nix build user 10 +nixbld11 : Nix build user 11 +nixbld12 : Nix build user 12 +nixbld13 : Nix build user 13 +nixbld14 : Nix build user 14 +nixbld15 : Nix build user 15 +nixbld16 : Nix build user 16 +nixbld17 : Nix build user 17 +nixbld18 : Nix build user 18 +nixbld19 : Nix build user 19 +nixbld20 : Nix build user 20 +nixbld21 : Nix build user 21 +nixbld22 : Nix build user 22 +nixbld23 : Nix build user 23 +nixbld24 : Nix build user 24 +nixbld25 : Nix build user 25 +nixbld26 : Nix build user 26 +nixbld27 : Nix build user 27 +nixbld28 : Nix build user 28 +nixbld29 : Nix build user 29 +nixbld30 : Nix build user 30 +nixbld31 : Nix build user 31 +nixbld32 : Nix build user 32 + +Devices +******* + + +Processor +--------- + +-Processors- +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2599.98MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2602.97MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2456.45MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2601.63MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.07MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.02MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.03MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.19MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2695.21MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2715.75MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2715.46MHz +Intel(R) Xeon(R) CPU 
E5-2640 v4 @ 2.40GHz : 1424.73MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 1787.70MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2711.77MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 1353.80MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2710.79MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2709.20MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2642.89MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2449.84MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.05MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2599.95MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2706.53MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2675.88MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2792.21MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 1358.61MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 1748.81MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2611.28MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 1373.20MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2705.52MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2736.24MHz +Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2702.87MHz + +Memory +------ + +-Memory- +Total Memory : 65830480 kB +Free Memory : 14031276 kB +MemAvailable : 38320820 kB +Buffers : 398312 kB +Cached : 23096348 kB +Cached Swap : 126052 kB +Active : 33001128 kB +Inactive : 14469604 kB +Active(anon) : 23204468 kB +Inactive(anon) : 830232 kB +Active(file) : 9796660 kB +Inactive(file) : 13639372 kB +Unevictable : 3684 kB +Mlocked : 3684 kB +Virtual Memory : 3997692 kB +Free Virtual Memory : 2660556 kB +Dirty : 136 kB +Writeback : 8 kB +AnonPages : 23971776 kB +Mapped : 1338980 kB +Shmem : 63004 kB +Slab : 3122640 kB +SReclaimable : 1516712 kB +SUnreclaim : 1605928 kB +KernelStack : 30624 kB +PageTables : 108936 kB +NFS_Unstable : 0 kB +Bounce : 0 kB +WritebackTmp : 0 kB +CommitLimit : 36912932 kB +Committed_AS : 64285764 kB +VmallocTotal : 34359738367 kB +VmallocUsed : 0 kB +VmallocChunk : 0 kB +HardwareCorrupted : 0 kB +AnonHugePages : 2934784 kB +ShmemHugePages : 0 kB +ShmemPmdMapped : 0 kB +CmaTotal : 0 kB +CmaFree : 0 kB +HugePages_Total : 0 +HugePages_Free : 0 +HugePages_Rsvd : 0 +HugePages_Surp : 0 +Hugepagesize : 2048 kB +DirectMap4k : 63504064 kB +DirectMap2M : 3469312 kB +DirectMap1G : 2097152 kB + +PCI Devices +----------- + +-PCI Devices- +Host bridge : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D DMI2 (rev 01) +PCI bridge : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D PCI Express Root Port 1 (rev 01) (prog-if 00 [Normal decode]) +PCI bridge : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D PCI Express Root Port 3 (rev 01) (prog-if 00 [Normal decode]) +System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 1 (rev 01) +System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 2 (rev 01) +System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 3 (rev 01) +System peripheral : Intel 
Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 4 (rev 01) +System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 5 (rev 01) +System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 6 (rev 01) +System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 7 (rev 01) +System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Map/VTd_Misc/System Management (rev 01) +System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D IIO Hot Plug (rev 01) +System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D IIO RAS/Control Status/Global Errors (rev 01) +PIC : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D I/O APIC (rev 01) (prog-if 20 [IO(X)-APIC]) + +USB Devices +----------- + + +Printers +-------- + +-Printers- +No printers found + +Battery +------- + +-No batteries- +No batteries found on this system + +Sensors +------- + +-Cooling Fans- +-Temperatures- +-Voltage Values- + +Input Devices +------------- + +-Input Devices- + Power Button + Power Button + HDA Intel PCH Front Mic + HDA Intel PCH Rear Mic + HDA Intel PCH Line + HDA Intel PCH Line Out Front + HDA Intel PCH Line Out Surround + HDA Intel PCH Line Out CLFE + HDA Intel PCH Front Headphone + HDA NVidia HDMI/DP,pcm : 3= + HDA NVidia HDMI/DP,pcm : 7= + HDA NVidia HDMI/DP,pcm : 8= + HDA NVidia HDMI/DP,pcm : 9= + HDA NVidia HDMI/DP,pcm : 3= + HDA NVidia HDMI/DP,pcm : 7= + HDA NVidia HDMI/DP,pcm : 8= + HDA NVidia HDMI/DP,pcm : 9= + Dell Dell USB Keyboard + Logitech USB-PS/2 Optical Mouse + +Storage +------- + +-SCSI Disks- +ATA Samsung SSD 850 +ATA Samsung SSD 850 +Generic Ultra HS-SD/MMC + +DMI +--- + +-BIOS- +Date : 11/09/2016 +Vendor : American Megatrends Inc. 
(www.ami.com) +Version : 2.0a +-Board- +Name : X10DAI +Vendor : Supermicro + +Resources +--------- + +-I/O Ports- +<tt>0000-0000 </tt> : PCI Bus 0000:80 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt>0000-0000 </tt> : PCI Bus 0000:80 +<tt>0000-0000 </tt> : PCI Bus 0000:80 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 
1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +<tt>0000-0000 </tt> : PCI Bus 0000:80 +<tt> 0000-0000 </tt> : PCI Bus 0000:81 +<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) +-Memory- +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : ICH HD audio +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : ICH HD audio +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : ICH HD audio +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : ICH HD audio +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : ICH HD audio +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : ICH HD audio +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 
00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : ICH HD audio +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : ICH HD audio +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : ioatdma 
+<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt>00000000-00000000 </tt> : PCI Bus 0000:80 +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) +<tt> 00000000-00000000 </tt> : ioatdma +-DMA- +<tt> 4</tt> : cascade + +Network +******* + + +Interfaces +---------- + +-Network Interfaces- +eth0 4338573.20MiB 13789563.94MiB 130.126.136.179 +eth1 0.00MiB 
0.00MiB +lo 4132.02MiB 4132.02MiB 127.0.0.1 + +IP Connections +-------------- + +-Connections- +0.0.0.0:22 LISTEN 0.0.0.0:* tcp +127.0.0.1:3030 0.0.0.0:* udp +127.0.0.1:631 LISTEN 0.0.0.0:* tcp +127.0.0.1:3031 LISTEN 0.0.0.0:* tcp +127.0.0.1:8888 LISTEN 0.0.0.0:* tcp +127.0.0.1:25 LISTEN 0.0.0.0:* tcp +127.0.0.1:6010 ESTABLISHED 127.0.0.1:59498 tcp +0.0.0.0:445 LISTEN 0.0.0.0:* tcp +127.0.0.1:34409 LISTEN 0.0.0.0:* tcp +0.0.0.0:139 LISTEN 0.0.0.0:* tcp +0.0.0.0:47279 LISTEN 0.0.0.0:* tcp +0.0.0.0:111 0.0.0.0:* udp +0.0.0.0:39059 LISTEN 0.0.0.0:* tcp +127.0.1.1:53 0.0.0.0:* udp +127.0.0.1:6010 ESTABLISHED 127.0.0.1:59498 tcp +127.0.0.1:59498 ESTABLISHED 127.0.0.1:6010 tcp +130.126.136.179:765 ESTABLISHED 172.22.98.29:33920 tcp +130.126.136.179:40932 ESTABLISHED 172.22.246.11:4505 tcp +130.126.136.179:789 ESTABLISHED 130.126.112.49:2049 tcp +130.126.136.179:58444 ESTABLISHED 172.22.6.74:5672 tcp +130.126.136.179:37674 ESTABLISHED 172.22.6.146:8086 tcp +130.126.136.179:58446 ESTABLISHED 172.22.6.74:5672 tcp +130.126.136.179:22 ESTABLISHED 10.251.18.206:65464 tcp +130.126.136.179:22 ESTABLISHED 10.251.18.206:65464 tcp +130.126.136.179:45112 TIME_WAIT 172.22.98.29:111 tcp +130.126.136.179:58442 ESTABLISHED 172.22.6.74:5672 tcp +130.126.136.179:33326 ESTABLISHED 172.22.246.13:4505 tcp +130.126.136.179:59164 ESTABLISHED 172.22.246.140:4505 tcp +130.126.136.179:22 ESTABLISHED 10.251.18.206:65464 tcp +130.126.136.179:58440 ESTABLISHED 172.22.6.74:5672 tcp +130.126.136.179:745 ESTABLISHED 172.22.98.29:2049 tcp +130.126.136.179:51970 ESTABLISHED 172.22.246.14:10514 tcp +130.126.136.179:22 ESTABLISHED 10.251.18.206:65464 tcp +:::22 LISTEN :::* tcp6 +::1:631 LISTEN :::* tcp6 +::1:6010 LISTEN :::* tcp6 +:::445 LISTEN :::* tcp6 +:::44513 LISTEN :::* tcp6 +:::139 LISTEN :::* tcp6 +:::111 :::* udp6 +:::35349 LISTEN :::* tcp6 +0.0.0.0:59373 0.0.0.0:* udp +127.0.1.1:53 0.0.0.0:* udp +0.0.0.0:68 0.0.0.0:* udp +0.0.0.0:111 0.0.0.0:* udp +130.126.136.179:123 0.0.0.0:* udp +127.0.0.1:123 0.0.0.0:* udp +0.0.0.0:123 0.0.0.0:* udp +130.126.139.255:137 0.0.0.0:* udp +130.126.136.179:137 0.0.0.0:* udp +0.0.0.0:137 0.0.0.0:* udp +130.126.139.255:138 0.0.0.0:* udp +130.126.136.179:138 0.0.0.0:* udp +0.0.0.0:138 0.0.0.0:* udp +0.0.0.0:631 0.0.0.0:* udp +127.0.0.1:762 0.0.0.0:* udp +0.0.0.0:773 0.0.0.0:* udp +0.0.0.0:34156 0.0.0.0:* udp +0.0.0.0:34688 0.0.0.0:* udp +127.0.0.1:3030 0.0.0.0:* udp +0.0.0.0:37696 0.0.0.0:* udp +0.0.0.0:5353 0.0.0.0:* udp +0.0.0.0:42410 0.0.0.0:* udp +:::111 :::* udp6 +2620:0:e00:550a:5c4:123 :::* udp6 +2620:0:e00:550a:4cd:123 :::* udp6 +2620:0:e00:550a:f5e:123 :::* udp6 +2620:0:e00:550a:c5b:123 :::* udp6 +2620:0:e00:550a:cd1:123 :::* udp6 +2620:0:e00:550a:dcf:123 :::* udp6 +2620:0:e00:550a:d8d:123 :::* udp6 +fe80::ec4:7aff:fedc:123 :::* udp6 +2620:0:e00:550a:ec4:123 :::* udp6 +::1:123 :::* udp6 +:::123 :::* udp6 +:::773 :::* udp6 +:::37708 :::* udp6 +:::5353 :::* udp6 +:::39262 :::* udp6 +:::47288 :::* udp6 +:::55076 :::* udp6 + +Routing Table +------------- + +-IP routing table- +0.0.0.0 / 130.126.136.1 0.0.0.0 UG eth0 +130.126.136.0 / 0.0.0.0 255.255.252.0 U eth0 +169.254.0.0 / 0.0.0.0 255.255.0.0 U eth0 +192.17.2.10 / 130.126.136.1 255.255.255.255 UGH eth0 + +ARP Table +--------- + +-ARP Table- +130.126.136.121 90:b1:1c:2d:31:0e eth0 +130.126.136.198 40:a8:f0:47:38:97 eth0 +130.126.136.1 00:fe:c8:5d:df:ff eth0 +130.126.136.110 98:10:e8:f3:a2:4f eth0 +130.126.136.247 00:11:32:c3:57:3f eth0 +130.126.136.159 c4:34:6b:5e:26:eb eth0 +130.126.136.243 00:4e:01:bf:b6:ea eth0 +130.126.136.77 
bc:30:5b:e0:f9:06 eth0 +130.126.139.44 98:90:96:dc:55:d9 eth0 +130.126.136.14 4c:d9:8f:17:1c:e2 eth0 +130.126.136.73 18:66:da:31:06:6b eth0 +130.126.136.63 18:66:da:3d:e2:68 eth0 +169.254.169.254 00:fe:c8:5d:df:ff eth0 +130.126.136.126 20:04:0f:f4:66:74 eth0 +130.126.139.118 70:85:c2:83:33:0b eth0 +130.126.136.210 2c:4d:54:46:4f:ed eth0 +130.126.136.195 48:4d:7e:f5:03:b2 eth0 +130.126.136.55 40:a8:f0:5e:b9:9f eth0 +130.126.136.118 90:b1:1c:1a:02:d6 eth0 +130.126.136.156 04:d4:c4:5b:50:f4 eth0 +130.126.136.240 e4:54:e8:79:44:68 eth0 +130.126.136.162 88:51:fb:58:dc:b2 eth0 +130.126.136.148 6c:2b:59:d5:f9:d2 eth0 +130.126.136.11 8c:dc:d4:29:8d:28 eth0 +130.126.136.74 c8:2a:14:1e:91:67 eth0 +130.126.138.79 74:e6:e2:da:8b:93 eth0 +130.126.136.7 18:03:73:3c:2e:f1 eth0 +130.126.136.129 00:25:00:ed:73:7d eth0 +130.126.136.245 00:04:4b:dd:43:42 eth0 +130.126.136.119 90:b1:1c:2d:2a:90 eth0 +130.126.136.182 90:b1:1c:5e:2c:72 eth0 +130.126.136.241 e4:54:e8:79:52:be eth0 +130.126.136.100 64:00:6a:5f:2e:25 eth0 +130.126.139.197 6c:3b:e5:14:6c:8f eth0 +130.126.136.12 54:bf:64:99:59:02 eth0 +130.126.136.23 8c:ec:4b:b2:4f:c9 eth0 +130.126.136.201 fc:aa:14:2f:a7:ab eth0 +130.126.136.191 40:6c:8f:24:b1:a4 eth0 +130.126.138.206 18:66:da:0d:6f:d0 eth0 +130.126.136.120 90:b1:1c:2d:2e:f8 eth0 +130.126.136.46 48:4d:7e:e6:a4:16 eth0 +130.126.138.135 38:c9:86:02:0b:99 eth0 +130.126.136.105 d0:94:66:7b:67:c0 eth0 +130.126.136.221 f8:b1:56:c6:32:86 eth0 +130.126.136.95 00:24:e8:44:aa:a7 eth0 +130.126.136.28 ac:1f:6b:48:8a:84 eth0 +130.126.136.242 00:4e:01:bf:6c:ca eth0 +130.126.136.76 18:66:da:00:bb:74 eth0 +130.126.136.97 98:90:96:bc:6f:66 eth0 +130.126.136.160 ac:87:a3:1f:4e:ca eth0 +130.126.136.9 6c:2b:59:c7:ad:19 eth0 +130.126.136.72 50:9a:4c:59:c3:5a eth0 +130.126.136.209 18:66:da:30:64:45 eth0 +130.126.136.83 00:13:3b:11:6a:a1 eth0 +130.126.136.5 4c:d9:8f:0c:1e:02 eth0 + +DNS Servers +----------- + +-Name servers- +127.0.1.1 + +Statistics +---------- + +-IP- +3340235186 : Total packets received +99269 : With invalid addresses +0 : Incoming packets discarded +0 : Incoming packets discarded +3340132854 : Incoming packets delivered +1109643438 : Requests sent out +16 : Outgoing packets dropped +248929 : Dropped because of missing route +6126 : Reassemblies required +3063 : Packets reassembled ok +-ICMP- +1355 : ICMP messages received +19 : Input ICMP message failed. +7599 : ICMP messages sent +0 : ICMP messages failed +-ICMPMSG- +-TCP- +26139644 : Active connections openings +3919 : Passive connection openings +25871410 : Failed connection attempts +4562 : Connection resets received +18 : Connections established +3302346907 : Segments received +10359040645 : Segments send out +2844431 : Segments retransmited +1 : Bad segments received. +25878380 : Resets sent +-UDP- +24979118 : Packets received +6362 : Packets to unknown port received. 
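The latency numbers reported by the profiling scripts in this diff all come from bracketing a call with `time.time()`; a small reusable timer in the same spirit — a sketch, with a made-up helper name:

```
import time
from contextlib import contextmanager

@contextmanager
def wall_clock(label):
    # Print elapsed wall-clock seconds for the enclosed block.
    start = time.time()
    try:
        yield
    finally:
        print(label + ":", round(time.time() - start, 3), "s")

# Usage, mirroring the profiling scripts:
#   with wall_clock("onnxruntime inference"):
#       ort_result = sess.run([output_name], {input_name: X_test.astype('float32')[:8000]})
with wall_clock("example"):
    sum(range(1000000))
```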
+4203 : Packet receive errors +662177 : Packets sent +-UDPLITE- +-TCPEXT- +1 : Resets received for embryonic SYN_RECV sockets +235858 : TCP sockets finished time wait in fast timer +171 : Time wait sockets recycled by time stamp +1825 : Packets rejects in established connections because of timestamp +6984717 : Delayed acks sent +3194 : Delayed acks further delayed because of locked socket +527487290 : Packet headers predicted +543016884 : Acknowledgments not containing data payload received +1893599141 : Predicted acknowledgments +554129 : Times recovered from packet loss by selective acknowledgements +119 : Congestion windows fully recovered without slow start +85 : Congestion windows partially recovered using Hoe heuristic +6456 : Congestion windows recovered without slow start by DSACK +1683 : Congestion windows recovered without slow start after partial ack +101 : Timeouts after SACK recovery +136 : Timeouts in loss state +2738885 : Fast retransmits +2730 : Retransmits in slow start +3141 : Other TCP timeouts +820 : SACK retransmits failed +32589 : DSACKs sent for old packets +607 : DSACKs sent for out of order packets +84876 : DSACKs received +567 : DSACKs for out of order packets received +650 : Connections reset due to unexpected data +4384 : Connections reset due to early user close +286 : Connections aborted due to timeout +3 : Times unabled to send RST due to no memory +-IPEXT- + +Shared Directories +------------------ + +-SAMBA- +-NFS- + +Benchmarks +********** + + +CPU Blowfish +------------ + +-CPU Blowfish- +<big><b>This Machine</b></big> 2600 MHz 0.665 +Intel(R) Celeron(R) M processor 1.50GHz (null) 26.1876862 +PowerPC 740/750 (280.00MHz) (null) 172.816713 + +CPU CryptoHash +-------------- + +-CPU CryptoHash- +<big><b>This Machine</b></big> 2600 MHz 1697.719 + +CPU Fibonacci +------------- + +-CPU Fibonacci- +<big><b>This Machine</b></big> 2600 MHz 2.166 +Intel(R) Celeron(R) M processor 1.50GHz (null) 8.1375674 +PowerPC 740/750 (280.00MHz) (null) 58.07682 + +CPU N-Queens +------------ + +-CPU N-Queens- +<big><b>This Machine</b></big> 2600 MHz 0.592 + +FPU FFT +------- + +-FPU FFT- +<big><b>This Machine</b></big> 2600 MHz 1.072 + +FPU Raytracing +-------------- + +-FPU Raytracing- +<big><b>This Machine</b></big> 2600 MHz 6.178 +Intel(R) Celeron(R) M processor 1.50GHz (null) 40.8816714 +PowerPC 740/750 (280.00MHz) (null) 161.312647 diff --git a/hpvm/projects/onnx/src/.ipynb_checkpoints/mnist-checkpoint.ipynb b/hpvm/projects/onnx/src/.ipynb_checkpoints/mnist-checkpoint.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..bbac5d491d80f5f4aaa9286b2323704e55555b48 --- /dev/null +++ b/hpvm/projects/onnx/src/.ipynb_checkpoints/mnist-checkpoint.ipynb @@ -0,0 +1,105 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import numpy as np\n", + "import onnx\n", + "import glob\n", + "from onnxruntime.backend.backend import OnnxRuntimeBackend as backend\n", + "\n", + "from onnx import numpy_helper\n", + "\n", + "# onnx2hpvm modules\n", + "from onnx2hpvm.onnx_translator import from_onnx_to_hpvm" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "model = onnx.load('../models/mnist/mnist.onnx')\n", + "test_data_dir = '../models/mnist/test_data_set_0'" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"1\n" + ] + } + ], + "source": [ + "# Load inputs\n", + "inputs = []\n", + "inputs_num = len(glob.glob(os.path.join(test_data_dir, 'input_*.pb')))\n", + "print(inputs_num)\n", + "for i in range(inputs_num):\n", + " input_file = os.path.join(test_data_dir, 'input_{}.pb'.format(i))\n", + " tensor = onnx.TensorProto()\n", + " with open(input_file, 'rb') as f:\n", + " tensor.ParseFromString(f.read())\n", + " inputs.append(numpy_helper.to_array(tensor))\n", + "\n", + "# Load reference outputs\n", + "ref_outputs = []\n", + "ref_outputs_num = len(glob.glob(os.path.join(test_data_dir, 'output_*.pb')))\n", + "for i in range(ref_outputs_num):\n", + " output_file = os.path.join(test_data_dir, 'output_{}.pb'.format(i))\n", + " tensor = onnx.TensorProto()\n", + " with open(output_file, 'rb') as f:\n", + " tensor.ParseFromString(f.read())\n", + " ref_outputs.append(numpy_helper.to_array(tensor))\n", + "\n", + "# Run the model on the backend\n", + "outputs = list(backend.run_model(model, inputs))\n", + "\n", + "#from_onnx_to_hpvm(model)\n", + "# Compare the results with reference outputs.\n", + "#for ref_o, o in zip(ref_outputs, outputs):\n", + "# np.testing.assert_almost_equal(ref_o, o)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/alexnet_keras.ipynb b/hpvm/projects/onnx/src/alexnet_keras.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..3e46e220eb5438188d19e06934c6a9ef780cfa21 --- /dev/null +++ b/hpvm/projects/onnx/src/alexnet_keras.ipynb @@ -0,0 +1,356 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from keras.datasets import cifar10\n", + "from keras.models import Sequential\n", + "from keras.layers.core import Dense, Dropout, Flatten, Activation\n", + "from keras.layers.convolutional import Conv2D\n", + "from keras.optimizers import Adam\n", + "from keras.layers.pooling import MaxPooling2D\n", + "from keras.utils.np_utils import to_categorical\n", + "from keras.preprocessing.image import ImageDataGenerator\n", + "from keras import backend as K\n", + "from keras import regularizers\n", + "from keras.callbacks import LearningRateScheduler\n", + "import sys\n", + "import struct\n", + "import keras\n", + "import numpy as np\n", + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def buildModel2():\n", + "\n", + " activation_type = \"tanh\"\n", + " weight_decay = 1e-4\n", + " \n", + " model = Sequential()\n", + " model.add(Conv2D(64, kernel_size=(11, 11), activation=activation_type,\n", + " input_shape=(3, 32, 32), padding = 'same',\n", + " kernel_regularizer=regularizers.l2(weight_decay) ))\n", + " model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))\n", + " model.add(Dropout(0.2))\n", + " model.add(Conv2D(192, kernel_size=(5, 5), 
activation=activation_type, padding = 'same',\n", + " kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))\n", + " model.add(Dropout(0.3))\n", + "\n", + " model.add(Conv2D(384, kernel_size=(3, 3), activation=activation_type, padding = 'same',\n", + " kernel_regularizer=regularizers.l2(weight_decay) )) \n", + " model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same',\n", + " kernel_regularizer=regularizers.l2(weight_decay) ))\n", + " model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same',\n", + " kernel_regularizer=regularizers.l2(weight_decay) ))\n", + " model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))\n", + " model.add(Dropout(0.4))\n", + "\n", + " model.add(Flatten())\n", + " #model.add(Flatten())\n", + " #model.add(Dense(256))\n", + " model.add(Dense(10))\n", + " model.add(Activation('softmax'))\n", + " \n", + " return model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def lr_schedule(epoch):\n", + " lrate = 0.001\n", + " if epoch > 20:\n", + " lrate = 0.0005\n", + " if epoch > 40:\n", + " lrate = 0.0003\n", + " if epoch > 60:\n", + " lrate = 0.0001\n", + " if epoch > 80:\n", + " lrate = 0.00005 \n", + " \n", + " return lrate" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def trainModel(model): \n", + " #dir_prefix = \"/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/\"\n", + "\n", + " #opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6)\n", + " # Compile the model\n", + " model.compile(loss='categorical_crossentropy',\n", + " optimizer=Adam(lr=0.0001, decay=1e-6),\n", + " #optimizer = opt_rms,\n", + " metrics=['accuracy'])\n", + "\n", + " #print to_categorical(Y_train, 10)\n", + " print (to_categorical(Y_train))\n", + "\n", + "\n", + " datagen = ImageDataGenerator(\n", + " rotation_range=15,\n", + " width_shift_range=0.1,\n", + " height_shift_range=0.1,\n", + " horizontal_flip=True,\n", + " )\n", + " datagen.fit(X_train)\n", + "\n", + " \n", + " model.fit(X_train, to_categorical(Y_train, 10),\n", + " batch_size=128,\n", + " shuffle=True,\n", + " epochs = 1,\n", + " #epochs=100,\n", + " validation_data=(X_test, to_categorical(Y_test, 10)), callbacks=[LearningRateScheduler(lr_schedule)])\n", + "\n", + " # Evaluate the model\n", + " scores = model.evaluate(X_test.astype('float32'), to_categorical(Y_test, 10))\n", + "\n", + " print('Loss: %.3f' % scores[0])\n", + " print('Accuracy: %.3f' % scores[1])\n", + " \n", + " print (\"*** TRAINED MODEL ****\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "K.set_image_data_format('channels_first')\n", + "(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()\n", + "test_labels = Y_test\n", + "train_labels = Y_train\n", + "\n", + "#X_train = X_train.astype('float32')\n", + "#X_test = X_test.astype('float32')\n", + "X_train = X_train / 255.0\n", + "X_test = X_test / 255.0\n", + "\n", + "mean = np.mean(X_train,axis=(0,1,2,3))\n", + "std = np.std(X_train,axis=(0,1,2,3)) \n", + "X_train = (X_train-mean)/(std+1e-7)\n", + "X_test = (X_test-mean)/(std+1e-7)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_train = X_train[:8000]\n", + "X_test = X_test[:8000]\n", + "Y_train = Y_train[:8000]\n", + "Y_test = 
Y_test[:8000]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 1.]\n", + " [0. 0. 0. ... 0. 0. 1.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 1.]\n", + " [0. 1. 0. ... 0. 0. 0.]\n", + " [0. 1. 0. ... 0. 0. 0.]]\n", + "WARNING:tensorflow:Variable *= will be deprecated. Use `var.assign(var * other)` if you want assignment to the variable value or `x = x * y` if you want a new python Tensor object.\n", + "Train on 50000 samples, validate on 10000 samples\n", + "Epoch 1/1\n", + "50000/50000 [==============================] - 12s 232us/step - loss: 2.0118 - acc: 0.3704 - val_loss: 1.8003 - val_acc: 0.4312\n", + "10000/10000 [==============================] - 1s 150us/step\n", + "Loss: 1.800\n", + "Accuracy: 0.431\n", + "*** TRAINED MODEL ****\n", + "\n" + ] + } + ], + "source": [ + "model = buildModel2()\n", + "trainModel(model)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "time: 1.0208089351654053\n" + ] + } + ], + "source": [ + "import time\n", + "start = time.time()\n", + "keras_result = model.predict(X_test[:8000])\n", + "print(\"time:\", time.time() - start)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import keras2onnx\n", + "onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n", + "import onnx\n", + "onnx.save(onnx_model, \"../models/keras/alexnet8k.onnx\")\n", + "import pickle\n", + "with open('keras_dump', 'wb') as fp:\n", + " pickle.dump(keras_result, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#onnx_model = keras2onnx.convert_keras(model, model.name)\n", + "import onnxruntime\n", + "sess = onnxruntime.InferenceSession(\"../models/keras/alexnet.onnx\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "input_name = sess.get_inputs()[0].name\n", + "print(\"Input name :\", input_name)\n", + "input_shape = sess.get_inputs()[0].shape\n", + "print(\"Input shape :\", input_shape)\n", + "input_type = sess.get_inputs()[0].type\n", + "print(\"Input type :\", input_type)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "output_name = sess.get_outputs()[0].name\n", + "print(\"Output name :\", output_name) \n", + "output_shape = sess.get_outputs()[0].shape\n", + "print(\"Output shape :\", output_shape)\n", + "output_type = sess.get_outputs()[0].type\n", + "print(\"Output type :\", output_type)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#K.clear_session()\n", + "start = time.time()\n", + "ort_result = sess.run([output_name], {input_name: X_test.astype('float32')})\n", + "import pickle\n", + "\n", + "with open('dumps/ort_dump', 'wb') as fp:\n", + " pickle.dump(ort_result, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import numpy as np\n", + "with open ('ort_dump', 'rb') as fp:\n", + " ort_res = pickle.load(fp)\n", + "with open ('keras_dump', 'rb') as fp:\n", + " keras_res = pickle.load(fp)\n", + "\n", + "for ref_o, o in zip(ort_res[0], keras_res):\n", + " 
np.testing.assert_almost_equal(ref_o, o, 5) #using decimal of 3 would pass test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(ort_res[0])\n", + "print(\"--------------\")\n", + "print(keras_res)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/alexnet_onnx.ipynb b/hpvm/projects/onnx/src/alexnet_onnx.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..73755ecf1ccc27eedc5f1319c20e30473936c3e8 --- /dev/null +++ b/hpvm/projects/onnx/src/alexnet_onnx.ipynb @@ -0,0 +1,192 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from keras.datasets import cifar10\n", + "from keras import backend as K\n", + "import numpy as np\n", + "import os\n", + "import keras2onnx\n", + "import onnx\n", + "import onnxruntime\n", + "import time\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "K.set_image_data_format('channels_first')\n", + "(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()\n", + "test_labels = Y_test\n", + "train_labels = Y_train\n", + "\n", + "#X_train = X_train.astype('float32')\n", + "#X_test = X_test.astype('float32')\n", + "X_train = X_train / 255.0\n", + "X_test = X_test / 255.0\n", + "\n", + "mean = np.mean(X_train,axis=(0,1,2,3))\n", + "std = np.std(X_train,axis=(0,1,2,3)) \n", + "X_train = (X_train-mean)/(std+1e-7)\n", + "X_test = (X_test-mean)/(std+1e-7)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sess = onnxruntime.InferenceSession(\"../models/keras/alexnet.onnx\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input name : conv2d_1_input\n", + "Input shape : [None, 3, 32, 32]\n", + "Input type : tensor(float)\n" + ] + } + ], + "source": [ + "input_name = sess.get_inputs()[0].name\n", + "print(\"Input name :\", input_name)\n", + "input_shape = sess.get_inputs()[0].shape\n", + "print(\"Input shape :\", input_shape)\n", + "input_type = sess.get_inputs()[0].type\n", + "print(\"Input type :\", input_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Output name : activation_1\n", + "Output shape : [None, 10]\n", + "Output type : tensor(float)\n" + ] + } + ], + "source": [ + "output_name = sess.get_outputs()[0].name\n", + "print(\"Output name :\", output_name) \n", + "output_shape = sess.get_outputs()[0].shape\n", + "print(\"Output shape :\", output_shape)\n", + "output_type = sess.get_outputs()[0].type\n", + "print(\"Output type :\", output_type)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "time: 1.3165459632873535\n" + ] + } + ], + "source": [ + "#X_test = np.moveaxis(X_test, -1, 1)\n", + "X_test.shape\n", + "#K.clear_session()\n", + "start_time = time.time()\n", + "ort_result = sess.run([output_name], {input_name: X_test.astype('float32')[:8000]})\n", + "print(\"time: \", time.time() - start_time)\n", + "import pickle\n", + "\n", + "with open('dumps/ort_dump', 'wb') as fp:\n", + " pickle.dump(ort_result, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'dumps/keras_dump'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-20-a59ffee1b5dd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'dumps/ort_dump'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mort_res\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'dumps/keras_dump'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mkeras_res\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'dumps/keras_dump'" + ] + } + ], + "source": [ + "import pickle\n", + "import numpy as np\n", + "with open ('dumps/ort_dump', 'rb') as fp:\n", + " ort_res = pickle.load(fp)\n", + "with open ('dumps/keras_dump', 'rb') as fp:\n", + " keras_res = pickle.load(fp)\n", + "\n", + "for ref_o, o in zip(ort_res[0], keras_res):\n", + " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/alexnet_tvm.ipynb b/hpvm/projects/onnx/src/alexnet_tvm.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..eaa92800e5ef55e1de64cbbde9395872727a4e5b --- /dev/null +++ b/hpvm/projects/onnx/src/alexnet_tvm.ipynb @@ -0,0 
+1,225 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from keras.datasets import cifar10\n", + "from keras import backend as K\n", + "import sys\n", + "import struct\n", + "import os\n", + "import tvm\n", + "import tvm.relay as relay\n", + "from tvm.contrib import graph_runtime\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "K.set_image_data_format('channels_last')\n", + "(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()\n", + "X_test = X_test / 255.0\n", + "mean = np.mean(X_train,axis=(0,1,2,3))\n", + "std = np.std(X_train,axis=(0,1,2,3)) \n", + "X_test = (X_test-mean)/(std+1e-7)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#print(X_test.shape)\n", + "#X_test = X_test[:8000]\n", + "#print(X_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import onnx\n", + "onnx_model = onnx.load(\"../models/keras/alexnet_last.onnx\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "target='cuda -libs=cudnn,cublas'\n", + "input_name = 'conv2d_8_input'\n", + "shape_dict = {input_name: X_test[:1000].shape}\n", + "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", + "ctx = tvm.gpu()\n", + "with relay.build_config(opt_level=3):\n", + " executor = relay.build_module.create_executor('graph', mod, ctx, target)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# LLVM EXECUTE SUCCEEDED\n", + "import time\n", + "start_time = time.time()\n", + "tvm_out = executor.evaluate()(tvm.nd.array(X_test.astype('float32')[:1000]), **params)\n", + "print(\"Time:\", time.time() - start_time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top1_tvm = np.argmax(tvm_out.asnumpy()[0])\n", + "import pickle\n", + "with open('dumps/tvm_dump', 'wb') as fp:\n", + " pickle.dump(tvm_out, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (8000, 4096, 'float32'), (10, 4096, 'float32'), 0, 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 256, 8, 8, 'float32'), (256, 256, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 384, 8, 8, 'float32'), (256, 384, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 192, 8, 8, 'float32'), (384, 192, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 64, 16, 16, 'float32'), (192, 64, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 3, 32, 32, 'float32'), (64, 3, 11, 11, 'float32'), (1, 1), (5, 5), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n" + ] + } + ], + "source": [ + "input_name = 'conv2d_8_input'\n", + "input_size = 8000\n", + "shape_dict = {input_name: X_test[:input_size].shape}\n", + "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", + "target = 'cuda -libs=cudnn,cublas'\n", + "with relay.build_config(opt_level=3):\n", + " graph, lib, params = relay.build(mod, target, params=params)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "ename": "TVMError", + "evalue": "Traceback (most recent call last):\n [bt] (7) /home/ys26/tvm/build/libtvm.so(TVMFuncCall+0x61) [0x7f664ddc32b1]\n [bt] (6) /home/ys26/tvm/build/libtvm.so(+0xbecd74) [0x7f664de16d74]\n [bt] (5) /home/ys26/tvm/build/libtvm.so(+0xbecc19) [0x7f664de16c19]\n [bt] (4) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntimeCreate(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module const&, std::vector<DLContext, std::allocator<DLContext> > const&)+0xcf) [0x7f664de16a1f]\n [bt] (3) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::Init(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module, std::vector<DLContext, std::allocator<DLContext> > const&)+0x258) [0x7f664de16698]\n [bt] (2) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::SetupStorage()+0x65f) [0x7f664de1334f]\n [bt] (1) /home/ys26/tvm/build/libtvm.so(tvm::runtime::NDArray::Empty(std::vector<long, std::allocator<long> >, DLDataType, DLContext)+0x1ef) [0x7f664dd9fcaf]\n [bt] (0) /home/ys26/tvm/build/libtvm.so(tvm::runtime::CUDADeviceAPI::AllocDataSpace(DLContext, unsigned long, unsigned long, DLDataType)+0xde1) [0x7f664de226d1]\n File \"/home/ys26/tvm/src/runtime/cuda/cuda_device_api.cc\", line 119\nCUDA: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading: out of memory", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + 
"\u001b[0;31mTVMError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-6-0bd1741a62f4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# create module\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mmodule\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgraph_runtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgraph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;31m# set input and parameters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"conv2d_8_input\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0minput_size\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"float32\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/tvm/python/tvm/contrib/graph_runtime.py\u001b[0m in \u001b[0;36mcreate\u001b[0;34m(graph_json_str, libmod, ctx)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0mfcreate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_global_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"tvm.graph_runtime.create\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mGraphModule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgraph_json_str\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mdevice_type_id\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_device_ctx\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlibmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/tvm/python/tvm/_ffi/_ctypes/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtcodes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctypes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mc_int\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnum_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 206\u001b[0m ctypes.byref(ret_val), ctypes.byref(ret_tcode)) != 0:\n\u001b[0;32m--> 207\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mget_last_ffi_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 208\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0margs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTVMError\u001b[0m: Traceback (most recent call last):\n [bt] (7) /home/ys26/tvm/build/libtvm.so(TVMFuncCall+0x61) [0x7f664ddc32b1]\n [bt] (6) /home/ys26/tvm/build/libtvm.so(+0xbecd74) [0x7f664de16d74]\n [bt] (5) /home/ys26/tvm/build/libtvm.so(+0xbecc19) [0x7f664de16c19]\n [bt] (4) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntimeCreate(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module const&, std::vector<DLContext, std::allocator<DLContext> > const&)+0xcf) [0x7f664de16a1f]\n [bt] (3) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::Init(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module, std::vector<DLContext, std::allocator<DLContext> > const&)+0x258) [0x7f664de16698]\n [bt] (2) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::SetupStorage()+0x65f) [0x7f664de1334f]\n [bt] (1) /home/ys26/tvm/build/libtvm.so(tvm::runtime::NDArray::Empty(std::vector<long, std::allocator<long> >, DLDataType, DLContext)+0x1ef) [0x7f664dd9fcaf]\n [bt] (0) /home/ys26/tvm/build/libtvm.so(tvm::runtime::CUDADeviceAPI::AllocDataSpace(DLContext, unsigned long, unsigned long, DLDataType)+0xde1) [0x7f664de226d1]\n File \"/home/ys26/tvm/src/runtime/cuda/cuda_device_api.cc\", line 119\nCUDA: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading: out of memory" + ] + } + ], + "source": [ + "import time\n", + "ctx = tvm.gpu()\n", + "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n", + "# create module\n", + "module = graph_runtime.create(graph, lib, ctx)\n", + "# set input and parameters\n", + "module.set_input(\"conv2d_8_input\", X_test[:input_size].astype(\"float32\"))\n", + "module.set_input(**params)\n", + "# run\n", + "start_time = time.time()\n", + "module.run()\n", + "out_shape = (input_size, 10)\n", + "# get output\n", + "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n", + "print(\"Time:\", time.time() - start_time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import numpy as np\n", + "with open ('dumps/tvm_dump', 'rb') as fp:\n", + " tvm_res = pickle.load(fp)\n", + "with open ('dumps/keras_dump', 'rb') as fp:\n", + " keras_res = pickle.load(fp)\n", + "\n", + "for ref_o, o in zip(tvm_res, keras_res):\n", + " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test\n", + "print(\"Accuracy matched!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/alexnet_tvm.py b/hpvm/projects/onnx/src/alexnet_tvm.py new file mode 100644 index 0000000000000000000000000000000000000000..dbbe8c6f8cb5f4e0cb98a467784ded7cfb384d31 --- /dev/null +++ b/hpvm/projects/onnx/src/alexnet_tvm.py @@ -0,0 +1,40 @@ +import numpy as np +from keras.datasets import cifar10 +from keras import backend as K +import sys +import struct +import numpy as np +import os 
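+# NOTE: compiling and running the graph for the full test set in one call ran out +# of GPU memory in the notebook version of this experiment; an untested sketch of +# a safer pattern is to compile for a fixed batch shape and loop over the data: +# shape_dict = {input_name: X_test[:1000].shape} +# mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) +# (build the executor as below, then:) +# for i in range(0, len(X_test), 1000): +# tvm_out = executor.evaluate()(tvm.nd.array(X_test[i:i + 1000].astype('float32')), **params)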
+import tvm +import tvm.relay as relay +from tvm.contrib import graph_runtime +os.environ['CUDA_VISIBLE_DEVICES'] = '1' + +K.set_image_data_format('channels_last') +(X_train, Y_train), (X_test, Y_test) = cifar10.load_data() +test_labels = Y_test +train_labels = Y_train + +#X_train = X_train.astype('float32') +#X_test = X_test.astype('float32') +X_train = X_train / 255.0 +X_test = X_test / 255.0 + +mean = np.mean(X_train,axis=(0,1,2,3)) +std = np.std(X_train,axis=(0,1,2,3)) +X_train = (X_train-mean)/(std+1e-7) +X_test = (X_test-mean)/(std+1e-7) + +import onnx +onnx_model = onnx.load("../models/keras/alexnet_last.onnx") + +target = tvm.target.cuda() +input_name = 'conv2d_8_input' +shape_dict = {input_name: X_test.shape} +mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) +ctx = tvm.gpu(0) +with relay.build_config(opt_level=3): + executor = relay.build_module.create_executor('graph', mod, ctx, target) + +dtype = 'float32' +tvm_out = executor.evaluate()(tvm.nd.array(X_test.astype('float32')), **params) \ No newline at end of file diff --git a/hpvm/projects/onnx/src/keras_models/alexnet.py b/hpvm/projects/onnx/src/keras_models/alexnet.py new file mode 100644 index 0000000000000000000000000000000000000000..b562c7d658969c8efe8dca20a0502475268a7af2 --- /dev/null +++ b/hpvm/projects/onnx/src/keras_models/alexnet.py @@ -0,0 +1,174 @@ + +import numpy as np +from keras.datasets import cifar10 +from keras.models import Sequential +from keras.layers.core import Dense, Dropout, Flatten, Activation +from keras.layers.convolutional import Conv2D +from keras.optimizers import Adam +from keras.layers.pooling import MaxPooling2D +from keras.utils.np_utils import to_categorical +from keras.preprocessing.image import ImageDataGenerator +from keras import backend as K +from keras import regularizers +from keras.callbacks import LearningRateScheduler +import sys +import struct +import keras +import numpy as np +import os + + + +def lr_schedule(epoch): + lrate = 0.001 + if epoch > 20: + lrate = 0.0005 + if epoch > 40: + lrate = 0.0003 + if epoch > 60: + lrate = 0.0001 + if epoch > 80: + lrate = 0.00005 + + return lrate + + + +def buildModel2(): + + activation_type = "tanh" + weight_decay = 1e-4 + + model = Sequential() + model.add(Conv2D(64, kernel_size=(11, 11), activation=activation_type, + input_shape=(3, 32, 32), padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) + model.add(Dropout(0.2)) + model.add(Conv2D(192, kernel_size=(5, 5), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay))) + model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) + model.add(Dropout(0.3)) + + model.add(Conv2D(384, kernel_size=(3, 3), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) + model.add(Dropout(0.4)) + + model.add(Flatten()) + #model.add(Flatten()) + #model.add(Dense(256)) + model.add(Dense(10)) + model.add(Activation('softmax')) + + return model + + + +def buildModel(): + + model = Sequential() + model.add(Conv2D(128, kernel_size=(3, 3), activation='tanh', input_shape=(3, 32, 32), padding = 
'same')) + model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) + model.add(MaxPooling2D(pool_size=(2, 2))) + #model.add(Dropout(0.25)) + + model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) + model.add(MaxPooling2D(pool_size=(2, 2))) + #model.add(Dropout(0.25)) + + model.add(Flatten()) + #model.add(Flatten()) + model.add(Dense(4096, activation='tanh')) + #model.add(Dropout(0.5)) + model.add(Dense(2048, activation='tanh')) + model.add(Dense(10, activation='tanh')) + model.add(Activation('softmax')) + + return model + + + + +def trainModel(model): + + (X_train, Y_train), (X_test, Y_test) = cifar10.load_data() + test_labels = Y_test + train_labels = Y_train + + #X_train = X_train.astype('float32') + #X_test = X_test.astype('float32') + X_train = X_train / 255.0 + X_test = X_test / 255.0 + + mean = np.mean(X_train,axis=(0,1,2,3)) + std = np.std(X_train,axis=(0,1,2,3)) + X_train = (X_train-mean)/(std+1e-7) + X_test = (X_test-mean)/(std+1e-7) + + dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/" + + #opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6) + # Compile the model + model.compile(loss='categorical_crossentropy', + optimizer=Adam(lr=0.0001, decay=1e-6), + #optimizer = opt_rms, + metrics=['accuracy']) + + #print to_categorical(Y_train, 10) + print (to_categorical(Y_train)) + + + datagen = ImageDataGenerator( + rotation_range=15, + width_shift_range=0.1, + height_shift_range=0.1, + horizontal_flip=True, + ) + datagen.fit(X_train) + + + model.fit(X_train, to_categorical(Y_train, 10), + batch_size=128, + shuffle=True, + epochs = 1, + #epochs=100, + validation_data=(X_test, to_categorical(Y_test, 10)), callbacks=[LearningRateScheduler(lr_schedule)]) + + # Evaluate the model + scores = model.evaluate(X_test, to_categorical(Y_test, 10)) + + print('Loss: %.3f' % scores[0]) + print('Accuracy: %.3f' % scores[1]) + + print ("*** TRAINED MODEL ****\n") + + + #dumpCalibrationData("calibration_data/alexnet_calib.bin", X_train, + # "calibration_data/alexnet_train_labels.bin", train_labels) + + #translate_to_approxhpvm(model, "data/alexnet_cifar10/", X_test, test_labels, 10) + + + +if __name__ == "__main__": + + os.environ["CUDA_VISIBLE_DEVICES"] = "0" + # Changing to NCHW format + K.set_image_data_format('channels_first') + + model = buildModel2() + trainModel(model) + import keras2onnx + onnx_model = keras2onnx.convert_keras(model, model.name) + import onnx + onnx.save(onnx_model, "../models/keras/alexnet.onnx") diff --git a/hpvm/projects/onnx/src/keras_models/alexnet2.py b/hpvm/projects/onnx/src/keras_models/alexnet2.py new file mode 100644 index 0000000000000000000000000000000000000000..812b212165666370092a3d55e8482643b550f830 --- /dev/null +++ b/hpvm/projects/onnx/src/keras_models/alexnet2.py @@ -0,0 +1,243 @@ + +import keras +from keras.models import Sequential +from keras.utils import np_utils +from keras.preprocessing.image import ImageDataGenerator +from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization +from keras.layers import Conv2D, MaxPooling2D +from keras.datasets import cifar10 +from keras import regularizers +from keras.callbacks import LearningRateScheduler +import numpy as np +import os +import 
struct +import sys +from keras import backend as K +from approxhpvm_translator import translate_to_approxhpvm + + + +def dumpWeights(file_name, weights, N, H, W, C): + # NOTE: Writing the NHWC weights array as NCHW + f = open(file_name, "wb") + for i in range(N): + for j in range(C): + for k in range(H): + for l in range(W): + f.write(weights[i][k][l][j]) + + f.close() + + +def dumpConvWeights(file_name, weights, N, C, H, W): + + print (weights.shape) + + f = open(file_name, "wb") + for i in range(N): + for j in range(C): + for k in range(H): + for l in range(W): + f.write(weights[k][l][j][i]) + f.close() + + + +def dumpFcWeights(file_name, weights, H, W): + + print (weights.shape) + + f = open(file_name, "wb") + for i in range(H): + for j in range(W): + f.write(weights[i][j]) + f.close() + + +def dumpFcBias(file_name, bias, W): + + print (bias.shape) + + f = open(file_name, "wb") + for i in range(W): + f.write(bias[i]) + f.close() + + +def dumpLabels(file_name, Y_test): + + f = open(file_name, "wb") + + labels_map = {} + for label in Y_test: + label_val = np.int8(label[0]) + if label_val not in labels_map: + labels_map[label_val] = 0 + labels_map[label_val] += 1 + + f.write(label_val) + + f.close() + + + +def dumpData(X_test, file_name, N, C, H, W): + + print (X_test.shape) + + f = open(file_name, "wb") + for i in range(N): + for j in range(C): + for k in range(H): + for l in range(W): + val = struct.unpack("f", struct.pack("f", X_test[i][j][k][l])) + f.write(np.float32(val[0])) + + f.close() + + + + + +def lr_schedule(epoch): + lrate = 0.001 + if epoch > 75: + lrate = 0.0005 + if epoch > 100: + lrate = 0.0003 + return lrate + + +def lr_schedule2(epoch): + lrate = 0.0005 + if epoch > 100: + lrate = 0.0003 + if epoch > 200: + lrate = 0.0002 + if epoch > 250: + lrate = 0.0001 + if epoch > 300: + lrate = 0.00003 + + return lrate + + +K.set_image_data_format('channels_first') + +(x_train, y_train), (x_test, y_test) = cifar10.load_data() +test_labels = y_test +x_train = x_train.astype('float32') +x_test = x_test.astype('float32') + +#z-score +mean = np.mean(x_train,axis=(0,1,2,3)) +std = np.std(x_train,axis=(0,1,2,3)) +x_train = (x_train-mean)/(std+1e-7) +x_test = (x_test-mean)/(std+1e-7) + + +# Dumping test data and test labels +dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet2_cifar10/" + +dumpLabels(dir_prefix + "test_labels.bin", y_test) +dumpData(x_test, dir_prefix + "norm_cifar_input.bin", 10000, 3, 32, 32) + + + +num_classes = 10 +y_train = np_utils.to_categorical(y_train,num_classes) +y_test = np_utils.to_categorical(y_test,num_classes) + +weight_decay = 1e-4 +activation_type = 'tanh' + + +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + + +model = Sequential() +model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=x_train.shape[1:])) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) +model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) +model.add(MaxPooling2D(pool_size=(2,2))) +model.add(Dropout(0.2)) + +model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) +model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) 
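+# End of the second (64-filter) conv block: 2x2 max-pooling halves the feature +# maps, and dropout grows block by block (0.2 -> 0.3 -> 0.4) for regularization.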
+model.add(MaxPooling2D(pool_size=(2,2))) +model.add(Dropout(0.3)) + +model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) +model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) +model.add(MaxPooling2D(pool_size=(2,2))) +model.add(Dropout(0.4)) + +model.add(Flatten()) +model.add(Dense(num_classes)) +model.add(Activation('softmax')) +model.summary() + +#data augmentation +datagen = ImageDataGenerator( + rotation_range=15, + width_shift_range=0.1, + height_shift_range=0.1, + horizontal_flip=True, + ) + +datagen.fit(x_train) + + +#training +batch_size = 64 + +opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6) +model.compile(loss='categorical_crossentropy', optimizer=opt_rms, metrics=['accuracy']) +model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),\ + steps_per_epoch=x_train.shape[0] // batch_size, #epochs=350,\ + epochs=1, + verbose=1,validation_data=(x_test,y_test),callbacks=[LearningRateScheduler(lr_schedule2)]) +#save to disk +model_json = model.to_json() +with open('model.json', 'w') as json_file: + json_file.write(model_json) + model.save_weights('model.h5') + +#testing +scores = model.evaluate(x_test, y_test, batch_size=128, verbose=1) +print('\nTest result: %.3f loss: %.3f' % (scores[1]*100,scores[0])) + + +translate_to_approxhpvm(model, "alexnet2_cifar10_test/", x_test, test_labels, "alexnet2_cifar10/", y_test) +sys.exit(0) + + + +params = model.get_weights() +dumpConvWeights(dir_prefix + "conv1.bin", params[0], 32, 3, 3, 3) +dumpFcBias(dir_prefix + "conv1_bias.bin", params[1], 32) +dumpConvWeights(dir_prefix + "conv2.bin", params[2], 32, 32, 3, 3) +dumpFcBias(dir_prefix + "conv2_bias.bin", params[3], 32) +dumpConvWeights(dir_prefix + "conv3.bin", params[4], 64, 32, 3, 3) +dumpFcBias(dir_prefix + "conv3_bias.bin", params[5], 64) +dumpConvWeights(dir_prefix + "conv4.bin", params[6], 64, 64, 3, 3) +dumpFcBias(dir_prefix + "conv4_bias.bin", params[7], 64) +dumpConvWeights(dir_prefix + "conv5.bin", params[8], 128, 64, 3, 3) +dumpFcBias(dir_prefix + "conv5_bias.bin", params[9], 128) +dumpConvWeights(dir_prefix + "conv6.bin", params[10], 128, 128, 3, 3) +dumpFcBias(dir_prefix + "conv6_bias.bin", params[11], 128) + +dumpFcWeights(dir_prefix + "fc1.bin", params[12], 2048, 10) +dumpFcBias(dir_prefix + "fc1_bias.bin", params[13], 10) + + diff --git a/hpvm/projects/onnx/src/keras_models/lenet.py b/hpvm/projects/onnx/src/keras_models/lenet.py new file mode 100644 index 0000000000000000000000000000000000000000..c9588eef6c393457617b7fdda03c7b8222af5357 --- /dev/null +++ b/hpvm/projects/onnx/src/keras_models/lenet.py @@ -0,0 +1,97 @@ + +import sys +import keras +from keras.datasets import mnist +from keras.models import Sequential +from keras.layers import Dense, Dropout, Flatten, Activation +from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D +from keras import backend as K +from frontend.approxhpvm_translator import translate_to_approxhpvm + + +batch_size = 128 +num_classes = 10 + + +# input image dimensions +img_rows, img_cols = 28, 28 + + +if __name__ == "__main__": + + # Changing Keras data format to NCHW - NHWC is default + # NOTE: ApproxHPVM requires NCHW format + K.set_image_data_format('channels_first') + + # Loads Mnist dataset + (x_train, y_train), (x_test, y_test) = mnist.load_data() + test_labels = y_test + + # Reshaping 
data to be NCHW format + x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) + x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) + input_shape = (1, img_rows, img_cols) + + + # Data Normalization + x_train = x_train.astype('float32') + x_test = x_test.astype('float32') + x_train /= 255 + x_test /= 255 + + + # convert class vectors to binary class matrices - required by Keras + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + + + + # Network Compostion: 3 Conv Layers, 2 Dense Layers + model = Sequential() + + # ConvLayer1 + model.add(Conv2D(32, kernel_size=(5, 5), + activation='relu', + padding = 'same', + input_shape=input_shape)) + model.add(MaxPooling2D(pool_size=(2, 2))) + + # ConvLayer2 + model.add(Conv2D(64, (5, 5), activation='relu', padding = 'same')) + + # ConvLayer3 + # NOTE: ZeroPading needed for ConvLayer with strides > 1 + model.add(ZeroPadding2D(padding = (1,1))) + model.add(Conv2D(64, (3, 3), strides = (2,2), activation='relu', padding = 'valid') ) + + model.add(Flatten()) + # DenseLayer1 + model.add(Dense(1024, activation='relu')) + # DenseLayer2 + model.add(Dense(num_classes, activation='relu')) + # Softmax Layer + model.add(Activation('softmax')) + + + # Configures model for training + model.compile(loss=keras.losses.categorical_crossentropy, + optimizer=keras.optimizers.Adadelta(), + metrics=['accuracy']) + + # Training + model.fit(x_train, y_train, + batch_size=batch_size, + epochs=5, + verbose=1, + validation_data=(x_test, y_test)) + + + # Inference + score = model.evaluate(x_test, y_test, verbose=0) + print('Test loss:', score[0]) + print('Test accuracy:', score[1]) + + + # NOTE: Call to ApproxHPVM Translator - Dumps weights and ApproxHPVM C src + translate_to_approxhpvm(model, "data/lenet_hpvm_batch/", x_test, test_labels, 10) + diff --git a/hpvm/projects/onnx/src/keras_models/mnist.ipynb b/hpvm/projects/onnx/src/keras_models/mnist.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..1964ebe3991a1eb78760ea6a1739eacfec47e666 --- /dev/null +++ b/hpvm/projects/onnx/src/keras_models/mnist.ipynb @@ -0,0 +1,105 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import numpy as np\n", + "import onnx\n", + "import glob\n", + "from onnxruntime.backend.backend import OnnxRuntimeBackend as backend\n", + "\n", + "from onnx import numpy_helper\n", + "\n", + "# onnx2hpvm modules\n", + "from onnx2hpvm.onnx_translator import from_onnx_to_hpvm" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "model = onnx.load('../models/mnist/mnist.onnx')\n", + "test_data_dir = '../models/mnist/test_data_set_0'" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "# Load inputs\n", + "inputs = []\n", + "inputs_num = len(glob.glob(os.path.join(test_data_dir, 'input_*.pb')))\n", + "print(inputs_num)\n", + "for i in range(inputs_num):\n", + " input_file = os.path.join(test_data_dir, 'input_{}.pb'.format(i))\n", + " tensor = onnx.TensorProto()\n", + " with open(input_file, 'rb') as f:\n", + " tensor.ParseFromString(f.read())\n", + " inputs.append(numpy_helper.to_array(tensor))\n", + "\n", + "# Load reference outputs\n", + "ref_outputs = []\n", + "ref_outputs_num = 
len(glob.glob(os.path.join(test_data_dir, 'output_*.pb')))\n", + "for i in range(ref_outputs_num):\n", + " output_file = os.path.join(test_data_dir, 'output_{}.pb'.format(i))\n", + " tensor = onnx.TensorProto()\n", + " with open(output_file, 'rb') as f:\n", + " tensor.ParseFromString(f.read())\n", + " ref_outputs.append(numpy_helper.to_array(tensor))\n", + "\n", + "# Run the model on the backend\n", + "outputs = list(backend.run_model(model, inputs))\n", + "\n", + "#from_onnx_to_hpvm(model)\n", + "# Compare the results with reference outputs.\n", + "#for ref_o, o in zip(ref_outputs, outputs):\n", + "# np.testing.assert_almost_equal(ref_o, o)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/keras_models/mobilenet_cifar10.py b/hpvm/projects/onnx/src/keras_models/mobilenet_cifar10.py new file mode 100644 index 0000000000000000000000000000000000000000..b739ed819634f30f4b33173443ac41f848f9c8f1 --- /dev/null +++ b/hpvm/projects/onnx/src/keras_models/mobilenet_cifar10.py @@ -0,0 +1,161 @@ + +import sys +import os +os.environ['CUDA_VISIBLE_DEVICES'] = '1' + +from keras.models import Sequential +from keras.layers import * +from keras.datasets import cifar10 +from keras.utils import to_categorical +from keras.callbacks import * +from keras.preprocessing.image import ImageDataGenerator +from keras.models import Model +from keras import optimizers +import keras.backend as K +from frontend.approxhpvm_translator import translate_to_approxhpvm + + +K.set_image_data_format('channels_first') + +(X_train, y_train), (X_test, y_test) = cifar10.load_data() +test_labels = y_test + +print ("X_train.shape = ", X_train.shape) +print ("X_test.shape = ", X_test.shape) + + +X_train = X_train.astype('float32') +X_test = X_test.astype('float32') + + +mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True) +std = np.std(X_train, axis=(0, 2, 3), keepdims=True) + +X_train = (X_train - mean) / (std + 1e-9) +X_test = (X_test - mean) / (std + 1e-9) + +y_train = to_categorical(y_train, num_classes=10) +y_test = to_categorical(y_test, num_classes=10) + + +def get_mobilenet(alpha=1, depth_multiplier=1): + model = Sequential() + + def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)): + channel_axis = 1 + + model.add(Conv2D(filters, kernel, + padding='same', + use_bias=False, + strides=strides, + input_shape=(3, 32, 32))) + model.add(BatchNormalization(axis=channel_axis)) + model.add(Activation('relu')) + + def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)): + channel_axis = 1 + + model.add(ZeroPadding2D(padding = ((1,1), (1,1) ))) + + model.add(DepthwiseConv2D((3, 3), + padding='valid', + #depth_multiplier=depth_multiplier, + strides=strides, + use_bias=False)) + model.add(BatchNormalization(axis=channel_axis)) + + model.add(Activation('relu')) + model.add(Conv2D(pointwise_conv_filters, (1, 1), + padding='same', + use_bias=False, + strides=(1, 1))) + model.add(BatchNormalization(axis=channel_axis)) + model.add(Activation('relu')) + + + _conv_block(32, alpha, 
strides=(1, 1)) + + _depthwise_conv_block(64, alpha, depth_multiplier) + + _depthwise_conv_block(128, alpha, depth_multiplier, + strides=(2, 2)) + _depthwise_conv_block(128, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + _depthwise_conv_block(256, alpha, depth_multiplier, + strides=(2, 2)) + _depthwise_conv_block(256, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + _depthwise_conv_block(512, alpha, depth_multiplier, + strides=(2, 2)) + _depthwise_conv_block(512, alpha, depth_multiplier) + _depthwise_conv_block(512, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + _depthwise_conv_block(512, alpha, depth_multiplier) + _depthwise_conv_block(512, alpha, depth_multiplier) + _depthwise_conv_block(512, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + _depthwise_conv_block(1024, alpha, depth_multiplier, + strides=(2, 2)) + _depthwise_conv_block(1024, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + model.add(AveragePooling2D(pool_size=2)) + model.add(Flatten()) + model.add(Dense(10, activation='softmax')) + + return model + + +# data augmentation, horizontal flips only +datagen = ImageDataGenerator( + featurewise_center=False, + featurewise_std_normalization=False, + rotation_range=0.0, + width_shift_range=0.0, + height_shift_range=0.0, + vertical_flip=False, + horizontal_flip=True) +datagen.fit(X_train) + + +model = get_mobilenet() + +learning_rates=[] +for i in range(5): + learning_rates.append(2e-2) +for i in range(50-5): + learning_rates.append(1e-2) +for i in range(100-50): + learning_rates.append(8e-3) +for i in range(150-100): + learning_rates.append(4e-3) +for i in range(200-150): + learning_rates.append(2e-3) +for i in range(300-200): + learning_rates.append(1e-3) + +callbacks = [ + LearningRateScheduler(lambda epoch: float(learning_rates[epoch])) +] + +model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), + loss='categorical_crossentropy', + metrics=['accuracy']) + +model.fit_generator( + datagen.flow(X_train, y_train, batch_size=128), + steps_per_epoch=int(np.ceil(50000 / 128)), + validation_data=(X_test, y_test), + #epochs=300, + epochs=50, + callbacks=callbacks +) + +model.summary() + +translate_to_approxhpvm(model, "data/mobilenet_hpvm/", X_test, test_labels, 10) + diff --git a/hpvm/projects/onnx/src/keras_models/mobilenet_shallow.py b/hpvm/projects/onnx/src/keras_models/mobilenet_shallow.py new file mode 100644 index 0000000000000000000000000000000000000000..64df7f98174f22a59f3382ed4337d23e29900051 --- /dev/null +++ b/hpvm/projects/onnx/src/keras_models/mobilenet_shallow.py @@ -0,0 +1,158 @@ +import sys +import os +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +from keras.models import Sequential +from keras.layers import * +from keras.datasets import cifar10 +from keras.utils import to_categorical +from keras.callbacks import * +from keras.preprocessing.image import ImageDataGenerator +from keras.models import Model +from keras import optimizers +import keras.backend as K +from frontend.approxhpvm_translator import translate_to_approxhpvm + + +K.set_image_data_format('channels_first') + +(X_train, y_train), (X_test, y_test) = cifar10.load_data() +test_labels = y_test + +print ("X_train.shape = ", X_train.shape) +print ("X_test.shape = ", X_test.shape) + + +X_train = X_train.astype('float32') +X_test = X_test.astype('float32') + + +mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True) +std = np.std(X_train, axis=(0, 2, 3), keepdims=True) + +X_train = (X_train - mean) / (std + 
1e-9) +X_test = (X_test - mean) / (std + 1e-9) + +y_train = to_categorical(y_train, num_classes=10) +y_test = to_categorical(y_test, num_classes=10) + + +def get_mobilenet(alpha=1, depth_multiplier=1): + model = Sequential() + + def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)): + channel_axis = 1 + filters = int(filters * alpha) + model.add(Conv2D(filters, kernel, + padding='same', + use_bias=False, + strides=strides, + input_shape=(3, 32, 32))) + model.add(BatchNormalization(axis=channel_axis)) + model.add(Activation('relu')) + + def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)): + channel_axis = 1 + pointwise_conv_filters = int(pointwise_conv_filters * alpha) + + model.add(DepthwiseConv2D((3, 3), + padding='same', + depth_multiplier=depth_multiplier, + strides=strides, + use_bias=False)) + model.add(BatchNormalization(axis=channel_axis)) + model.add(Activation('relu')) + model.add(Conv2D(pointwise_conv_filters, (1, 1), + padding='same', + use_bias=False, + strides=(1, 1))) + model.add(BatchNormalization(axis=channel_axis)) + model.add(Activation('relu')) + + + _conv_block(32, alpha, strides=(1, 1)) + + _depthwise_conv_block(64, alpha, depth_multiplier) + + _depthwise_conv_block(128, alpha, depth_multiplier, + strides=(2, 2)) + _depthwise_conv_block(128, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + _depthwise_conv_block(256, alpha, depth_multiplier, + strides=(2, 2)) + _depthwise_conv_block(256, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + _depthwise_conv_block(512, alpha, depth_multiplier, + strides=(2, 2)) +# _depthwise_conv_block(512, alpha, depth_multiplier) +# _depthwise_conv_block(512, alpha, depth_multiplier) +# model.add(Dropout(rate=0.5)) + +# _depthwise_conv_block(512, alpha, depth_multiplier) +# _depthwise_conv_block(512, alpha, depth_multiplier) +# _depthwise_conv_block(512, alpha, depth_multiplier) +# model.add(Dropout(rate=0.5)) + +# _depthwise_conv_block(1024, alpha, depth_multiplier, +# strides=(2, 2)) +# _depthwise_conv_block(1024, alpha, depth_multiplier) +# model.add(Dropout(rate=0.5)) + + model.add(AveragePooling2D(pool_size=2)) + model.add(Flatten()) + model.add(Dense(10, activation='softmax')) + + return model + + +# data augmentation, horizontal flips only +datagen = ImageDataGenerator( + featurewise_center=False, + featurewise_std_normalization=False, + rotation_range=0.0, + width_shift_range=0.2, + height_shift_range=0.2, + vertical_flip=False, + horizontal_flip=True) +datagen.fit(X_train) + + +model = get_mobilenet() + +learning_rates=[] +for i in range(5): + learning_rates.append(5e-2) +for i in range(50-5): + learning_rates.append(2e-2) +for i in range(100-50): + learning_rates.append(8e-3) +for i in range(150-100): + learning_rates.append(4e-3) +for i in range(200-150): + learning_rates.append(2e-3) +for i in range(250-200): + learning_rates.append(1e-3) + +callbacks = [ + LearningRateScheduler(lambda epoch: float(learning_rates[epoch])) +] + +model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), + loss='categorical_crossentropy', + metrics=['accuracy']) + +model.fit_generator( + datagen.flow(X_train, y_train, batch_size=128), + steps_per_epoch=int(np.ceil(50000 / 128)), + validation_data=(X_test, y_test), + #epochs=300, + epochs=250, + callbacks=callbacks +) + +model.summary() + +translate_to_approxhpvm(model, "data/mobilenet_shallow/", X_test, test_labels, 10) + diff --git 
a/hpvm/projects/onnx/src/keras_models/mobilenetv2_cifar10.py b/hpvm/projects/onnx/src/keras_models/mobilenetv2_cifar10.py new file mode 100644 index 0000000000000000000000000000000000000000..2fbed4623d0e57d7a0dd948fa0894127fea72324 --- /dev/null +++ b/hpvm/projects/onnx/src/keras_models/mobilenetv2_cifar10.py @@ -0,0 +1,176 @@ +import sys +import os +os.environ['CUDA_VISIBLE_DEVICES'] = '1' + +from keras.models import Sequential +from keras.layers import * +from keras.datasets import cifar10 +from keras.utils import to_categorical +from keras.callbacks import * +from keras.preprocessing.image import ImageDataGenerator +from keras.models import Model +from keras import optimizers +import keras.backend as K + + +K.set_image_data_format('channels_first') + +(X_train, y_train), (X_test, y_test) = cifar10.load_data() +test_labels = y_test + +print ("X_train.shape = ", X_train.shape) +print ("X_test.shape = ", X_test.shape) + + +X_train = X_train.astype('float32') +X_test = X_test.astype('float32') + +mean = np.mean(X_train, axis=(0, 1, 2), keepdims=True) +std = np.std(X_train, axis=(0, 1, 2), keepdims=True) +X_train = (X_train - mean) / (std + 1e-9) +X_test = (X_test - mean) / (std + 1e-9) + +y_train = to_categorical(y_train, num_classes=10) +y_test = to_categorical(y_test, num_classes=10) + + +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + +def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): + channel_axis = 1 + + in_channels = inputs.shape[1] + pointwise_conv_filters = int(filters * alpha) + pointwise_filters = _make_divisible(pointwise_conv_filters, 8) + x = inputs + + if block_id: + x = Conv2D(int(expansion * in_channels), kernel_size=1, strides=1, padding='valid', use_bias=False)(x) + x = BatchNormalization(axis=channel_axis)(x) + x = Activation('relu')(x) + + if stride == 2: + x = ZeroPadding2D(padding=(1, 1))(x) + else: + x = ZeroPadding2D(padding=(1, 1))(x) + + x = DepthwiseConv2D(kernel_size=3, strides=stride, use_bias=False, padding='valid')(x) + x = BatchNormalization(axis=channel_axis)(x) + x = Activation('relu')(x) + + x = Conv2D(pointwise_filters, kernel_size=1, strides=1, padding='valid', use_bias=False)(x) + x = BatchNormalization(axis=channel_axis)(x) + + + if in_channels == pointwise_filters and stride == 1: + return Add()([inputs, x]) + return x + +def get_mobilenetv2(alpha=1.0, depth_multiplier=1): + + channel_axis = 1 + + first_block_filters = _make_divisible(32 * alpha, 8) + img_input = Input(shape=(3, 32, 32)) + + x = ZeroPadding2D(padding=(1, 1))(img_input) + x = Conv2D(first_block_filters, kernel_size=3, strides=1, padding='valid', use_bias=False)(x) + #x = BatchNormalization(axis=channel_axis)(x) + #x = Activation('relu')(x) + + x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0 ) + + x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=1 ) + x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2 ) + + x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3 ) + x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4 ) + x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5 ) + + x = _inverted_res_block(x, filters=64, 
alpha=alpha, stride=2, expansion=6, block_id=6 ) + x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7 ) + x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8 ) + x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9 ) + x = Dropout(rate=0.25)(x) + + x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10) + x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11) + x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12) + x = Dropout(rate=0.25)(x) + + x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13) + x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14) + x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15) + x = Dropout(rate=0.25)(x) + + x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16) + x = Dropout(rate=0.25)(x) + + if alpha > 1.0: + last_block_filters = _make_divisible(1280 * alpha, 8) + else: + last_block_filters = 1280 + + x = Conv2D(last_block_filters, kernel_size=1, use_bias=False)(x) + x = BatchNormalization(axis=channel_axis)(x) + x = Activation('relu')(x) + + x = AveragePooling2D()(x) + x = Flatten()(x) + x = Dense(10, activation='softmax')(x) + + model = Model(inputs=img_input, outputs=x) + return model + + +# data augmentation, horizontal flips only +datagen = ImageDataGenerator( + featurewise_center=False, + featurewise_std_normalization=False, + rotation_range=0.0, + width_shift_range=0.0, + height_shift_range=0.0, + vertical_flip=False, + horizontal_flip=True) +datagen.fit(X_train) + + +model = get_mobilenetv2() + +learning_rates=[] +for i in range(5): + learning_rates.append(2e-2) +for i in range(50-5): + learning_rates.append(1e-2) +for i in range(100-50): + learning_rates.append(8e-3) +for i in range(150-100): + learning_rates.append(4e-3) +for i in range(200-150): + learning_rates.append(2e-3) +for i in range(300-200): + learning_rates.append(1e-3) + +callbacks = [ + LearningRateScheduler(lambda epoch: float(learning_rates[epoch])) +] + +model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), + loss='categorical_crossentropy', + metrics=['accuracy']) + +model.fit_generator( + datagen.flow(X_train, y_train, batch_size=128), + steps_per_epoch=int(np.ceil(50000 / 128)), + validation_data=(X_test, y_test), + epochs=300, + callbacks=callbacks +) + diff --git a/hpvm/projects/onnx/src/keras_models/resnet.py b/hpvm/projects/onnx/src/keras_models/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..3a9e092170c767d433e438b16fdc38bd9bb9d966 --- /dev/null +++ b/hpvm/projects/onnx/src/keras_models/resnet.py @@ -0,0 +1,576 @@ +""" +#Trains a ResNet on the CIFAR10 dataset. 
+ +ResNet v1: +[Deep Residual Learning for Image Recognition +](https://arxiv.org/pdf/1512.03385.pdf) + +ResNet v2: +[Identity Mappings in Deep Residual Networks +](https://arxiv.org/pdf/1603.05027.pdf) + + +Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti +:------------|--:|-------:|-----------------------:|---: +ResNet20 v1| 3| 92.16 %| 91.25 %|35 +ResNet32 v1| 5| 92.46 %| 92.49 %|50 +ResNet44 v1| 7| 92.50 %| 92.83 %|70 +ResNet56 v1| 9| 92.71 %| 93.03 %|90 +ResNet110 v1| 18| 92.65 %| 93.39+-.16 %|165 +ResNet164 v1| 27| - %| 94.07 %| - +ResNet1001 v1|N/A| - %| 92.39 %| - + + + +Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti +:------------|--:|-------:|-----------------------:|---: +ResNet20 v2| 2| - %| - %|--- +ResNet32 v2|N/A| NA %| NA %| NA +ResNet44 v2|N/A| NA %| NA %| NA +ResNet56 v2| 6| 93.01 %| NA %|100 +ResNet110 v2| 12| 93.15 %| 93.63 %|180 +ResNet164 v2| 18| - %| 94.54 %| - +ResNet1001 v2|111| - %| 95.08+-.14 %| - +""" + +from __future__ import print_function +import keras +from keras.layers import Dense, Conv2D, BatchNormalization, Activation +from keras.layers import AveragePooling2D, Input, Flatten, ZeroPadding2D +from keras.optimizers import Adam +from keras.callbacks import ModelCheckpoint, LearningRateScheduler +from keras.callbacks import ReduceLROnPlateau +from keras.preprocessing.image import ImageDataGenerator +from keras.regularizers import l2 +from keras import backend as K +from keras.models import Model +from keras.datasets import cifar10 +from keras import backend as K +import numpy as np +import os +import sys +#from approxhpvm_translator import translate_to_approxhpvm +#from weight_utils import dumpCalibrationData + + + +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + + +K.set_image_data_format('channels_first') + + +# Training parameters +batch_size = 32 # orig paper trained all networks with batch_size=128 +#---- epochs = 200 +epochs = 2 +data_augmentation = True +num_classes = 10 + +# Subtracting pixel mean improves accuracy +subtract_pixel_mean = True + +# Model parameter +# ---------------------------------------------------------------------------- +# | | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch +# Model | n | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti +# |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2) +# ---------------------------------------------------------------------------- +# ResNet20 | 3 (2)| 92.16 | 91.25 | ----- | ----- | 35 (---) +# ResNet32 | 5(NA)| 92.46 | 92.49 | NA | NA | 50 ( NA) +# ResNet44 | 7(NA)| 92.50 | 92.83 | NA | NA | 70 ( NA) +# ResNet56 | 9 (6)| 92.71 | 93.03 | 93.01 | NA | 90 (100) +# ResNet110 |18(12)| 92.65 | 93.39+-.16| 93.15 | 93.63 | 165(180) +# ResNet164 |27(18)| ----- | 94.07 | ----- | 94.54 | ---(---) +# ResNet1001| (111)| ----- | 92.39 | ----- | 95.08+-.14| ---(---) +# --------------------------------------------------------------------------- +n = 3 + +# Model version +# Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2) +version = 1 + +# Computed depth from supplied model parameter n +if version == 1: + depth = n * 6 + 2 +elif version == 2: + depth = n * 9 + 2 + +# Model name, depth and version +model_type = 'ResNet%dv%d' % (depth, version) + +# Load the CIFAR10 data. +(x_train, y_train), (x_test, y_test) = cifar10.load_data() +test_labels = y_test +train_labels = y_train + +# Input image dimensions. +input_shape = x_train.shape[1:] + +# Normalize data. 
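+# Scale pixels to [0, 1] first; when subtract_pixel_mean is set, the per-pixel +# training-set mean computed below is removed from both the train and test splits.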
+x_train = x_train.astype('float32') / 255 +x_test = x_test.astype('float32') / 255 + +# If subtract pixel mean is enabled +if subtract_pixel_mean: + x_train_mean = np.mean(x_train, axis=0) + x_train -= x_train_mean + x_test -= x_train_mean + +print('x_train shape:', x_train.shape) +print(x_train.shape[0], 'train samples') +print(x_test.shape[0], 'test samples') +print('y_train shape:', y_train.shape) + +# Convert class vectors to binary class matrices. +y_train = keras.utils.to_categorical(y_train, num_classes) +y_test = keras.utils.to_categorical(y_test, num_classes) + + + + + + +def lr_schedule(epoch): + """Learning Rate Schedule + + Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. + Called automatically every epoch as part of callbacks during training. + + # Arguments + epoch (int): The number of epochs + + # Returns + lr (float32): learning rate + """ + lr = 1e-3 + if epoch > 180: + lr *= 0.5e-3 + elif epoch > 160: + lr *= 1e-3 + elif epoch > 120: + lr *= 1e-2 + elif epoch > 80: + lr *= 1e-1 + print('Learning rate: ', lr) + return lr + + +def resnet_layer(inputs, + num_filters=16, + kernel_size=3, + strides=1, + activation='relu', + batch_normalization=True, + conv_first=True): + """2D Convolution-Batch Normalization-Activation stack builder + + # Arguments + inputs (tensor): input tensor from input image or previous layer + num_filters (int): Conv2D number of filters + kernel_size (int): Conv2D square kernel dimensions + strides (int): Conv2D square stride dimensions + activation (string): activation name + batch_normalization (bool): whether to include batch normalization + conv_first (bool): conv-bn-activation (True) or + bn-activation-conv (False) + + # Returns + x (tensor): tensor as input to the next layer + """ + conv = Conv2D(num_filters, + kernel_size=kernel_size, + strides=strides, + padding='valid', # NOTE: using valid convs with explicit pad operation + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4)) + + padding_value = int((kernel_size - 1) / 2) + zero_padding = ZeroPadding2D(padding = (padding_value, padding_value)) + + # FIXME: Temporarily disabled batch normalization + batch_normalization = False + + x = inputs + x = zero_padding(x) + if conv_first: + x = conv(x) + if batch_normalization: + x = BatchNormalization()(x) + if activation is not None: + x = Activation(activation)(x) + else: + if batch_normalization: + x = BatchNormalization()(x) + if activation is not None: + x = Activation(activation)(x) + x = conv(x) + return x + + + +def resnet_v0(input_shape, depth, num_classes=10): + """ResNet Version 1 Model builder [a] + + Stacks of 2 x (3 x 3) Conv2D-BN-ReLU + Last ReLU is after the shortcut connection. + At the beginning of each stage, the feature map size is halved (downsampled) + by a convolutional layer with strides=2, while the number of filters is + doubled. Within each stage, the layers have the same number filters and the + same number of filters. 
+    Feature map sizes:
+    stage 0: 32x32, 16
+    stage 1: 16x16, 32
+    stage 2: 8x8, 64
+    The number of parameters is approximately the same as in Table 6 of [a]:
+    ResNet20 0.27M
+    ResNet32 0.46M
+    ResNet44 0.66M
+    ResNet56 0.85M
+    ResNet110 1.7M
+
+    # Arguments
+        input_shape (tensor): shape of input image tensor
+        depth (int): number of core convolutional layers
+        num_classes (int): number of classes (CIFAR10 has 10)
+
+    # Returns
+        model (Model): Keras model instance
+    """
+    if (depth - 2) % 6 != 0:
+        raise ValueError('depth should be 6n+2 (e.g. 20, 32, 44 in [a])')
+    # Start model definition.
+    num_filters = 16
+    num_res_blocks = int((depth - 2) / 6)
+
+    inputs = Input(shape=input_shape)
+    x = resnet_layer(inputs=inputs)
+    # Instantiate the stack of residual units
+    for stack in range(3):
+        for res_block in range(num_res_blocks):
+            strides = 1
+            if stack > 0 and res_block == 0:  # first layer but not first stack
+                strides = 2  # downsample
+            y = resnet_layer(inputs=x,
+                             num_filters=num_filters,
+                             strides=strides)
+            y = resnet_layer(inputs=y,
+                             num_filters=num_filters,
+                             activation=None)
+            if stack > 0 and res_block == 0:  # first layer but not first stack
+                # linear projection residual shortcut connection to match
+                # changed dims
+                x = resnet_layer(inputs=x,
+                                 num_filters=num_filters,
+                                 kernel_size=1,
+                                 strides=strides,
+                                 activation=None,
+                                 batch_normalization=False)
+            x = keras.layers.add([x, y])
+            x = Activation('relu')(x)
+        num_filters *= 1
+
+    # Add classifier on top.
+    # v1 does not use BN after last shortcut connection-ReLU
+    #-- x = AveragePooling2D(pool_size=8)(x)
+    y = Flatten()(x)
+    x = Dense(64)(y)
+    outputs = Dense(num_classes,
+                    activation='softmax',
+                    kernel_initializer='he_normal')(x)
+
+    # Instantiate model.
+    model = Model(inputs=inputs, outputs=outputs)
+    return model
+
+
+def resnet_v1_1(input_shape, depth, num_classes=10):
+    """ResNet Version 1 Model builder [a]
+
+    Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
+    Last ReLU is after the shortcut connection.
+    At the beginning of each stage, the feature map size is halved (downsampled)
+    by a convolutional layer with strides=2, while the number of filters is
+    doubled. Within each stage, the layers have the same number of filters and
+    the same feature map sizes.
+    Feature map sizes:
+    stage 0: 32x32, 16
+    stage 1: 16x16, 32
+    stage 2: 8x8, 64
+    The number of parameters is approximately the same as in Table 6 of [a]:
+    ResNet20 0.27M
+    ResNet32 0.46M
+    ResNet44 0.66M
+    ResNet56 0.85M
+    ResNet110 1.7M
+
+    # Arguments
+        input_shape (tensor): shape of input image tensor
+        depth (int): number of core convolutional layers
+        num_classes (int): number of classes (CIFAR10 has 10)
+
+    # Returns
+        model (Model): Keras model instance
+    """
+    if (depth - 2) % 6 != 0:
+        raise ValueError('depth should be 6n+2 (e.g. 20, 32, 44 in [a])')
+    # Start model definition.
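+    # depth = 6n + 2: three stages of n residual blocks, each block holding
+    # two 3x3 conv layers, plus the initial conv and the final dense layer.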
+    num_filters = 16
+    num_res_blocks = int((depth - 2) / 6)
+
+    inputs = Input(shape=input_shape)
+    x = resnet_layer(inputs=inputs)
+    # Instantiate the stack of residual units
+    for stack in range(3):
+        for res_block in range(num_res_blocks):
+            strides = 1
+            if stack > 0 and res_block == 0:  # first layer but not first stack
+                strides = 2  # downsample
+            y = resnet_layer(inputs=x,
+                             num_filters=num_filters,
+                             strides=strides)
+            y = resnet_layer(inputs=y,
+                             num_filters=num_filters,
+                             activation=None)
+            if stack > 0 and res_block == 0:  # first layer but not first stack
+                # linear projection residual shortcut connection to match
+                # changed dims
+                x = resnet_layer(inputs=x,
+                                 num_filters=num_filters,
+                                 kernel_size=1,
+                                 strides=strides,
+                                 activation=None,
+                                 batch_normalization=False)
+            x = keras.layers.add([x, y])
+            x = Activation('relu')(x)
+        num_filters *= 2
+
+
+    x = AveragePooling2D(pool_size=8)(x)
+    y = Flatten()(x)
+    outputs = Dense(num_classes,
+                    #activation='softmax',
+                    kernel_initializer='he_normal')(y)
+
+    outputs = Activation('softmax')(outputs)
+
+
+    # Instantiate model.
+    model = Model(inputs=inputs, outputs=outputs)
+    return model
+
+
+
+def resnet_v2(input_shape, depth, num_classes=10):
+    """ResNet Version 2 Model builder [b]
+
+    Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D, also known as the
+    bottleneck layer.
+    The first shortcut connection per stage is a 1 x 1 Conv2D; second and
+    onward shortcut connections are identity.
+    At the beginning of each stage, the feature map size is halved (downsampled)
+    by a convolutional layer with strides=2, while the number of filter maps is
+    doubled. Within each stage, the layers have the same number of filters and
+    the same filter map sizes.
+    Feature map sizes:
+    conv1 : 32x32, 16
+    stage 0: 32x32, 64
+    stage 1: 16x16, 128
+    stage 2: 8x8, 256
+
+    # Arguments
+        input_shape (tensor): shape of input image tensor
+        depth (int): number of core convolutional layers
+        num_classes (int): number of classes (CIFAR10 has 10)
+
+    # Returns
+        model (Model): Keras model instance
+    """
+    if (depth - 2) % 9 != 0:
+        raise ValueError('depth should be 9n+2 (e.g. 56 or 110 in [b])')
+    # Start model definition.
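+    # depth = 9n + 2: three stages of n bottleneck blocks, each block holding
+    # three conv layers (1x1 -> 3x3 -> 1x1), plus the initial conv and the
+    # final dense layer.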
+ num_filters_in = 16 + num_res_blocks = int((depth - 2) / 9) + + inputs = Input(shape=input_shape) + # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths + x = resnet_layer(inputs=inputs, + num_filters=num_filters_in, + conv_first=True) + + # Instantiate the stack of residual units + for stage in range(3): + for res_block in range(num_res_blocks): + activation = 'relu' + batch_normalization = True + strides = 1 + if stage == 0: + num_filters_out = num_filters_in * 4 + if res_block == 0: # first layer and first stage + activation = None + batch_normalization = False + else: + num_filters_out = num_filters_in * 2 + if res_block == 0: # first layer but not first stage + strides = 2 # downsample + + # bottleneck residual unit + y = resnet_layer(inputs=x, + num_filters=num_filters_in, + kernel_size=1, + strides=strides, + activation=activation, + batch_normalization=batch_normalization, + conv_first=False) + y = resnet_layer(inputs=y, + num_filters=num_filters_in, + conv_first=False) + y = resnet_layer(inputs=y, + num_filters=num_filters_out, + kernel_size=1, + conv_first=False) + if res_block == 0: + # linear projection residual shortcut connection to match + # changed dims + x = resnet_layer(inputs=x, + num_filters=num_filters_out, + kernel_size=1, + strides=strides, + activation=None, + batch_normalization=False) + x = keras.layers.add([x, y]) + + num_filters_in = num_filters_out + + # Add classifier on top. + # v2 has BN-ReLU before Pooling + x = BatchNormalization()(x) + x = Activation('relu')(x) + x = AveragePooling2D(pool_size=8)(x) + y = Flatten()(x) + outputs = Dense(num_classes, + activation='softmax', + kernel_initializer='he_normal')(y) + + # Instantiate model. + model = Model(inputs=inputs, outputs=outputs) + return model + +depth = 20 + +if version == 2: + model = resnet_v2(input_shape=input_shape, depth=depth) +else: + model = resnet_v1_1(input_shape=input_shape, depth=depth) + + +model.compile(loss='categorical_crossentropy', + optimizer=Adam(lr=lr_schedule(0)), + metrics=['accuracy']) +model.summary() +print(model_type) + +# Prepare model model saving directory. +save_dir = os.path.join(os.getcwd(), 'saved_models') +model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type +if not os.path.isdir(save_dir): + os.makedirs(save_dir) +filepath = os.path.join(save_dir, model_name) + +# Prepare callbacks for model saving and for learning rate adjustment. +checkpoint = ModelCheckpoint(filepath=filepath, + monitor='val_acc', + verbose=1, + save_best_only=True) + +lr_scheduler = LearningRateScheduler(lr_schedule) + +lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), + cooldown=0, + patience=5, + min_lr=0.5e-6) + +callbacks = [checkpoint, lr_reducer, lr_scheduler] + +# Run training, with or without data augmentation. 
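+# Both branches below train for `epochs` epochs with the callbacks above; the
+# augmented branch streams randomly shifted/flipped batches from
+# ImageDataGenerator.flow() instead of the raw arrays. For reference, the
+# LearningRateScheduler applies lr_schedule, e.g. (a sketch):
+#   [lr_schedule(e) for e in (0, 81, 121, 161, 181)]
+#   # -> [1e-3, 1e-4, 1e-5, 1e-6, 5e-7]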
+if not data_augmentation: + print('Not using data augmentation.') + model.fit(x_train, y_train, + batch_size=batch_size, + epochs=epochs, + validation_data=(x_test, y_test), + shuffle=True, + callbacks=callbacks) +else: + print('Using real-time data augmentation.') + # This will do preprocessing and realtime data augmentation: + datagen = ImageDataGenerator( + # set input mean to 0 over the dataset + featurewise_center=False, + # set each sample mean to 0 + samplewise_center=False, + # divide inputs by std of dataset + featurewise_std_normalization=False, + # divide each input by its std + samplewise_std_normalization=False, + # apply ZCA whitening + zca_whitening=False, + # epsilon for ZCA whitening + zca_epsilon=1e-06, + # randomly rotate images in the range (deg 0 to 180) + rotation_range=0, + # randomly shift images horizontally + width_shift_range=0.1, + # randomly shift images vertically + height_shift_range=0.1, + # set range for random shear + shear_range=0., + # set range for random zoom + zoom_range=0., + # set range for random channel shifts + channel_shift_range=0., + # set mode for filling points outside the input boundaries + fill_mode='nearest', + # value used for fill_mode = "constant" + cval=0., + # randomly flip images + horizontal_flip=True, + # randomly flip images + vertical_flip=False, + # set rescaling factor (applied before any other transformation) + rescale=None, + # set function that will be applied on each input + preprocessing_function=None, + # image data format, either "channels_first" or "channels_last" + data_format="channels_first", + # fraction of images reserved for validation (strictly between 0 and 1) + validation_split=0.0) + + # Compute quantities required for featurewise normalization + # (std, mean, and principal components if ZCA whitening is applied). + datagen.fit(x_train) + + # Fit the model on the batches generated by datagen.flow(). + model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), + validation_data=(x_test, y_test), + epochs=epochs, verbose=1, workers=4, + callbacks=callbacks) + +# Score trained model. 
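+# A sketch of the equivalent manual check (assumes y_test is one-hot, as set
+# up above):
+#   preds = model.predict(x_test, batch_size=batch_size)
+#   manual_acc = np.mean(np.argmax(preds, 1) == np.argmax(y_test, 1))
+# model.evaluate() below reports the same top-1 accuracy alongside the loss.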
+scores = model.evaluate(x_test, y_test, verbose=1) +print('Test loss:', scores[0]) +print('Test accuracy:', scores[1]) + + +#dumpCalibrationData("calibration_data/resnet18_calib.bin", x_train, +# "calibration_data/resnet18_train_labels.bin", train_labels) +#sys.exit(0) + +#translate_to_approxhpvm(model, "resnet18_cifar10_hpvm/", x_test, test_labels) + +#translate_to_approxhpvm(model, "resnet_test/", x_test, test_labels, 'resnet18_cifar10_promise/', y_test) + +import keras2onnx +onnx_model = keras2onnx.convert_keras(model, model.name) +import onnx +onnx.save(onnx_model, "../models/keras/resnet.onnx") diff --git a/hpvm/projects/onnx/src/keras_models/vgg16_cifar10.py b/hpvm/projects/onnx/src/keras_models/vgg16_cifar10.py new file mode 100644 index 0000000000000000000000000000000000000000..64e5d36e78fd728381dc864bf965ff68c4e7cf16 --- /dev/null +++ b/hpvm/projects/onnx/src/keras_models/vgg16_cifar10.py @@ -0,0 +1,241 @@ + + +from __future__ import print_function +import keras +from keras.datasets import cifar10 +from keras.preprocessing.image import ImageDataGenerator +from keras.models import Sequential +from keras.layers import Dense, Dropout, Activation, Flatten +from keras.layers import Conv2D, MaxPooling2D, BatchNormalization +from keras import optimizers +import numpy as np +from keras.layers.core import Lambda +from keras import backend as K +from keras import regularizers +import os +import sys +from frontend.approxhpvm_translator import translate_to_approxhpvm +from frontend.weight_utils import dumpCalibrationData + + + +class cifar10vgg: + def __init__(self,train=True): + self.num_classes = 10 + self.weight_decay = 0.0005 + self.x_shape = [3,32,32] + + self.model = self.build_model() + if train: + self.model = self.train(self.model) + else: + self.model.load_weights('cifar10vgg.h5') + + + def build_model(self): + # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper. 
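+        # Layout: five conv blocks (64-64, 128-128, 256-256-256, 512-512-512,
+        # 512-512-512), each followed by 2x2 max pooling, then Flatten ->
+        # Dense(512) -> Dense(num_classes) with softmax; every conv and dense
+        # layer carries an L2(weight_decay) kernel regularizer.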
+ + model = Sequential() + weight_decay = self.weight_decay + + model.add(Conv2D(64, (3, 3), padding='same', + input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.3)) + + model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(Dropout(0.5)) + + model.add(Flatten()) + model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + + model.add(Dropout(0.5)) + model.add(Dense(self.num_classes)) + model.add(Activation('softmax')) + return model + + + def normalize(self,X_train,X_test): + #this function normalize inputs for zero mean and unit variance + # it is used when training a model. + # Input: training set and test set + # Output: normalized training set and test set according to the trianing set statistics. 
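+        # Standardization: x' = (x - mean) / (std + 1e-7), with a single
+        # scalar mean/std taken over all axes (N, C, H, W) of X_train.
+        # For CIFAR-10 this comes out to roughly mean 120.7 and std 64.15,
+        # the constants hard-coded in normalize_production() below.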
+        mean = np.mean(X_train,axis=(0,1,2,3))
+        std = np.std(X_train, axis=(0, 1, 2, 3))
+        X_train = (X_train-mean)/(std+1e-7)
+        X_test = (X_test-mean)/(std+1e-7)
+        return X_train, X_test
+
+
+    def normalize_production(self,x):
+        # this function is used to normalize instances in production according to saved training set statistics
+        # Input: x - instances to normalize
+        # Output: the normalized instances, using the saved normalization constants.
+
+        # these values were produced during the first training run and hold for the standard CIFAR-10 training set
+        mean = 120.707
+        std = 64.15
+        return (x-mean)/(std+1e-7)
+
+
+    def predict(self,x,normalize=True,batch_size=50):
+        if normalize:
+            x = self.normalize_production(x)
+        return self.model.predict(x,batch_size)
+
+
+    def train(self,model):
+
+        # training parameters
+        batch_size = 128
+        #maxepoches = 250
+        maxepoches = 30
+        learning_rate = 0.01
+        lr_decay = 1e-6
+        lr_drop = 20
+        # The data, shuffled and split between train and test sets:
+        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
+        x_train = x_train.astype('float32')
+        x_test = x_test.astype('float32')
+        x_train, x_test = self.normalize(x_train, x_test)
+
+        y_train = keras.utils.to_categorical(y_train, self.num_classes)
+        y_test = keras.utils.to_categorical(y_test, self.num_classes)
+
+        def lr_scheduler(epoch):
+            return learning_rate * (0.5 ** (epoch // lr_drop))
+        reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)
+
+        # data augmentation
+        datagen = ImageDataGenerator(
+            featurewise_center=False,  # set input mean to 0 over the dataset
+            samplewise_center=False,  # set each sample mean to 0
+            featurewise_std_normalization=False,  # divide inputs by std of the dataset
+            samplewise_std_normalization=False,  # divide each input by its std
+            zca_whitening=False,  # apply ZCA whitening
+            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
+            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
+            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
+            horizontal_flip=True,  # randomly flip images horizontally
+            vertical_flip=False)  # do not flip images vertically
+        # (std, mean, and principal components if ZCA whitening is applied).
+        datagen.fit(x_train)
+
+
+
+        # optimization details
+        sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)
+        model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])
+
+
+        # training: the learning rate is halved every lr_drop (20) epochs by lr_scheduler.
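+        # steps_per_epoch = len(x_train) // batch_size, so each epoch makes
+        # (approximately) one full augmented pass over the 50,000 CIFAR-10
+        # training images. A sketch of the schedule:
+        #   [lr_scheduler(e) for e in (0, 20, 40)]  # -> [0.01, 0.005, 0.0025]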
+
+        historytemp = model.fit_generator(datagen.flow(x_train, y_train,
+                                         batch_size=batch_size),
+                            steps_per_epoch=x_train.shape[0] // batch_size,
+                            epochs=maxepoches,
+                            validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2)
+
+        model.save_weights('cifar10vgg.h5')
+        return model
+
+
+
+if __name__ == '__main__':
+
+    K.set_image_data_format('channels_first')
+
+    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
+
+    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
+    test_labels = y_test
+    train_labels = y_train
+    x_train = x_train.astype('float32')
+    x_test = x_test.astype('float32')
+
+    y_train = keras.utils.to_categorical(y_train, 10)
+    y_test = keras.utils.to_categorical(y_test, 10)
+
+    model = cifar10vgg()
+
+    predicted_x = model.predict(x_test)
+
+    norm_test = model.normalize_production(x_test)
+
+    # Normalizing train data before dumping
+    #x_train, x_test = model.normalize(x_train, x_test)
+    x_train = model.normalize_production(x_train)
+
+    # dumpCalibrationData("vgg16_cifar_calib.bin", x_train, "vgg16_train_labels.bin", train_labels)
+
+    translate_to_approxhpvm(model.model, "data/vgg16_cifar10/", norm_test, test_labels, 10)
+
+    residuals = np.argmax(predicted_x,1)!=np.argmax(y_test,1)
+
+    loss = sum(residuals)/len(residuals)
+    print("the validation 0/1 loss is: ",loss)
+
+
diff --git a/hpvm/projects/onnx/src/keras_models/vgg16_cifar100.py b/hpvm/projects/onnx/src/keras_models/vgg16_cifar100.py
new file mode 100644
index 0000000000000000000000000000000000000000..66fe6be669f984b92c8c602332c29e09968e9c8a
--- /dev/null
+++ b/hpvm/projects/onnx/src/keras_models/vgg16_cifar100.py
@@ -0,0 +1,243 @@
+
+from __future__ import print_function
+import os
+import keras
+from keras.datasets import cifar100
+from keras.preprocessing.image import ImageDataGenerator
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Activation, Flatten
+from keras.layers import Conv2D, MaxPooling2D
+from keras import optimizers
+import numpy as np
+from keras.layers.core import Lambda
+from keras import backend as K
+from keras import regularizers
+from approxhpvm_translator import translate_to_approxhpvm
+import sys
+from weight_utils import dumpCalibrationData
+
+
+
+class cifar100vgg:
+    def __init__(self,train=True):
+        self.num_classes = 100
+        self.weight_decay = 0.0005
+        self.x_shape = [3,32,32]
+
+        self.model = self.build_model()
+        if train:
+            self.model = self.train(self.model)
+        else:
+            self.model.load_weights('cifar100vgg.h5')
+
+
+    def build_model(self):
+        # Build the network of vgg for 100 classes with massive dropout and weight decay as described in the paper.
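+        # Same 13-conv VGG-16 stack as cifar10vgg above; only the classifier
+        # head differs (Dense(self.num_classes) with num_classes = 100).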
+ + model = Sequential() + weight_decay = self.weight_decay + + model.add(Conv2D(64, (3, 3), padding='same', + input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.3)) + + model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(Dropout(0.5)) + + model.add(Flatten()) + model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + + model.add(Dropout(0.5)) + model.add(Dense(self.num_classes)) + model.add(Activation('softmax')) + return model + + + def normalize(self,X_train,X_test): + #this function normalize inputs for zero mean and unit variance + # it is used when training a model. + # Input: training set and test set + # Output: normalized training set and test set according to the trianing set statistics. 
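+        # As in cifar10vgg.normalize(); the printed mean/std (about 121.9 and
+        # 68.4 for CIFAR-100) are the constants hard-coded in
+        # normalize_production() below.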
+        mean = np.mean(X_train,axis=(0,1,2,3))
+        std = np.std(X_train, axis=(0, 1, 2, 3))
+        print(mean)
+        print(std)
+        X_train = (X_train-mean)/(std+1e-7)
+        X_test = (X_test-mean)/(std+1e-7)
+        return X_train, X_test
+
+    def normalize_production(self,x):
+        # this function is used to normalize instances in production according to saved training set statistics
+        # Input: x - instances to normalize
+        # Output: the normalized instances, using the saved normalization constants.
+
+        # these values were produced during the first training run and hold for the standard CIFAR-100 training set
+        mean = 121.936
+        std = 68.389
+        return (x-mean)/(std+1e-7)
+
+    def predict(self,x,normalize=True,batch_size=50):
+        if normalize:
+            x = self.normalize_production(x)
+        return self.model.predict(x,batch_size)
+
+    def train(self,model):
+
+        # training parameters
+        batch_size = 128
+        #maxepoches = 250
+        #maxepoches = 400
+        maxepoches = 4
+        learning_rate = 0.05
+        lr_decay = 1e-6
+        lr_drop = 20
+
+        # The data, shuffled and split between train and test sets:
+        (x_train, y_train), (x_test, y_test) = cifar100.load_data()
+        x_train = x_train.astype('float32')
+        x_test = x_test.astype('float32')
+        x_train, x_test = self.normalize(x_train, x_test)
+
+        y_train = keras.utils.to_categorical(y_train, self.num_classes)
+        y_test = keras.utils.to_categorical(y_test, self.num_classes)
+
+
+        def lr_scheduler(epoch):
+            return learning_rate * (0.5 ** (epoch // lr_drop))
+        reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)
+
+
+        # data augmentation
+        datagen = ImageDataGenerator(
+            featurewise_center=False,  # set input mean to 0 over the dataset
+            samplewise_center=False,  # set each sample mean to 0
+            featurewise_std_normalization=False,  # divide inputs by std of the dataset
+            samplewise_std_normalization=False,  # divide each input by its std
+            zca_whitening=False,  # apply ZCA whitening
+            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
+            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
+            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
+            horizontal_flip=True,  # randomly flip images horizontally
+            vertical_flip=False)  # do not flip images vertically
+        # (std, mean, and principal components if ZCA whitening is applied).
+        datagen.fit(x_train)
+
+
+
+        # optimization details
+        sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)
+        model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])
+
+
+        # training: the learning rate is halved every lr_drop (20) epochs by lr_scheduler.
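+        # Note: maxepoches = 4 above trains only briefly; the commented-out
+        # values (250/400) are the earlier full-training budgets.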
+ + historytemp = model.fit_generator(datagen.flow(x_train, y_train, + batch_size=batch_size), + steps_per_epoch=x_train.shape[0] // batch_size, + epochs=maxepoches, + validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2) + model.save_weights('cifar100vgg.h5') + return model + +if __name__ == '__main__': + + K.set_image_data_format('channels_first') + os.environ["CUDA_VISIBLE_DEVICES"] = "1" + + (x_train, y_train), (x_test, y_test) = cifar100.load_data() + test_labels = y_test + train_labels = y_train + + x_train = x_train.astype('float32') + x_test = x_test.astype('float32') + + y_train = keras.utils.to_categorical(y_train, 100) + y_test = keras.utils.to_categorical(y_test, 100) + + model = cifar100vgg() + + predicted_x = model.predict(x_test) + + norm_test = model.normalize_production(x_test) + + x_train = model.normalize_production(x_train) + + dumpCalibrationData("calibration_data/vgg16_cifar100_calib.bin", x_train, + "calibration_data/vgg16_cifar100_train_labels.bin", train_labels) + sys.exit(0) + + + translate_to_approxhpvm(model.model, "vgg16_cifar100_test/", norm_test, test_labels, + "vgg16_cifar100_front", y_test) + + + residuals = (np.argmax(predicted_x,1)!=np.argmax(y_test,1)) + loss = sum(residuals)/len(residuals) + print("the validation 0/1 loss is: ",loss) + + diff --git a/hpvm/projects/onnx/src/lenet_keras.ipynb b/hpvm/projects/onnx/src/lenet_keras.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..bd83db10050f74988dc313991d06b99e247cf011 --- /dev/null +++ b/hpvm/projects/onnx/src/lenet_keras.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import sys\n", + "import keras\n", + "from keras.datasets import mnist\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense, Dropout, Flatten, Activation\n", + "from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D\n", + "from keras import backend as K" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 128\n", + "num_classes = 10\n", + "# input image dimensions\n", + "img_rows, img_cols = 28, 28 " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Changing Keras data format to NCHW - NHWC is default\n", + "# NOTE: ApproxHPVM requires NCHW format\n", + "K.set_image_data_format('channels_first')\n", + "\n", + "# Loads Mnist dataset\n", + "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", + "test_labels = y_test\n", + "\n", + "# Reshaping data to be NCHW format \n", + "x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)\n", + "x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)\n", + "input_shape = (1, img_rows, img_cols)\n", + "\n", + "\n", + "# Data Normalization \n", + "x_train = x_train.astype('float32')\n", + "x_test = x_test.astype('float32')\n", + "x_train /= 255\n", + "x_test /= 255\n", + "\n", + "\n", + "# convert class vectors to binary class matrices - required by Keras\n", + "y_train = keras.utils.to_categorical(y_train, num_classes)\n", + "y_test = keras.utils.to_categorical(y_test, num_classes)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train on 60000 samples, validate on 
10000 samples\n", + "Epoch 1/5\n", + "60000/60000 [==============================] - 8s 131us/step - loss: 0.2878 - acc: 0.9072 - val_loss: 0.0514 - val_acc: 0.9814\n", + "Epoch 2/5\n", + "60000/60000 [==============================] - 5s 83us/step - loss: 0.0459 - acc: 0.9854 - val_loss: 0.0337 - val_acc: 0.9888\n", + "Epoch 3/5\n", + "60000/60000 [==============================] - 8s 127us/step - loss: 0.0290 - acc: 0.9908 - val_loss: 0.0275 - val_acc: 0.9902\n", + "Epoch 4/5\n", + "60000/60000 [==============================] - 9s 151us/step - loss: 0.0195 - acc: 0.9940 - val_loss: 0.0367 - val_acc: 0.9883\n", + "Epoch 5/5\n", + "60000/60000 [==============================] - 10s 169us/step - loss: 0.0136 - acc: 0.9960 - val_loss: 0.0318 - val_acc: 0.9905\n", + "Test loss: 0.031750623502753844\n", + "Test accuracy: 0.9905\n" + ] + } + ], + "source": [ + "# Network Compostion: 3 Conv Layers, 2 Dense Layers\n", + "model = Sequential()\n", + "\n", + "# ConvLayer1\n", + "model.add(Conv2D(32, kernel_size=(5, 5),\n", + " activation='relu',\n", + " padding = 'same',\n", + " input_shape=input_shape))\n", + "model.add(MaxPooling2D(pool_size=(2, 2)))\n", + "\n", + "# ConvLayer2\n", + "model.add(Conv2D(64, (5, 5), activation='relu', padding = 'same'))\n", + "\n", + "# ConvLayer3\n", + "# NOTE: ZeroPading needed for ConvLayer with strides > 1\n", + "model.add(ZeroPadding2D(padding = (1,1)))\n", + "model.add(Conv2D(64, (3, 3), strides = (2,2), activation='relu', padding = 'valid') )\n", + "\n", + "model.add(Flatten())\n", + "# DenseLayer1\n", + "model.add(Dense(1024, activation='relu'))\n", + "# DenseLayer2\n", + "model.add(Dense(num_classes, activation='relu'))\n", + "# Softmax Layer\n", + "model.add(Activation('softmax'))\n", + "\n", + "\n", + "# Configures model for training \n", + "model.compile(loss=keras.losses.categorical_crossentropy,\n", + " optimizer=keras.optimizers.Adadelta(),\n", + " metrics=['accuracy'])\n", + "\n", + "# Training\n", + "model.fit(x_train, y_train,\n", + " batch_size=batch_size,\n", + " epochs=5,\n", + " verbose=1,\n", + " validation_data=(x_test, y_test))\n", + "\n", + "\n", + "# Inference\n", + "score = model.evaluate(x_test, y_test, verbose=0)\n", + "print('Test loss:', score[0])\n", + "print('Test accuracy:', score[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "time: 1.037928581237793\n" + ] + } + ], + "source": [ + "import time\n", + "start = time.time()\n", + "keras_result = model.predict(x_test[:8000])\n", + "print(\"time:\", time.time() - start)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import keras2onnx\n", + "onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n", + "import onnx\n", + "onnx.save(onnx_model, \"../models/keras/lenet.onnx\")\n", + "import pickle\n", + "with open('dumps/lenet_keras_dump', 'wb') as fp:\n", + " pickle.dump(keras_result, fp)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/lenet_onnx.ipynb 
b/hpvm/projects/onnx/src/lenet_onnx.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..fdf69ebc3cde8c1e12bee99bcde574786ffd0c18 --- /dev/null +++ b/hpvm/projects/onnx/src/lenet_onnx.ipynb @@ -0,0 +1,191 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import keras\n", + "from keras.datasets import mnist\n", + "from keras import backend as K\n", + "import numpy as np\n", + "import os\n", + "import keras2onnx\n", + "import onnx\n", + "import onnxruntime\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 128\n", + "num_classes = 10\n", + "# input image dimensions\n", + "img_rows, img_cols = 28, 28 " + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# Changing Keras data format to NCHW - NHWC is default\n", + "# NOTE: ApproxHPVM requires NCHW format\n", + "K.set_image_data_format('channels_first')\n", + "\n", + "# Loads Mnist dataset\n", + "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", + "test_labels = y_test\n", + "\n", + "# Reshaping data to be NCHW format \n", + "x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)\n", + "x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)\n", + "input_shape = (1, img_rows, img_cols)\n", + "\n", + "\n", + "# Data Normalization \n", + "x_train = x_train.astype('float32')\n", + "x_test = x_test.astype('float32')\n", + "x_train /= 255\n", + "x_test /= 255\n", + "\n", + "\n", + "# convert class vectors to binary class matrices - required by Keras\n", + "y_train = keras.utils.to_categorical(y_train, num_classes)\n", + "y_test = keras.utils.to_categorical(y_test, num_classes)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "sess = onnxruntime.InferenceSession(\"../models/keras/lenet.onnx\")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input name : conv2d_1_input\n", + "Input shape : [None, 1, 28, 28]\n", + "Input type : tensor(float)\n" + ] + } + ], + "source": [ + "input_name = sess.get_inputs()[0].name\n", + "print(\"Input name :\", input_name)\n", + "input_shape = sess.get_inputs()[0].shape\n", + "print(\"Input shape :\", input_shape)\n", + "input_type = sess.get_inputs()[0].type\n", + "print(\"Input type :\", input_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Output name : activation_1\n", + "Output shape : [None, 10]\n", + "Output type : tensor(float)\n" + ] + } + ], + "source": [ + "output_name = sess.get_outputs()[0].name\n", + "print(\"Output name :\", output_name) \n", + "output_shape = sess.get_outputs()[0].shape\n", + "print(\"Output shape :\", output_shape)\n", + "output_type = sess.get_outputs()[0].type\n", + "print(\"Output type :\", output_type)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "start = time.time()\n", + "ort_result = sess.run([output_name], {input_name: x_test[:8000]})\n", + "print(\"time:\", time.time() - start)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + 
"source": [ + "import pickle\n", + "with open('dumps/lenet_ort_dump', 'wb') as fp:\n", + " pickle.dump(ort_result, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import numpy as np\n", + "with open ('dumps/lenet_ort_dump', 'rb') as fp:\n", + " ort_res = pickle.load(fp)\n", + "with open ('dumps/lenet_keras_dump', 'rb') as fp:\n", + " keras_res = pickle.load(fp)\n", + "\n", + "for ref_o, o in zip(ort_res[0], keras_res):\n", + " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/lenet_tvm.ipynb b/hpvm/projects/onnx/src/lenet_tvm.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..274553dcc52d2b84f601a4480b04adc54a1ac0ea --- /dev/null +++ b/hpvm/projects/onnx/src/lenet_tvm.ipynb @@ -0,0 +1,241 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from keras.datasets import mnist\n", + "from keras import backend as K\n", + "import keras\n", + "import sys\n", + "import struct\n", + "import numpy as np\n", + "import os\n", + "import tvm\n", + "import tvm.relay as relay\n", + "from tvm.contrib import graph_runtime\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 128\n", + "num_classes = 10\n", + "# input image dimensions\n", + "img_rows, img_cols = 28, 28 " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Changing Keras data format to NCHW - NHWC is default\n", + "# NOTE: ApproxHPVM requires NCHW format\n", + "K.set_image_data_format('channels_first')\n", + "\n", + "# Loads Mnist dataset\n", + "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", + "test_labels = y_test\n", + "x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)\n", + "input_shape = (1, img_rows, img_cols)\n", + "x_test = x_test.astype('float32')\n", + "x_test /= 255" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import onnx\n", + "onnx_model = onnx.load(\"../models/keras/lenet.onnx\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n" + ] + } + ], + "source": [ + "target='cuda -libs=cudnn,cublas'\n", + "input_name = 'conv2d_1_input'\n", + "shape_dict = {input_name: 
x_test.shape}\n", + "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", + "ctx = tvm.gpu()\n", + "with relay.build_config(opt_level=3):\n", + " executor = relay.build_module.create_executor('graph', mod, ctx, target)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (10000, 1024, 'float32'), (10, 1024, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (10000, 3136, 'float32'), (1024, 3136, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (10000, 64, 14, 14, 'float32'), (64, 64, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (10000, 32, 14, 14, 'float32'), (64, 32, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (10000, 1, 28, 28, 'float32'), (32, 1, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n" + ] + } + ], + "source": [ + "tvm_out = executor.evaluate()(tvm.nd.array(x_test), **params)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (8000, 1024, 'float32'), (10, 1024, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (8000, 3136, 'float32'), (1024, 3136, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 64, 14, 14, 'float32'), (64, 64, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 32, 14, 14, 'float32'), (64, 32, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 1, 28, 28, 'float32'), (32, 1, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n" + ] + } + ], + "source": [ + "input_name = 'conv2d_1_input'\n", + "input_size = 8000\n", + "shape_dict = {input_name: x_test[:input_size].shape}\n", + "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", + "target = 'cuda -libs=cudnn,cublas'\n", + "with relay.build_config(opt_level=3):\n", + " graph, lib, params = relay.build(mod, target, params=params)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time: 0.043964385986328125\n" + ] + } + ], + "source": [ + "import time\n", + "ctx = tvm.gpu()\n", + "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n", + "# create module\n", + "module = graph_runtime.create(graph, lib, ctx)\n", + "# set input and parameters\n", + "module.set_input(\"conv2d_1_input\", x_test[:input_size])\n", + "module.set_input(**params)\n", + "# run\n", + "start_time = time.time()\n", + "module.run()\n", + "out_shape = (input_size, 10)\n", + "# get output\n", + "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n", + "print(\"Time:\", time.time() - start_time)\n", + "#ftimer = module.module.time_evaluator(\"get_output\", ctx, number=1)\n", + "#prof_res = np.array(ftimer().results) * 1000 # multiply 1000 for converting to millisecond\n", + "#print(\"%-20s %-19s (%s)\" % (\"lenet\", \"%.2f ms\" % np.mean(prof_res), \"%.2f ms\" % np.std(prof_res)))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "with open('dumps/lenet_tvm_dump', 'wb') as fp:\n", + " pickle.dump(out, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import numpy as np\n", + "with open ('dumps/lenet_tvm_dump', 'rb') as fp:\n", + " tvm_res = pickle.load(fp)\n", + "with open ('dumps/lenet_keras_dump', 'rb') as fp:\n", + " keras_res = pickle.load(fp)\n", + "\n", + "for ref_o, o in zip(tvm_res, keras_res):\n", + " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/mobilenet_keras.ipynb b/hpvm/projects/onnx/src/mobilenet_keras.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..5175bc2aef1403fec6a70ffb0cdd504a062d5f96 --- /dev/null +++ b/hpvm/projects/onnx/src/mobilenet_keras.ipynb @@ -0,0 +1,592 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import sys\n", + "import os\n", + "\n", + "from keras.models import Sequential\n", + "from keras.layers import *\n", + "from keras.datasets import cifar10\n", + "from keras.utils import to_categorical\n", + "from keras.callbacks import *\n", + "from keras.preprocessing.image 
import ImageDataGenerator\n", + "from keras.models import Model\n", + "from keras import optimizers\n", + "import keras.backend as K\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X_train.shape = (50000, 3, 32, 32)\n", + "X_test.shape = (10000, 3, 32, 32)\n" + ] + } + ], + "source": [ + "K.set_image_data_format('channels_first')\n", + "\n", + "(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n", + "test_labels = y_test\n", + "\n", + "print (\"X_train.shape = \", X_train.shape)\n", + "print (\"X_test.shape = \", X_test.shape)\n", + "\n", + "\n", + "X_train = X_train.astype('float32')\n", + "X_test = X_test.astype('float32')\n", + "\n", + "\n", + "mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)\n", + "std = np.std(X_train, axis=(0, 2, 3), keepdims=True)\n", + "\n", + "X_train = (X_train - mean) / (std + 1e-9)\n", + "X_test = (X_test - mean) / (std + 1e-9)\n", + "\n", + "y_train = to_categorical(y_train, num_classes=10)\n", + "y_test = to_categorical(y_test, num_classes=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def get_mobilenet(alpha=1, depth_multiplier=1):\n", + " model = Sequential()\n", + " \n", + " def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)):\n", + " channel_axis = 1\n", + " \n", + " model.add(Conv2D(filters, kernel,\n", + " padding='same',\n", + " use_bias=False,\n", + " strides=strides, \n", + " input_shape=(3, 32, 32)))\n", + " model.add(BatchNormalization(axis=channel_axis))\n", + " model.add(Activation('relu'))\n", + " \n", + " def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)):\n", + " channel_axis = 1 \n", + "\n", + " model.add(ZeroPadding2D(padding = ((1,1), (1,1) )))\n", + "\n", + " model.add(DepthwiseConv2D((3, 3),\n", + " padding='valid',\n", + " #depth_multiplier=depth_multiplier,\n", + " strides=strides,\n", + " use_bias=False)) \n", + " model.add(BatchNormalization(axis=channel_axis))\n", + " \n", + " model.add(Activation('relu'))\n", + " model.add(Conv2D(pointwise_conv_filters, (1, 1),\n", + " padding='same',\n", + " use_bias=False,\n", + " strides=(1, 1)))\n", + " model.add(BatchNormalization(axis=channel_axis))\n", + " model.add(Activation('relu'))\n", + "\n", + "\n", + " _conv_block(32, alpha, strides=(1, 1))\n", + " \n", + " _depthwise_conv_block(64, alpha, depth_multiplier)\n", + " \n", + " _depthwise_conv_block(128, alpha, depth_multiplier,\n", + " strides=(2, 2))\n", + " _depthwise_conv_block(128, alpha, depth_multiplier)\n", + " model.add(Dropout(rate=0.5))\n", + "\n", + " _depthwise_conv_block(256, alpha, depth_multiplier, \n", + " strides=(2, 2))\n", + " _depthwise_conv_block(256, alpha, depth_multiplier)\n", + " model.add(Dropout(rate=0.5))\n", + "\n", + " _depthwise_conv_block(512, alpha, depth_multiplier,\n", + " strides=(2, 2))\n", + " _depthwise_conv_block(512, alpha, depth_multiplier)\n", + " _depthwise_conv_block(512, alpha, depth_multiplier)\n", + " model.add(Dropout(rate=0.5))\n", + " \n", + " _depthwise_conv_block(512, alpha, depth_multiplier)\n", + " _depthwise_conv_block(512, alpha, depth_multiplier)\n", + " _depthwise_conv_block(512, alpha, depth_multiplier)\n", + " 
model.add(Dropout(rate=0.5))\n", + " \n", + " _depthwise_conv_block(1024, alpha, depth_multiplier,\n", + " strides=(2, 2))\n", + " _depthwise_conv_block(1024, alpha, depth_multiplier)\n", + " model.add(Dropout(rate=0.5))\n", + "\n", + " model.add(AveragePooling2D(pool_size=2))\n", + " model.add(Flatten())\n", + " model.add(Dense(10, activation='softmax'))\n", + "\n", + " return model" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/50\n", + "391/391 [==============================] - 36s 92ms/step - loss: 2.0463 - acc: 0.2513 - val_loss: 2.2584 - val_acc: 0.2776\n", + "Epoch 2/50\n", + "391/391 [==============================] - 31s 80ms/step - loss: 1.6583 - acc: 0.3977 - val_loss: 1.9809 - val_acc: 0.3528\n", + "Epoch 3/50\n", + "391/391 [==============================] - 32s 81ms/step - loss: 1.4632 - acc: 0.4740 - val_loss: 1.6017 - val_acc: 0.4475\n", + "Epoch 4/50\n", + "391/391 [==============================] - 32s 82ms/step - loss: 1.3280 - acc: 0.5213 - val_loss: 1.3650 - val_acc: 0.5158\n", + "Epoch 5/50\n", + "391/391 [==============================] - 32s 82ms/step - loss: 1.1912 - acc: 0.5727 - val_loss: 1.2064 - val_acc: 0.5807\n", + "Epoch 6/50\n", + "391/391 [==============================] - 31s 80ms/step - loss: 1.0700 - acc: 0.6203 - val_loss: 1.1014 - val_acc: 0.6085\n", + "Epoch 7/50\n", + "391/391 [==============================] - 31s 80ms/step - loss: 1.0232 - acc: 0.6350 - val_loss: 1.0276 - val_acc: 0.6331\n", + "Epoch 8/50\n", + "391/391 [==============================] - 31s 79ms/step - loss: 0.9783 - acc: 0.6498 - val_loss: 1.0276 - val_acc: 0.6398\n", + "Epoch 9/50\n", + "391/391 [==============================] - 31s 78ms/step - loss: 0.9359 - acc: 0.6673 - val_loss: 1.0931 - val_acc: 0.6226\n", + "Epoch 10/50\n", + "391/391 [==============================] - 30s 76ms/step - loss: 0.9058 - acc: 0.6776 - val_loss: 1.1184 - val_acc: 0.6105\n", + "Epoch 11/50\n", + "391/391 [==============================] - 30s 76ms/step - loss: 0.8781 - acc: 0.6884 - val_loss: 1.0405 - val_acc: 0.6498\n", + "Epoch 12/50\n", + "391/391 [==============================] - 29s 74ms/step - loss: 0.8502 - acc: 0.6998 - val_loss: 1.3313 - val_acc: 0.5514\n", + "Epoch 13/50\n", + "391/391 [==============================] - 29s 75ms/step - loss: 0.8211 - acc: 0.7077 - val_loss: 1.0371 - val_acc: 0.6443\n", + "Epoch 14/50\n", + "391/391 [==============================] - 28s 73ms/step - loss: 0.7958 - acc: 0.7181 - val_loss: 0.8624 - val_acc: 0.7061\n", + "Epoch 15/50\n", + "391/391 [==============================] - 29s 74ms/step - loss: 0.7720 - acc: 0.7257 - val_loss: 0.8090 - val_acc: 0.7224\n", + "Epoch 16/50\n", + "391/391 [==============================] - 29s 74ms/step - loss: 0.7508 - acc: 0.7324 - val_loss: 0.7846 - val_acc: 0.7272\n", + "Epoch 17/50\n", + "391/391 [==============================] - 30s 76ms/step - loss: 0.7292 - acc: 0.7432 - val_loss: 0.7717 - val_acc: 0.7323\n", + "Epoch 18/50\n", + "391/391 [==============================] - 33s 85ms/step - loss: 0.7047 - acc: 0.7511 - val_loss: 0.7407 - val_acc: 0.7453\n", + "Epoch 19/50\n", + "391/391 [==============================] - 33s 85ms/step - loss: 0.6884 - acc: 0.7570 - val_loss: 0.7040 - val_acc: 0.7537\n", + "Epoch 20/50\n", + "391/391 [==============================] - 33s 85ms/step - loss: 0.6718 - acc: 0.7652 - val_loss: 0.9103 - val_acc: 0.6907\n", + "Epoch 21/50\n", + "391/391 
[==============================] - 33s 85ms/step - loss: 0.6489 - acc: 0.7722 - val_loss: 0.7054 - val_acc: 0.7567\n", + "Epoch 22/50\n", + "391/391 [==============================] - 33s 84ms/step - loss: 0.6330 - acc: 0.7771 - val_loss: 0.7032 - val_acc: 0.7587\n", + "Epoch 23/50\n", + "391/391 [==============================] - 33s 86ms/step - loss: 0.6157 - acc: 0.7831 - val_loss: 0.6955 - val_acc: 0.7652\n", + "Epoch 24/50\n", + "391/391 [==============================] - 32s 83ms/step - loss: 0.6027 - acc: 0.7882 - val_loss: 0.7599 - val_acc: 0.7419\n", + "Epoch 25/50\n", + "391/391 [==============================] - 32s 83ms/step - loss: 0.5910 - acc: 0.7932 - val_loss: 0.6315 - val_acc: 0.7830\n", + "Epoch 26/50\n", + "391/391 [==============================] - 33s 84ms/step - loss: 0.5743 - acc: 0.7967 - val_loss: 0.6595 - val_acc: 0.7772\n", + "Epoch 27/50\n", + "391/391 [==============================] - 32s 82ms/step - loss: 0.5610 - acc: 0.8027 - val_loss: 0.6628 - val_acc: 0.7691\n", + "Epoch 28/50\n", + "391/391 [==============================] - 31s 80ms/step - loss: 0.5508 - acc: 0.8074 - val_loss: 0.6211 - val_acc: 0.7820\n", + "Epoch 29/50\n", + "391/391 [==============================] - 31s 81ms/step - loss: 0.5293 - acc: 0.8130 - val_loss: 0.6221 - val_acc: 0.7852\n", + "Epoch 30/50\n", + "391/391 [==============================] - 30s 78ms/step - loss: 0.5205 - acc: 0.8158 - val_loss: 0.6252 - val_acc: 0.7891\n", + "Epoch 31/50\n", + "391/391 [==============================] - 31s 79ms/step - loss: 0.5128 - acc: 0.8195 - val_loss: 0.6213 - val_acc: 0.7844\n", + "Epoch 32/50\n", + "391/391 [==============================] - 29s 75ms/step - loss: 0.4988 - acc: 0.8241 - val_loss: 0.5745 - val_acc: 0.8046\n", + "Epoch 33/50\n", + "391/391 [==============================] - 30s 76ms/step - loss: 0.4898 - acc: 0.8273 - val_loss: 0.5938 - val_acc: 0.8015\n", + "Epoch 34/50\n", + "391/391 [==============================] - 28s 72ms/step - loss: 0.4789 - acc: 0.8294 - val_loss: 0.5693 - val_acc: 0.8074\n", + "Epoch 35/50\n", + "391/391 [==============================] - 28s 72ms/step - loss: 0.4646 - acc: 0.8371 - val_loss: 0.5743 - val_acc: 0.8042\n", + "Epoch 36/50\n", + "391/391 [==============================] - 29s 74ms/step - loss: 0.4605 - acc: 0.8364 - val_loss: 0.6270 - val_acc: 0.7865\n", + "Epoch 37/50\n", + "391/391 [==============================] - 33s 84ms/step - loss: 0.4499 - acc: 0.8405 - val_loss: 0.6014 - val_acc: 0.7960\n", + "Epoch 38/50\n", + "391/391 [==============================] - 32s 83ms/step - loss: 0.4459 - acc: 0.8438 - val_loss: 0.6058 - val_acc: 0.7971\n", + "Epoch 39/50\n", + "391/391 [==============================] - 33s 84ms/step - loss: 0.4341 - acc: 0.8473 - val_loss: 0.5551 - val_acc: 0.8125\n", + "Epoch 40/50\n", + "391/391 [==============================] - 33s 83ms/step - loss: 0.4238 - acc: 0.8496 - val_loss: 0.5445 - val_acc: 0.8165\n", + "Epoch 41/50\n", + "391/391 [==============================] - 33s 83ms/step - loss: 0.4160 - acc: 0.8516 - val_loss: 0.5873 - val_acc: 0.8058\n", + "Epoch 42/50\n", + "391/391 [==============================] - 32s 83ms/step - loss: 0.4048 - acc: 0.8546 - val_loss: 0.6693 - val_acc: 0.7822\n", + "Epoch 43/50\n", + "391/391 [==============================] - 33s 83ms/step - loss: 0.4021 - acc: 0.8583 - val_loss: 0.5408 - val_acc: 0.8168\n", + "Epoch 44/50\n", + "391/391 [==============================] - 33s 84ms/step - loss: 0.3927 - acc: 0.8610 - val_loss: 0.5538 - val_acc: 0.8117\n", + "Epoch 
45/50\n", + "391/391 [==============================] - 33s 83ms/step - loss: 0.3904 - acc: 0.8615 - val_loss: 0.5981 - val_acc: 0.8063\n", + "Epoch 46/50\n", + "391/391 [==============================] - 32s 81ms/step - loss: 0.3811 - acc: 0.8642 - val_loss: 0.5285 - val_acc: 0.8223\n", + "Epoch 47/50\n", + "391/391 [==============================] - 32s 83ms/step - loss: 0.3726 - acc: 0.8676 - val_loss: 0.5317 - val_acc: 0.8206\n", + "Epoch 48/50\n", + "391/391 [==============================] - 31s 79ms/step - loss: 0.3661 - acc: 0.8692 - val_loss: 0.5568 - val_acc: 0.8172\n", + "Epoch 49/50\n", + "391/391 [==============================] - 31s 79ms/step - loss: 0.3582 - acc: 0.8711 - val_loss: 0.5057 - val_acc: 0.8320\n", + "Epoch 50/50\n", + "391/391 [==============================] - 29s 75ms/step - loss: 0.3516 - acc: 0.8740 - val_loss: 0.5760 - val_acc: 0.8099\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "conv2d_1 (Conv2D) (None, 32, 32, 32) 864 \n", + "_________________________________________________________________\n", + "batch_normalization_1 (Batch (None, 32, 32, 32) 128 \n", + "_________________________________________________________________\n", + "activation_1 (Activation) (None, 32, 32, 32) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_1 (ZeroPaddin (None, 32, 34, 34) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_1 (Depthwis (None, 32, 32, 32) 288 \n", + "_________________________________________________________________\n", + "batch_normalization_2 (Batch (None, 32, 32, 32) 128 \n", + "_________________________________________________________________\n", + "activation_2 (Activation) (None, 32, 32, 32) 0 \n", + "_________________________________________________________________\n", + "conv2d_2 (Conv2D) (None, 64, 32, 32) 2048 \n", + "_________________________________________________________________\n", + "batch_normalization_3 (Batch (None, 64, 32, 32) 256 \n", + "_________________________________________________________________\n", + "activation_3 (Activation) (None, 64, 32, 32) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_2 (ZeroPaddin (None, 64, 34, 34) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_2 (Depthwis (None, 64, 16, 16) 576 \n", + "_________________________________________________________________\n", + "batch_normalization_4 (Batch (None, 64, 16, 16) 256 \n", + "_________________________________________________________________\n", + "activation_4 (Activation) (None, 64, 16, 16) 0 \n", + "_________________________________________________________________\n", + "conv2d_3 (Conv2D) (None, 128, 16, 16) 8192 \n", + "_________________________________________________________________\n", + "batch_normalization_5 (Batch (None, 128, 16, 16) 512 \n", + "_________________________________________________________________\n", + "activation_5 (Activation) (None, 128, 16, 16) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_3 (ZeroPaddin (None, 128, 18, 18) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_3 (Depthwis (None, 128, 16, 16) 1152 \n", + "_________________________________________________________________\n", + 
"batch_normalization_6 (Batch (None, 128, 16, 16) 512 \n", + "_________________________________________________________________\n", + "activation_6 (Activation) (None, 128, 16, 16) 0 \n", + "_________________________________________________________________\n", + "conv2d_4 (Conv2D) (None, 128, 16, 16) 16384 \n", + "_________________________________________________________________\n", + "batch_normalization_7 (Batch (None, 128, 16, 16) 512 \n", + "_________________________________________________________________\n", + "activation_7 (Activation) (None, 128, 16, 16) 0 \n", + "_________________________________________________________________\n", + "dropout_1 (Dropout) (None, 128, 16, 16) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_4 (ZeroPaddin (None, 128, 18, 18) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_4 (Depthwis (None, 128, 8, 8) 1152 \n", + "_________________________________________________________________\n", + "batch_normalization_8 (Batch (None, 128, 8, 8) 512 \n", + "_________________________________________________________________\n", + "activation_8 (Activation) (None, 128, 8, 8) 0 \n", + "_________________________________________________________________\n", + "conv2d_5 (Conv2D) (None, 256, 8, 8) 32768 \n", + "_________________________________________________________________\n", + "batch_normalization_9 (Batch (None, 256, 8, 8) 1024 \n", + "_________________________________________________________________\n", + "activation_9 (Activation) (None, 256, 8, 8) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_5 (ZeroPaddin (None, 256, 10, 10) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_5 (Depthwis (None, 256, 8, 8) 2304 \n", + "_________________________________________________________________\n", + "batch_normalization_10 (Batc (None, 256, 8, 8) 1024 \n", + "_________________________________________________________________\n", + "activation_10 (Activation) (None, 256, 8, 8) 0 \n", + "_________________________________________________________________\n", + "conv2d_6 (Conv2D) (None, 256, 8, 8) 65536 \n", + "_________________________________________________________________\n", + "batch_normalization_11 (Batc (None, 256, 8, 8) 1024 \n", + "_________________________________________________________________\n", + "activation_11 (Activation) (None, 256, 8, 8) 0 \n", + "_________________________________________________________________\n", + "dropout_2 (Dropout) (None, 256, 8, 8) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_6 (ZeroPaddin (None, 256, 10, 10) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_6 (Depthwis (None, 256, 4, 4) 2304 \n", + "_________________________________________________________________\n", + "batch_normalization_12 (Batc (None, 256, 4, 4) 1024 \n", + "_________________________________________________________________\n", + "activation_12 (Activation) (None, 256, 4, 4) 0 \n", + "_________________________________________________________________\n", + "conv2d_7 (Conv2D) (None, 512, 4, 4) 131072 \n", + "_________________________________________________________________\n", + "batch_normalization_13 (Batc (None, 512, 4, 4) 2048 \n", + "_________________________________________________________________\n", + "activation_13 (Activation) (None, 512, 4, 4) 0 \n", + 
"_________________________________________________________________\n", + "zero_padding2d_7 (ZeroPaddin (None, 512, 6, 6) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_7 (Depthwis (None, 512, 4, 4) 4608 \n", + "_________________________________________________________________\n", + "batch_normalization_14 (Batc (None, 512, 4, 4) 2048 \n", + "_________________________________________________________________\n", + "activation_14 (Activation) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "conv2d_8 (Conv2D) (None, 512, 4, 4) 262144 \n", + "_________________________________________________________________\n", + "batch_normalization_15 (Batc (None, 512, 4, 4) 2048 \n", + "_________________________________________________________________\n", + "activation_15 (Activation) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_8 (ZeroPaddin (None, 512, 6, 6) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_8 (Depthwis (None, 512, 4, 4) 4608 \n", + "_________________________________________________________________\n", + "batch_normalization_16 (Batc (None, 512, 4, 4) 2048 \n", + "_________________________________________________________________\n", + "activation_16 (Activation) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "conv2d_9 (Conv2D) (None, 512, 4, 4) 262144 \n", + "_________________________________________________________________\n", + "batch_normalization_17 (Batc (None, 512, 4, 4) 2048 \n", + "_________________________________________________________________\n", + "activation_17 (Activation) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "dropout_3 (Dropout) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_9 (ZeroPaddin (None, 512, 6, 6) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_9 (Depthwis (None, 512, 4, 4) 4608 \n", + "_________________________________________________________________\n", + "batch_normalization_18 (Batc (None, 512, 4, 4) 2048 \n", + "_________________________________________________________________\n", + "activation_18 (Activation) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "conv2d_10 (Conv2D) (None, 512, 4, 4) 262144 \n", + "_________________________________________________________________\n", + "batch_normalization_19 (Batc (None, 512, 4, 4) 2048 \n", + "_________________________________________________________________\n", + "activation_19 (Activation) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_10 (ZeroPaddi (None, 512, 6, 6) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_10 (Depthwi (None, 512, 4, 4) 4608 \n", + "_________________________________________________________________\n", + "batch_normalization_20 (Batc (None, 512, 4, 4) 2048 \n", + "_________________________________________________________________\n", + "activation_20 (Activation) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "conv2d_11 (Conv2D) (None, 512, 4, 4) 262144 \n", + 
"_________________________________________________________________\n", + "batch_normalization_21 (Batc (None, 512, 4, 4) 2048 \n", + "_________________________________________________________________\n", + "activation_21 (Activation) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_11 (ZeroPaddi (None, 512, 6, 6) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_11 (Depthwi (None, 512, 4, 4) 4608 \n", + "_________________________________________________________________\n", + "batch_normalization_22 (Batc (None, 512, 4, 4) 2048 \n", + "_________________________________________________________________\n", + "activation_22 (Activation) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "conv2d_12 (Conv2D) (None, 512, 4, 4) 262144 \n", + "_________________________________________________________________\n", + "batch_normalization_23 (Batc (None, 512, 4, 4) 2048 \n", + "_________________________________________________________________\n", + "activation_23 (Activation) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "dropout_4 (Dropout) (None, 512, 4, 4) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_12 (ZeroPaddi (None, 512, 6, 6) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_12 (Depthwi (None, 512, 2, 2) 4608 \n", + "_________________________________________________________________\n", + "batch_normalization_24 (Batc (None, 512, 2, 2) 2048 \n", + "_________________________________________________________________\n", + "activation_24 (Activation) (None, 512, 2, 2) 0 \n", + "_________________________________________________________________\n", + "conv2d_13 (Conv2D) (None, 1024, 2, 2) 524288 \n", + "_________________________________________________________________\n", + "batch_normalization_25 (Batc (None, 1024, 2, 2) 4096 \n", + "_________________________________________________________________\n", + "activation_25 (Activation) (None, 1024, 2, 2) 0 \n", + "_________________________________________________________________\n", + "zero_padding2d_13 (ZeroPaddi (None, 1024, 4, 4) 0 \n", + "_________________________________________________________________\n", + "depthwise_conv2d_13 (Depthwi (None, 1024, 2, 2) 9216 \n", + "_________________________________________________________________\n", + "batch_normalization_26 (Batc (None, 1024, 2, 2) 4096 \n", + "_________________________________________________________________\n", + "activation_26 (Activation) (None, 1024, 2, 2) 0 \n", + "_________________________________________________________________\n", + "conv2d_14 (Conv2D) (None, 1024, 2, 2) 1048576 \n", + "_________________________________________________________________\n", + "batch_normalization_27 (Batc (None, 1024, 2, 2) 4096 \n", + "_________________________________________________________________\n", + "activation_27 (Activation) (None, 1024, 2, 2) 0 \n", + "_________________________________________________________________\n", + "dropout_5 (Dropout) (None, 1024, 2, 2) 0 \n", + "_________________________________________________________________\n", + "average_pooling2d_1 (Average (None, 1024, 1, 1) 0 \n", + "_________________________________________________________________\n", + "flatten_1 (Flatten) (None, 1024) 0 \n", + 
"_________________________________________________________________\n", + "dense_1 (Dense) (None, 10) 10250 \n", + "=================================================================\n", + "Total params: 3,239,114\n", + "Trainable params: 3,217,226\n", + "Non-trainable params: 21,888\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "# data augmentation, horizontal flips only\n", + "datagen = ImageDataGenerator(\n", + " featurewise_center=False,\n", + " featurewise_std_normalization=False,\n", + " rotation_range=0.0,\n", + " width_shift_range=0.0,\n", + " height_shift_range=0.0,\n", + " vertical_flip=False,\n", + " horizontal_flip=True)\n", + "datagen.fit(X_train)\n", + "\n", + "model = get_mobilenet()\n", + "\n", + "learning_rates=[]\n", + "for i in range(5):\n", + " learning_rates.append(2e-2)\n", + "for i in range(50-5):\n", + " learning_rates.append(1e-2)\n", + "for i in range(100-50):\n", + " learning_rates.append(8e-3)\n", + "for i in range(150-100):\n", + " learning_rates.append(4e-3)\n", + "for i in range(200-150):\n", + " learning_rates.append(2e-3)\n", + "for i in range(300-200):\n", + " learning_rates.append(1e-3)\n", + "\n", + "callbacks = [\n", + " LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))\n", + "]\n", + "\n", + "model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), \n", + " loss='categorical_crossentropy', \n", + " metrics=['accuracy'])\n", + "\n", + "model.fit_generator(\n", + " datagen.flow(X_train, y_train, batch_size=128),\n", + " steps_per_epoch=int(np.ceil(50000 / 128)),\n", + " validation_data=(X_test, y_test),\n", + " #epochs=300,\n", + " epochs=50,\n", + " callbacks=callbacks\n", + ")\n", + "\n", + "model.summary()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "time: 0.012634992599487305\n" + ] + } + ], + "source": [ + "import time\n", + "start_time = time.time()\n", + "keras_result = model.predict(X_test[:20])\n", + "print(\"time: \", time.time() - start_time)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "import keras2onnx\n", + "onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n", + "import onnx\n", + "onnx.save(onnx_model, \"../models/keras/mobilenet.onnx\")\n", + "import pickle\n", + "with open('dumps/mobilenet_keras_dump', 'wb') as fp:\n", + " pickle.dump(keras_result, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/mobilenet_onnx.ipynb b/hpvm/projects/onnx/src/mobilenet_onnx.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8b26d865421ca5c819b32abeb295b0169254a055 --- /dev/null +++ b/hpvm/projects/onnx/src/mobilenet_onnx.ipynb @@ -0,0 +1,210 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import sys\n", + "import keras\n", + "from keras.datasets import cifar10\n", + "from keras import backend as K\n", + "from keras.utils import to_categorical\n", + "import numpy as np\n", + "import os\n", + "import keras2onnx\n", + "import onnx\n", + "import onnxruntime" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X_train.shape = (50000, 3, 32, 32)\n", + "X_test.shape = (10000, 3, 32, 32)\n" + ] + } + ], + "source": [ + "K.set_image_data_format('channels_first')\n", + "\n", + "(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n", + "test_labels = y_test\n", + "\n", + "print (\"X_train.shape = \", X_train.shape)\n", + "print (\"X_test.shape = \", X_test.shape)\n", + "\n", + "\n", + "X_train = X_train.astype('float32')\n", + "X_test = X_test.astype('float32')\n", + "\n", + "\n", + "mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)\n", + "std = np.std(X_train, axis=(0, 2, 3), keepdims=True)\n", + "\n", + "X_train = (X_train - mean) / (std + 1e-9)\n", + "X_test = (X_test - mean) / (std + 1e-9)\n", + "\n", + "y_train = to_categorical(y_train, num_classes=10)\n", + "y_test = to_categorical(y_test, num_classes=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "sess = onnxruntime.InferenceSession(\"../models/keras/mobilenet.onnx\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input name : conv2d_1_input\n", + "Input shape : [None, 3, 32, 32]\n", + "Input type : tensor(float)\n" + ] + } + ], + "source": [ + "input_name = sess.get_inputs()[0].name\n", + "print(\"Input name :\", input_name)\n", + "input_shape = sess.get_inputs()[0].shape\n", + "print(\"Input shape :\", input_shape)\n", + "input_type = sess.get_inputs()[0].type\n", + "print(\"Input type :\", input_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Output name : dense_1\n", + "Output shape : [None, 10]\n", + "Output type : tensor(float)\n" + ] + } + ], + "source": [ + "output_name = sess.get_outputs()[0].name\n", + "print(\"Output name :\", output_name) \n", + "output_shape = sess.get_outputs()[0].shape\n", + "print(\"Output shape :\", output_shape)\n", + "output_type = sess.get_outputs()[0].type\n", + "print(\"Output type :\", output_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "time: 0.006893157958984375\n" + ] + } + ], + "source": [ + "import time\n", + "start_time = time.time()\n", + "ort_result = sess.run([output_name], {input_name: X_test[:40]})\n", + "print(\"time: \", time.time() - start_time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "with open('dumps/mobilenet_ort_dump', 'wb') as fp:\n", + " pickle.dump(ort_result, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import numpy 
as np\n", + "with open ('dumps/mobilenet_ort_dump', 'rb') as fp:\n", + " ort_res = pickle.load(fp)\n", + "with open ('dumps/mobilenet_keras_dump', 'rb') as fp:\n", + " keras_res = pickle.load(fp)\n", + "\n", + "for ref_o, o in zip(ort_res[0], keras_res):\n", + " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/mobilenet_tvm.ipynb b/hpvm/projects/onnx/src/mobilenet_tvm.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..c0b71588afdea134ed4a2aa675d2c6058772b0b0 --- /dev/null +++ b/hpvm/projects/onnx/src/mobilenet_tvm.ipynb @@ -0,0 +1,381 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from keras.datasets import cifar10\n", + "from keras import backend as K\n", + "import sys\n", + "import struct\n", + "import numpy as np\n", + "import os\n", + "import tvm\n", + "import tvm.relay as relay\n", + "from tvm.contrib import graph_runtime\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "K.set_image_data_format('channels_first')\n", + "\n", + "(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n", + "X_test = X_test.astype('float32')\n", + "mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)\n", + "std = np.std(X_train, axis=(0, 2, 3), keepdims=True)\n", + "\n", + "X_test = (X_test - mean) / (std + 1e-9)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#print(X_test.shape)\n", + "#X_test = X_test[:8000]\n", + "print(X_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "#import keras\n", + "#model = keras.applications.mobilenet.MobileNet(input_shape=X_test[0].shape, include_top=False, weights=None)\n", + "#import keras2onnx\n", + "#onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n", + "import onnx\n", + "onnx_model = onnx.load(\"../models/keras/mobilenet.onnx\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "target='cuda -libs=cudnn,cublas'\n", + "input_name = 'conv2d_1_input'\n", + "shape_dict = {input_name: X_test[:10].shape}\n", + "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", + "ctx = tvm.gpu()\n", + "with relay.build_config(opt_level=3):\n", + " executor = relay.build_module.create_executor('graph', mod, ctx, target)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# LLVM EXECUTE SUCCEEDED\n", + "import time\n", + "start_time = time.time()\n", + "tvm_out = executor.evaluate()(tvm.nd.array(X_test[:1000]), **params)\n", + "print(\"Time:\", 
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top1_tvm = np.argmax(tvm_out.asnumpy()[0])\n", + "import pickle\n", + "with open('dumps/tvm_dump', 'wb') as fp:\n", + " pickle.dump(tvm_out, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", + "[the two warnings above repeat for every conv/batch-norm layer in the model]\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.avg_pool2d\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (40, 1024, 'float32'), (10, 1024, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 1024, 2, 2, 'float32'), (1024, 1024, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 1024, 2, 2, 'float32'), (1024, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 512, 2, 2, 'float32'), (1024, 512, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 512, 4, 4, 'float32'), (512, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 512, 4, 4, 'float32'), (512, 512, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 512, 4, 4, 'float32'), (512, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 256, 4, 4, 'float32'), (512, 256, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 256, 8, 8, 'float32'), (256, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 256, 8, 8, 'float32'), (256, 256, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 256, 8, 8, 'float32'), (256, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 128, 8, 8, 'float32'), (256, 128, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 128, 16, 16, 'float32'), (128, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 128, 16, 16, 'float32'), (128, 128, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 128, 16, 16, 'float32'), (128, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 64, 16, 16, 'float32'), (128, 64, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 64, 32, 32, 'float32'), (64, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 32, 32, 32, 'float32'), (64, 32, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 32, 32, 32, 'float32'), (32, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 3, 32, 32, 'float32'), (32, 3, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n" + ] + } + ], + "source": [ + "input_name = 'conv2d_1_input'\n", + "input_size = 40\n", + "shape_dict = {input_name: X_test[:input_size].shape}\n", + "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", + "target = 'cuda -libs=cudnn,cublas'\n", + "with relay.build_config(opt_level=3):\n", + " graph, lib, params = relay.build(mod, target, params=params)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "ctx = tvm.gpu()\n", + "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n", + "# create module\n", + "module = graph_runtime.create(graph, lib, ctx)\n", + "# set input and parameters\n", + "module.set_input(\"conv2d_1_input\", X_test[:input_size])\n", + "module.set_input(**params)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time: 0.012343168258666992\n" + ] + } + ], + "source": [ + "# run\n", + "start_time = time.time()\n", + "module.run()\n", + "out_shape = (input_size, 10)\n", + "# get output\n", + "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n", + "print(\"Time:\", time.time() - start_time)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mobilenet 17.814301 ms (0.00 ms)\n" + ] + } + ], + "source": [ + "# evaluate\n", + "ftimer = module.module.time_evaluator(\"run\", ctx, number=1)\n", + "prof_res = np.array(ftimer().results) * 1000 # multiply by 1000 to convert to milliseconds\n", + "print(\"%-20s %-19s (%s)\" % (\"mobilenet\", \"%.6f ms\" % np.mean(prof_res), \"%.2f ms\" % np.std(prof_res)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import numpy as np\n", + "with open ('dumps/tvm_dump', 'rb') as fp:\n", + " tvm_res = pickle.load(fp)\n", + "with open ('dumps/mobilenet_keras_dump', 'rb') as fp:\n", + " keras_res = pickle.load(fp)\n", + "\n", + "for ref_o, o in zip(tvm_res, keras_res):\n", + " np.testing.assert_almost_equal(ref_o, o, 3) # outputs agree to 3 decimal places\n", + "print(\"Accuracy matched!\")" + ] + },
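The pickle-dump-then-assert step above is repeated, with different file names, in every backend notebook here (Keras, ONNX Runtime, TVM). A hedged sketch of that pattern as a reusable pair of helpers follows; the names `dump_outputs`/`compare_dumps` and their signatures are illustrative, not part of the notebooks.

```python
# A minimal sketch of the dump-and-compare pattern used across these
# notebooks; the helper names and signatures are assumptions.
import pickle

import numpy as np


def dump_outputs(path, outputs):
    # Persist one backend's predictions so another notebook can load them.
    with open(path, "wb") as fp:
        pickle.dump(outputs, fp)


def compare_dumps(ref_path, test_path, decimal=3):
    # decimal=3 tolerates small numerical drift between backends
    # (Keras/cuDNN vs. ONNX Runtime vs. TVM) on the same inputs.
    with open(ref_path, "rb") as fp:
        ref = pickle.load(fp)
    with open(test_path, "rb") as fp:
        test = pickle.load(fp)
    for ref_o, o in zip(ref, test):
        np.testing.assert_almost_equal(ref_o, o, decimal)
```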
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use TVM's implementation of mobilenet\n", + "import sys\n", + "import struct\n", + "import numpy as np\n", + "import os\n", + "import tvm\n", + "import tvm.relay as relay\n", + "from tvm.relay import testing\n", + "from tvm.contrib import graph_runtime\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "batch_size = 100\n", + "dtype = 'float32'\n", + "target='cuda -libs=cudnn,cublas'\n", + "net, params = testing.mobilenet.get_workload(batch_size=batch_size, dtype=dtype)\n", + "input_shape = (batch_size, 3, 224, 224)\n", + "output_shape = (batch_size, 1000)\n", + "with relay.build_config(opt_level=3):\n", + " graph, lib, params = relay.build(net, target=target, params=params)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ctx = tvm.context(str(target), 0)\n", + "module = graph_runtime.create(graph, lib, ctx)\n", + "data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))\n", + "module.set_input('data', data_tvm)\n", + "module.set_input(**params)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# evaluate\n", + "ftimer = module.module.time_evaluator(\"run\", ctx, number=1)\n", + "prof_res = np.array(ftimer().results) * 1000 # multiply by 1000 to convert to milliseconds\n", + "print(\"%-20s %-19s (%s)\" % (\"mobilenet\", \"%.2f ms\" % np.mean(prof_res), \"%.2f ms\" % np.std(prof_res)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/vgg16_keras.ipynb b/hpvm/projects/onnx/src/vgg16_keras.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e1d1422ead3c3bc5afa602a13b3087ecbebc8eab --- /dev/null +++ b/hpvm/projects/onnx/src/vgg16_keras.ipynb @@ -0,0 +1,392 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import keras\n", + "from keras.datasets import cifar10\n", + "from keras.preprocessing.image import ImageDataGenerator\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense, Dropout, Activation, Flatten\n", + "from keras.layers import Conv2D, MaxPooling2D, BatchNormalization\n", + "from keras import optimizers\n", + "import numpy as np\n", + "from keras.layers.core import Lambda\n", + "from keras import backend as K\n", + "from keras import regularizers\n", + "import os\n", + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class cifar10vgg:\n", + " def __init__(self,train=True):\n", + " self.num_classes = 10\n", + " self.weight_decay = 0.0005\n", + " self.x_shape = [3,32,32]\n", + "\n", + " self.model = self.build_model()\n", + " if train:\n", + " self.model = self.train(self.model)\n", + " else:\n", + " self.model.load_weights('cifar10vgg.h5')\n", + "\n", + "\n", + " def build_model(self):\n", + " # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper.\n", + "\n", + " model = Sequential()\n", + " weight_decay = self.weight_decay\n", + "\n", + " model.add(Conv2D(64, (3, 3), padding='same',\n", + " input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " #model.add(BatchNormalization())\n", + " model.add(Dropout(0.3))\n", + "\n", + " model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " # model.add(BatchNormalization())\n", + " model.add(MaxPooling2D(pool_size=(2, 2)))\n", + "\n", + " model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " #model.add(BatchNormalization())\n", + " model.add(Dropout(0.4))\n", + "\n", + " 
model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " #model.add(BatchNormalization())\n", + " model.add(MaxPooling2D(pool_size=(2, 2)))\n", + "\n", + " model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " #model.add(BatchNormalization())\n", + " model.add(Dropout(0.4))\n", + "\n", + " model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " # model.add(BatchNormalization())\n", + " model.add(Dropout(0.4))\n", + "\n", + " model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " # model.add(BatchNormalization())\n", + " model.add(MaxPooling2D(pool_size=(2, 2)))\n", + "\n", + " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " # model.add(BatchNormalization())\n", + " model.add(Dropout(0.4))\n", + "\n", + " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " # model.add(BatchNormalization())\n", + " model.add(Dropout(0.4))\n", + "\n", + " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " # model.add(BatchNormalization())\n", + " model.add(MaxPooling2D(pool_size=(2, 2)))\n", + "\n", + " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " # model.add(BatchNormalization())\n", + " model.add(Dropout(0.4))\n", + "\n", + " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " # model.add(BatchNormalization())\n", + " model.add(Dropout(0.4))\n", + "\n", + " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " # model.add(BatchNormalization())\n", + " model.add(MaxPooling2D(pool_size=(2, 2)))\n", + " model.add(Dropout(0.5))\n", + "\n", + " model.add(Flatten())\n", + " model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay)))\n", + " model.add(Activation('relu'))\n", + " # model.add(BatchNormalization())\n", + "\n", + " model.add(Dropout(0.5))\n", + " model.add(Dense(self.num_classes))\n", + " model.add(Activation('softmax'))\n", + " return model\n", + "\n", + "\n", + " def normalize(self,X_train,X_test):\n", + " #this function normalizes inputs for zero mean and unit variance\n", + " # it is used when training a model.\n", + " # Input: training set and test set\n", + " # Output: normalized training set and test set according to the training set statistics.\n", + " mean = np.mean(X_train,axis=(0,1,2,3))\n", + " std = np.std(X_train, axis=(0, 1, 2, 3))\n", + " X_train = (X_train-mean)/(std+1e-7)\n", + " X_test = (X_test-mean)/(std+1e-7)\n", + " return X_train, X_test\n", + "\n", + " \n", + " def normalize_production(self,x):\n", + " #this function is used to normalize instances in production according to saved training set statistics\n", + " # Input: X - a set of instances\n", + " # Output: X normalized with the saved constants.\n", + "\n", + " #these values were produced during the first training and are general for the standard cifar10 training set normalization\n",
 + " mean = 120.707\n", + " std = 64.15\n", + " return (x-mean)/(std+1e-7)\n", + "\n", + " \n", + " def predict(self,x,normalize=True,batch_size=50):\n", + " if normalize:\n", + " x = self.normalize_production(x)\n", + " return self.model.predict(x,batch_size)\n", + "\n", + " \n", + " def train(self,model):\n", + "\n", + " #training parameters\n", + " batch_size = 128\n", + " #maxepoches = 250\n", + " maxepoches = 30\n", + " learning_rate = 0.01\n", + " lr_decay = 1e-6\n", + " lr_drop = 20\n", + " # The data, shuffled and split between train and test sets:\n", + " (x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", + " x_train = x_train.astype('float32')\n", + " x_test = x_test.astype('float32')\n", + " x_train, x_test = self.normalize(x_train, x_test)\n", + "\n", + " y_train = keras.utils.to_categorical(y_train, self.num_classes)\n", + " y_test = keras.utils.to_categorical(y_test, self.num_classes)\n", + "\n", + " # halve the learning rate every lr_drop (20) epochs\n", + " def lr_scheduler(epoch):\n", + " return learning_rate * (0.5 ** (epoch // lr_drop))\n", + " reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)\n", + "\n", + " #data augmentation\n", + " datagen = ImageDataGenerator(\n", + " featurewise_center=False, # set input mean to 0 over the dataset\n", + " samplewise_center=False, # set each sample mean to 0\n", + " featurewise_std_normalization=False, # divide inputs by std of the dataset\n", + " samplewise_std_normalization=False, # divide each input by its std\n", + " zca_whitening=False, # apply ZCA whitening\n", + " rotation_range=15, # randomly rotate images in the range (degrees, 0 to 180)\n", + " width_shift_range=0.1, # randomly shift images horizontally (fraction of total width)\n", + " height_shift_range=0.1, # randomly shift images vertically (fraction of total height)\n", + " horizontal_flip=True, # randomly flip images\n", + " vertical_flip=False) # randomly flip images\n", + " # datagen.fit computes the dataset statistics (std, mean, and principal components if ZCA whitening is applied).\n", + " datagen.fit(x_train)\n", + "\n", + "\n", + "\n", + " #optimization details\n", + " sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)\n", + " model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])\n", + "\n", + "\n", + " # training, with the learning rate halved every 20 epochs by the scheduler above\n", + "\n", + " historytemp = model.fit_generator(datagen.flow(x_train, y_train,\n", + " batch_size=batch_size),\n", + " steps_per_epoch=x_train.shape[0] // batch_size,\n", + " epochs=maxepoches,\n", + " validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2)\n", + " \n", + " model.save_weights('cifar10vgg.h5')\n", + " return model\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Variable *= will be deprecated. 
Use `var.assign(var * other)` if you want assignment to the variable value or `x = x * y` if you want a new python Tensor object.\n", + "Epoch 1/30\n", + " - 29s - loss: 4.3002 - acc: 0.1547 - val_loss: 4.2265 - val_acc: 0.1443\n", + "Epoch 2/30\n", + " - 23s - loss: 3.9048 - acc: 0.2016 - val_loss: 3.9923 - val_acc: 0.1640\n", + "Epoch 3/30\n", + " - 23s - loss: 3.7052 - acc: 0.2210 - val_loss: 3.8900 - val_acc: 0.1712\n", + "Epoch 4/30\n", + " - 22s - loss: 3.4890 - acc: 0.2839 - val_loss: 3.4656 - val_acc: 0.2731\n", + "Epoch 5/30\n", + " - 25s - loss: 3.2615 - acc: 0.3258 - val_loss: 3.1615 - val_acc: 0.3296\n", + "Epoch 6/30\n", + " - 23s - loss: 3.0814 - acc: 0.3557 - val_loss: 3.0238 - val_acc: 0.3395\n", + "Epoch 7/30\n", + " - 23s - loss: 2.9114 - acc: 0.3883 - val_loss: 2.8283 - val_acc: 0.4041\n", + "Epoch 8/30\n", + " - 23s - loss: 2.7543 - acc: 0.4239 - val_loss: 2.5891 - val_acc: 0.4622\n", + "Epoch 9/30\n", + " - 22s - loss: 2.6056 - acc: 0.4594 - val_loss: 2.5562 - val_acc: 0.4796\n", + "Epoch 10/30\n", + " - 25s - loss: 2.4590 - acc: 0.5013 - val_loss: 2.2543 - val_acc: 0.5714\n", + "Epoch 11/30\n", + " - 24s - loss: 2.3017 - acc: 0.5439 - val_loss: 2.1377 - val_acc: 0.5954\n", + "Epoch 12/30\n", + " - 22s - loss: 2.1519 - acc: 0.5811 - val_loss: 1.9902 - val_acc: 0.6356\n", + "Epoch 13/30\n", + " - 23s - loss: 2.0276 - acc: 0.6106 - val_loss: 1.8868 - val_acc: 0.6494\n", + "Epoch 14/30\n", + " - 22s - loss: 1.9065 - acc: 0.6357 - val_loss: 1.7370 - val_acc: 0.6813\n", + "Epoch 15/30\n", + " - 22s - loss: 1.8081 - acc: 0.6532 - val_loss: 1.7568 - val_acc: 0.6721\n", + "Epoch 16/30\n", + " - 22s - loss: 1.7116 - acc: 0.6736 - val_loss: 1.6687 - val_acc: 0.6959\n", + "Epoch 17/30\n", + " - 22s - loss: 1.6337 - acc: 0.6883 - val_loss: 1.4748 - val_acc: 0.7350\n", + "Epoch 18/30\n", + " - 22s - loss: 1.5609 - acc: 0.7022 - val_loss: 1.4428 - val_acc: 0.7445\n", + "Epoch 19/30\n", + " - 23s - loss: 1.4890 - acc: 0.7160 - val_loss: 1.3682 - val_acc: 0.7582\n", + "Epoch 20/30\n", + " - 22s - loss: 1.4291 - acc: 0.7298 - val_loss: 1.3004 - val_acc: 0.7717\n", + "Epoch 21/30\n", + " - 22s - loss: 1.3169 - acc: 0.7593 - val_loss: 1.2640 - val_acc: 0.7800\n", + "Epoch 22/30\n", + " - 24s - loss: 1.2843 - acc: 0.7669 - val_loss: 1.2191 - val_acc: 0.7864\n", + "Epoch 23/30\n", + " - 23s - loss: 1.2431 - acc: 0.7757 - val_loss: 1.1598 - val_acc: 0.7998\n", + "Epoch 24/30\n", + " - 22s - loss: 1.2166 - acc: 0.7813 - val_loss: 1.1435 - val_acc: 0.8097\n", + "Epoch 25/30\n", + " - 22s - loss: 1.1872 - acc: 0.7869 - val_loss: 1.1180 - val_acc: 0.8123\n", + "Epoch 26/30\n", + " - 23s - loss: 1.1634 - acc: 0.7916 - val_loss: 1.0977 - val_acc: 0.8123\n", + "Epoch 27/30\n", + " - 22s - loss: 1.1300 - acc: 0.7965 - val_loss: 1.0785 - val_acc: 0.8199\n", + "Epoch 28/30\n", + " - 22s - loss: 1.1169 - acc: 0.7983 - val_loss: 1.0927 - val_acc: 0.8120\n", + "Epoch 29/30\n", + " - 22s - loss: 1.0944 - acc: 0.8025 - val_loss: 1.0141 - val_acc: 0.8275\n", + "Epoch 30/30\n", + " - 22s - loss: 1.0686 - acc: 0.8069 - val_loss: 1.0341 - val_acc: 0.8206\n" + ] + } + ], + "source": [ + "K.set_image_data_format('channels_first')\n", + "\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n", + "\n", + "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", + "test_labels = y_test\n", + "train_labels = y_train\n", + "x_train = x_train.astype('float32')\n", + "x_test = x_test.astype('float32')\n", + "\n", + "y_train = keras.utils.to_categorical(y_train, 10)\n", + "y_test = 
keras.utils.to_categorical(y_test, 10)\n", + "\n", + "model = cifar10vgg()\n", + "\n", + "#predicted_x = model.predict(x_test)\n", + "\n", + "#norm_test = model.normalize_production(x_test)\n", + "\n", + "# Normalizing train data before dumping\n", + "#x_train, x_test = model.normalize(x_train, x_test)\n", + "#x_train = model.normalize_production(x_train)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "time: 1.3477363586425781\n" + ] + } + ], + "source": [ + "import time\n", + "start = time.time()\n", + "result = model.predict(x_test[:8000])\n", + "print(\"time:\", time.time() - start)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import keras2onnx\n", + "onnx_model = keras2onnx.convert_keras(model.model, model.model.name, target_opset=10)\n", + "import onnx\n", + "onnx.save(onnx_model, \"../models/keras/vgg16_cifar10.onnx\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "with open('dumps/vgg16_keras_dump', 'wb') as fp:\n", + " pickle.dump(result, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/vgg16_onnx.ipynb b/hpvm/projects/onnx/src/vgg16_onnx.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..9d0453cfb754a488d06a00afa78383ff9269e326 --- /dev/null +++ b/hpvm/projects/onnx/src/vgg16_onnx.ipynb @@ -0,0 +1,158 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from keras.datasets import cifar10\n", + "from keras import backend as K\n", + "import keras\n", + "import numpy as np\n", + "import os\n", + "import keras2onnx\n", + "import onnx\n", + "import onnxruntime\n", + "import time\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "K.set_image_data_format('channels_first')\n", + "\n", + "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", + "test_labels = y_test\n", + "train_labels = y_train\n", + "x_train = x_train.astype('float32')\n", + "x_test = x_test.astype('float32')\n", + "\n", + "y_train = keras.utils.to_categorical(y_train, 10)\n", + "y_test = keras.utils.to_categorical(y_test, 10)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input name : conv2d_1_input\n", + "Input shape : [None, 3, 32, 32]\n", + "Input type : tensor(float)\n", + "Output name : activation_15\n", + "Output shape : [None, 10]\n", + "Output type : tensor(float)\n" + ] + } + ], + "source": [ + "sess = onnxruntime.InferenceSession(\"../models/keras/vgg16_cifar10.onnx\")\n", + 
"input_name = sess.get_inputs()[0].name\n", + "print(\"Input name :\", input_name)\n", + "input_shape = sess.get_inputs()[0].shape\n", + "print(\"Input shape :\", input_shape)\n", + "input_type = sess.get_inputs()[0].type\n", + "print(\"Input type :\", input_type)\n", + "output_name = sess.get_outputs()[0].name\n", + "print(\"Output name :\", output_name) \n", + "output_shape = sess.get_outputs()[0].shape\n", + "print(\"Output shape :\", output_shape)\n", + "output_type = sess.get_outputs()[0].type\n", + "print(\"Output type :\", output_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "time: 1.4347426891326904\n" + ] + } + ], + "source": [ + "start_time = time.time()\n", + "ort_result = sess.run([output_name], {input_name: x_test[:8000]})\n", + "print(\"time: \", time.time() - start_time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "with open('dumps/vgg16_ort_dump', 'wb') as fp:\n", + " pickle.dump(ort_result, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import numpy as np\n", + "with open ('dumps/vgg16_ort_dump', 'rb') as fp:\n", + " ort_res = pickle.load(fp)\n", + "with open ('dumps/vgg16_keras_dump', 'rb') as fp:\n", + " keras_res = pickle.load(fp)\n", + "\n", + "for ref_o, o in zip(ort_res[0], keras_res):\n", + " np.testing.assert_almost_equal(ref_o, o, 1) #using decimal of 3 would pass test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hpvm/projects/onnx/src/vgg16_tvm.ipynb b/hpvm/projects/onnx/src/vgg16_tvm.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8cea48d44615e3d407d25b943df6dbb86923a2d0 --- /dev/null +++ b/hpvm/projects/onnx/src/vgg16_tvm.ipynb @@ -0,0 +1,240 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from keras.datasets import cifar10\n", + "from keras import backend as K\n", + "import sys\n", + "import struct\n", + "import numpy as np\n", + "import os\n", + "import tvm\n", + "import tvm.relay as relay\n", + "from tvm.contrib import graph_runtime\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "K.set_image_data_format('channels_first')\n", + "\n", + "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", + "x_test = x_test.astype('float32')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#print(X_test.shape)\n", + "#X_test = X_test[:8000]\n", + "#print(X_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + 
"source": [ + "import onnx\n", + "onnx_model = onnx.load(\"../models/keras/vgg16_cifar10.onnx\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "target='cuda -libs=cudnn,cublas'\n", + "input_name = 'conv2d_8_input'\n", + "shape_dict = {input_name: X_test[:1000].shape}\n", + "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", + "ctx = tvm.gpu()\n", + "with relay.build_config(opt_level=3):\n", + " executor = relay.build_module.create_executor('graph', mod, ctx, target)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# LLVM EXECUTE SUCCEEDED\n", + "import time\n", + "start_time = time.time()\n", + "tvm_out = executor.evaluate()(tvm.nd.array(X_test.astype('float32')[:1000]), **params)\n", + "print(\"Time:\", time.time() - start_time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top1_tvm = np.argmax(tvm_out.asnumpy()[0])\n", + "import pickle\n", + "with open('dumps/tvm_dump', 'wb') as fp:\n", + " pickle.dump(tvm_output, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", + "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (6000, 512, 'float32'), (10, 512, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (6000, 512, 'float32'), (512, 512, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 512, 2, 2, 'float32'), (512, 512, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 512, 4, 4, 'float32'), (512, 512, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 256, 4, 4, 'float32'), (512, 256, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 256, 8, 8, 'float32'), (256, 256, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 128, 8, 8, 'float32'), (256, 128, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 128, 16, 16, 'float32'), (128, 128, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 64, 16, 16, 'float32'), (128, 64, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 64, 32, 32, 'float32'), (64, 64, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", + "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 3, 32, 32, 'float32'), (64, 3, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n" + ] + } + ], + "source": [ + "input_name = 'conv2d_1_input'\n", + "input_size = 6000\n", + "shape_dict = {input_name: x_test[:input_size].shape}\n", + "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", + "target = 'cuda -libs=cudnn,cublas'\n", + "with relay.build_config(opt_level=3):\n", + " graph, lib, params = relay.build(mod, target, params=params)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "ctx = tvm.gpu()\n", + "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n", + "# create module\n", + "module = graph_runtime.create(graph, lib, ctx)\n", + "# set input and parameters\n", + "module.set_input(input_name, x_test[:input_size])\n", + "module.set_input(**params)\n", + "# run (timed in the next cell)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time: 0.2862420082092285\n" + ] + } + ], + "source": [ + "start_time = time.time()\n", + "module.run()\n", + "out_shape = (input_size, 10)\n", + "# get output\n", + "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n", + "print(\"Time:\", time.time() - start_time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import numpy as np\n", + "with open('dumps/tvm_dump', 'rb') as fp:\n", + " tvm_res = pickle.load(fp)\n", + "with open('dumps/vgg16_keras_dump', 'rb') as fp:\n", + " keras_res = pickle.load(fp)\n", + "\n", + "for ref_o, o in zip(tvm_res, keras_res):\n", + " np.testing.assert_almost_equal(ref_o, o, 3) # row-wise match to 3 decimal places\n", + "print(\"Outputs match!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}