From 9930b3c7364434caeb14fae7f8076b52d0858ea2 Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Sat, 23 Jan 2021 04:58:34 -0600
Subject: [PATCH] Added some example models

---
 .gitignore               |  9 +---
 bin/show_baseline_acc.py | 37 +++++++++++++++
 model_zoo/__init__.py    |  4 ++
 model_zoo/_container.py  | 37 +++++++++++++++
 model_zoo/alexnet.py     | 30 ++++++++++++
 model_zoo/datasets.py    | 99 ++++++++++++++++++++++++++++++++++++++++
 model_zoo/lenet.py       | 16 +++++++
 model_zoo/vgg16.py       | 39 ++++++++++++++++
 predtuner/__init__.py    |  6 +++
 test/test_torchapp.py    | 23 ++++------
 10 files changed, 280 insertions(+), 20 deletions(-)
 create mode 100644 bin/show_baseline_acc.py
 create mode 100644 model_zoo/__init__.py
 create mode 100644 model_zoo/_container.py
 create mode 100644 model_zoo/alexnet.py
 create mode 100644 model_zoo/datasets.py
 create mode 100644 model_zoo/lenet.py
 create mode 100644 model_zoo/vgg16.py

diff --git a/.gitignore b/.gitignore
index d41f32f..a298af1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,15 +21,10 @@ doc/build
 # Custom
 .idea/
 .vscode/
-/data/
-model_params
+model_data
 results/
 tuner_results
 tuner_results/
 *.sh
 *.ipynb
-logistics/
-!tuner_data/global_knobs.txt
-!tuner_data/vision_app.json
-/*.yaml
-!env.yaml
\ No newline at end of file
+logistics/
\ No newline at end of file
diff --git a/bin/show_baseline_acc.py b/bin/show_baseline_acc.py
new file mode 100644
index 0000000..08a90fd
--- /dev/null
+++ b/bin/show_baseline_acc.py
@@ -0,0 +1,65 @@
+"""Print the QoS that `measure_qos_perf({}, False)` reports for each network.
+
+Dataset and checkpoint paths are relative to the current working directory
+(``model_data/...``).
+"""
+import site
+from pathlib import Path
+
+import torch
+from torch.nn.modules.module import Module
+from torch.utils.data.dataloader import DataLoader
+from torch.utils.data.dataset import Subset
+
+# Put the repository root on sys.path so that `model_zoo` and `predtuner` can
+# be imported when this script is run from `bin/`.
+site.addsitedir(str(Path(__file__).absolute().parent.parent))
+import model_zoo as net
+from predtuner import TorchApp, accuracy, get_knobs_from_file
+
+
+def load_from_default_path(cls, prefix: str):
+    """Build dataset `cls` from `<prefix>/input.bin` and `<prefix>/labels.bin`."""
+    return cls.from_file(f"{prefix}/input.bin", f"{prefix}/labels.bin")
+
+
+# Dataset name -> (dataset class, directory holding input.bin and labels.bin).
+# Datasets are only read when a network below needs them, so an unused entry
+# (imagenet) costs neither memory nor a file that must exist.
+dataset_sources = {
+    "mnist": (net.MNIST, "model_data/mnist"),
+    "cifar10": (net.CIFAR, "model_data/cifar10"),
+    "cifar100": (net.CIFAR, "model_data/cifar100"),
+    "imagenet": (net.ImageNet, "model_data/imagenet"),
+}
+
+# Checkpoint name -> (network class, key into `dataset_sources`).
+networks_in_folder = {
+    "lenet_mnist": (net.LeNet, "mnist"),
+    "alexnet_cifar10": (net.AlexNet, "cifar10"),
+    "alexnet2_cifar10": (net.AlexNet2, "cifar10"),
+    "vgg16_cifar10": (net.VGG16Cifar10, "cifar10"),
+    "vgg16_cifar100": (net.VGG16Cifar100, "cifar100"),
+}
+
+
+def main():
+    """Load every checkpoint, measure its QoS and print one line per network."""
+    loaded = {}  # dataset name -> dataset, filled on first use
+    for name, (cls, dataset_name) in networks_in_folder.items():
+        if dataset_name not in loaded:
+            dataset_cls, prefix = dataset_sources[dataset_name]
+            loaded[dataset_name] = load_from_default_path(dataset_cls, prefix)
+        dataset = loaded[dataset_name]
+        network: Module = cls()
+        network.load_state_dict(torch.load(f"model_data/{name}.pth.tar"))
+        # First loader: samples 5000-9999 only; second loader: whole dataset.
+        d1 = DataLoader(Subset(dataset, range(5000, 10000)), 1)
+        d2 = DataLoader(dataset, 1)
+        app = TorchApp("", network, d1, d2, get_knobs_from_file(), accuracy)
+        qos, _ = app.measure_qos_perf({}, False)
+        print(f"{name} -> {qos}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/model_zoo/__init__.py b/model_zoo/__init__.py
new file mode 100644
index 0000000..375e4ac
--- /dev/null
+++ b/model_zoo/__init__.py
@@ -0,0 +1,4 @@
+from .alexnet import AlexNet, AlexNet2
+from .datasets import CIFAR, MNIST, ImageNet
+from .lenet import LeNet
+from .vgg16 import VGG16Cifar10, VGG16Cifar100
diff --git a/model_zoo/_container.py b/model_zoo/_container.py
new file mode 100644
index 0000000..6f38094
--- /dev/null
+++ b/model_zoo/_container.py
@@ -0,0 +1,48 @@
+"""Building blocks shared by the example networks in this package."""
+from typing import Callable, Optional
+
+import torch
+from torch.nn import Conv2d, MaxPool2d, Module, Sequential, Softmax
+
+# Zero-argument factory returning an activation module; None means no activation.
+ActivT = Optional[Callable[[], Module]]
+
+
+def make_conv_pool_activ(
+    in_channels: int,
+    out_channels: int,
+    kernel_size: int,
+    activation: ActivT = None,
+    pool_size: Optional[int] = None,
+    pool_stride: Optional[int] = None,
+    **conv_kwargs
+):
+    """Return the layers of one convolution stage as a list.
+
+    The list starts with a `Conv2d` (extra keyword arguments are forwarded to
+    it), continues with a `MaxPool2d` when `pool_size` is given, and ends with
+    `activation()` when an activation factory is supplied.
+    """
+    conv = Conv2d(in_channels, out_channels, kernel_size, **conv_kwargs)
+    pool = [] if pool_size is None else [MaxPool2d(pool_size, stride=pool_stride)]
+    activ = [activation()] if activation else []
+    return [conv, *pool, *activ]
+
+
+class Classifier(Module):
+    """Convolutional layers followed by linear layers and an optional softmax."""
+
+    def __init__(
+        self, convs: Sequential, linears: Sequential, use_softmax: bool = False
+    ):
+        super().__init__()
+        self.convs = convs
+        self.linears = linears
+        # An empty Sequential returns its input unchanged.
+        self.softmax = Softmax(1) if use_softmax else Sequential()
+
+    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
+        """Apply `convs`, flatten each sample, then apply `linears` and `softmax`."""
+        features = self.convs(inputs)
+        flattened = features.view(features.shape[0], -1)
+        return self.softmax(self.linears(flattened))
diff --git a/model_zoo/alexnet.py b/model_zoo/alexnet.py
new file mode 100644
index 0000000..9256139
--- /dev/null
+++ b/model_zoo/alexnet.py
@@ -0,0 +1,46 @@
+from torch.nn import Linear, ReLU, Sequential, Tanh
+
+from ._container import Classifier, make_conv_pool_activ
+
+
+class AlexNet(Classifier):
+    """Five Tanh convolution stages followed by one Linear(4096, 10) layer."""
+
+    def __init__(self):
+        # One row per stage: in_channels, out_channels, kernel_size, pool_size,
+        # padding. A pool_size of None means the stage has no max-pooling.
+        stages = [
+            (3, 64, 11, 2, 5),
+            (64, 192, 5, 2, 2),
+            (192, 384, 3, None, 1),
+            (384, 256, 3, None, 1),
+            (256, 256, 3, 2, 1),
+        ]
+        layers = []
+        for in_ch, out_ch, kernel, pool, pad in stages:
+            layers += make_conv_pool_activ(
+                in_ch, out_ch, kernel, Tanh, pool_size=pool, padding=pad
+            )
+        super().__init__(Sequential(*layers), Sequential(Linear(4096, 10)))
+
+
+class AlexNet2(Classifier):
+    """Six 3x3 Tanh convolution stages followed by one Linear(2048, 10) layer."""
+
+    def __init__(self):
+        # One row per stage: in_channels, out_channels, pool_size (None means
+        # no max-pooling). Every stage uses kernel_size=3 and padding=1.
+        stages = [
+            (3, 32, None),
+            (32, 32, 2),
+            (32, 64, None),
+            (64, 64, 2),
+            (64, 128, None),
+            (128, 128, 2),
+        ]
+        layers = []
+        for in_ch, out_ch, pool in stages:
+            layers += make_conv_pool_activ(
+                in_ch, out_ch, 3, Tanh, pool_size=pool, padding=1
+            )
+        super().__init__(Sequential(*layers), Sequential(Linear(2048, 10)))
diff --git a/model_zoo/datasets.py b/model_zoo/datasets.py
new file mode 100644
index 0000000..d5fd84e
--- /dev/null
+++ b/model_zoo/datasets.py
@@ -0,0 +1,136 @@
+"""Datasets stored as raw binary dumps: one file of inputs, one of labels."""
+import logging
+from pathlib import Path
+from typing import Iterator, Tuple, Union
+
+import numpy as np
+import torch
+from torch.utils.data.dataset import Dataset
+
+RetT = Tuple[torch.Tensor, torch.Tensor]
+msg_logger = logging.getLogger(__name__)
+
+PathLike = Union[Path, str]
+
+
+class SingleFileDataset(Dataset):
+    """In-memory dataset that pairs `inputs[i]` with `outputs[i]`."""
+
+    def __init__(self, inputs: torch.Tensor, outputs: torch.Tensor):
+        self.inputs, self.outputs = inputs, outputs
+
+    @classmethod
+    def from_file(cls, *args, **kwargs):
+        """Alternate constructor reading from disk; subclasses must override it."""
+        # Fail here rather than hand back None and break somewhere downstream.
+        raise NotImplementedError(f"{cls.__name__} does not implement from_file()")
+
+    @property
+    def sample_input(self):
+        """Input part of the first entry of the dataset."""
+        inputs, _ = next(iter(self))
+        return inputs
+
+    def __len__(self) -> int:
+        return len(self.inputs)
+
+    def __getitem__(self, idx) -> RetT:
+        return self.inputs[idx], self.outputs[idx]
+
+    def __iter__(self) -> Iterator[RetT]:
+        for i in range(len(self)):
+            yield self[i]
+
+
+class DNNDataset(SingleFileDataset):
+    """Dataset read from a file of float inputs and a file of integer labels.
+
+    Subclasses set `image_shape` to the shape of one sample; labels are read
+    as `label_ty` and converted to int64.
+    """
+
+    image_shape = None
+    label_ty = np.int32
+
+    @classmethod
+    def from_file(
+        cls,
+        input_file: PathLike,
+        labels_file: PathLike,
+        count: int = -1,
+        offset: int = 0,
+    ):
+        """Load `count` samples after skipping the first `offset` samples.
+
+        `count=-1` loads everything from `offset` to the end of the files.
+
+        Raises:
+            TypeError: if the class does not define `image_shape`.
+            ValueError: if inputs and labels differ in number of samples.
+        """
+        if cls.image_shape is None:
+            raise TypeError(f"{cls.__name__} must define image_shape")
+        # NOTE: assuming (N, *) ordering of inputs (such as NCHW, NHWC)
+        image_size = int(np.prod(cls.image_shape))  # elements per sample
+        inputs = read_tensor_from_file(
+            input_file,
+            -1,
+            *cls.image_shape,
+            count=-1 if count == -1 else count * image_size,
+            offset=offset * image_size,
+        )
+        labels = read_tensor_from_file(
+            labels_file,
+            -1,
+            read_ty=cls.label_ty,
+            # np.int64 rather than np.long: np.long does not exist in NumPy
+            # 1.24-1.26 and is not 64-bit on every platform.
+            cast_ty=np.int64,
+            count=count,
+            offset=offset,
+        )
+        if inputs.shape[0] != labels.shape[0]:
+            raise ValueError("Input and output have different number of data points")
+        msg_logger.info("%d entries loaded from dataset.", inputs.shape[0])
+        return cls(inputs, labels)
+
+
+class MNIST(DNNDataset):
+    """Samples of shape (1, 28, 28)."""
+
+    image_shape = 1, 28, 28
+
+
+class CIFAR(DNNDataset):
+    """Samples of shape (3, 32, 32)."""
+
+    image_shape = 3, 32, 32
+
+
+class ImageNet(DNNDataset):
+    """Samples of shape (3, 224, 224)."""
+
+    image_shape = 3, 224, 224
+
+
+def read_tensor_from_file(
+    filename: Union[str, Path],
+    *shape: int,
+    read_ty=np.float32,
+    cast_ty=np.float32,
+    count: int = -1,
+    offset: int = 0,
+) -> torch.Tensor:
+    """Read a flat binary file of `read_ty` values into a tensor of `shape`.
+
+    `offset` and `count` are measured in elements, not bytes: the first
+    `offset` elements are skipped and at most `count` are read (`-1` reads to
+    the end of the file). Values are converted to `cast_ty`.
+    """
+    byte_offset = offset * np.dtype(read_ty).itemsize
+    mapped = np.memmap(filename, dtype=read_ty, mode="r", offset=byte_offset)
+    n_entries = mapped.shape[0] if count == -1 else min(mapped.shape[0], count)
+    # astype() copies the data out of the read-only mapping, so the tensor is
+    # backed by ordinary writable memory and needs no further clone().
+    np_array = mapped[:n_entries].reshape(shape).astype(cast_ty)
+    return torch.from_numpy(np_array)
diff --git a/model_zoo/lenet.py b/model_zoo/lenet.py
new file mode 100644
index 0000000..bf0a69a
--- /dev/null
+++ b/model_zoo/lenet.py
@@ -0,0 +1,17 @@
+from torch.nn import Linear, Sequential, Tanh
+
+from ._container import Classifier, make_conv_pool_activ
+
+
+class LeNet(Classifier):
+    """Two 5x5 Tanh convolution stages and two Tanh-activated linear layers."""
+
+    def __init__(self):
+        # Each stage: 5x5 convolution with padding 2, max-pooling of size 2, Tanh.
+        stage1 = make_conv_pool_activ(1, 32, 5, Tanh, pool_size=2, padding=2)
+        stage2 = make_conv_pool_activ(32, 64, 5, Tanh, pool_size=2, padding=2)
+        hidden = Linear(7 * 7 * 64, 1024)
+        output = Linear(1024, 10)
+        # Tanh is applied after the last linear layer as well.
+        head = Sequential(hidden, Tanh(), output, Tanh())
+        super().__init__(Sequential(*stage1, *stage2), head)
diff --git a/model_zoo/vgg16.py b/model_zoo/vgg16.py
new file mode 100644
index 0000000..1a33d31
--- /dev/null
+++ b/model_zoo/vgg16.py
@@ -0,0 +1,54 @@
+from typing import Iterable
+
+from torch.nn import Linear, ReLU, Sequential
+
+from ._container import Classifier, make_conv_pool_activ
+
+
+class _VGG16(Classifier):
+    """Thirteen 3x3 ReLU convolutions followed by a configurable linear head.
+
+    `linear_inouts` lists the widths of the head: each pair of neighbouring
+    values becomes one `Linear`, with a `ReLU` between (not after) the layers.
+    """
+
+    def __init__(self, linear_inouts: Iterable[int]):
+        convs = Sequential(
+            *make_conv_pool_activ(3, 64, 3, ReLU, padding=1),
+            *make_conv_pool_activ(64, 64, 3, ReLU, 2, padding=1),
+            *make_conv_pool_activ(64, 128, 3, ReLU, padding=1),
+            *make_conv_pool_activ(128, 128, 3, ReLU, 2, padding=1),
+            *make_conv_pool_activ(128, 256, 3, ReLU, padding=1),
+            *make_conv_pool_activ(256, 256, 3, ReLU, padding=1),
+            *make_conv_pool_activ(256, 256, 3, ReLU, 2, padding=1),
+            *make_conv_pool_activ(256, 512, 3, ReLU, padding=1),
+            *make_conv_pool_activ(512, 512, 3, ReLU, padding=1),
+            *make_conv_pool_activ(512, 512, 3, ReLU, 2, padding=1),
+            *make_conv_pool_activ(512, 512, 3, ReLU, padding=1),
+            *make_conv_pool_activ(512, 512, 3, ReLU, padding=1),
+            *make_conv_pool_activ(512, 512, 3, ReLU, 2, padding=1)
+        )
+        # Materialize first: pairing neighbours walks the widths twice, which
+        # a one-shot iterable (e.g. a generator) cannot do, and slicing needs
+        # a sequence.
+        widths = list(linear_inouts)
+        head = []
+        for in_, out in zip(widths, widths[1:]):
+            if head:
+                head.append(ReLU())
+            head.append(Linear(in_, out))
+        super().__init__(convs, Sequential(*head))
+
+
+class VGG16Cifar10(_VGG16):
+    """VGG16 whose head is Linear(512, 512), ReLU, Linear(512, 10)."""
+
+    def __init__(self):
+        super().__init__([512, 512, 10])
+
+
+class VGG16Cifar100(_VGG16):
+    """VGG16 whose head is Linear(512, 512), ReLU, Linear(512, 100)."""
+
+    def __init__(self):
+        super().__init__([512, 512, 100])
diff --git a/predtuner/__init__.py b/predtuner/__init__.py
index e69de29..9d13bc6 100644
--- a/predtuner/__init__.py
+++ b/predtuner/__init__.py
@@ -0,0 +1,6 @@
+from .approxapp import ApproxApp, ApproxKnob, ApproxTuner
+from .approxes import get_knobs_from_file
+from .modeledapp import (IPerfModel, IQoSModel, LinearPerfModel, ModeledApp,
+                         QoSModelP1, QoSModelP2)
+from .torchapp import TorchApp, TorchApproxKnob
+from .torchutil import accuracy
diff --git a/test/test_torchapp.py b/test/test_torchapp.py
index a77715e..0184093 100644
--- a/test/test_torchapp.py
+++ b/test/test_torchapp.py
@@ -1,4 +1,5 @@
 import unittest
+import torch
 
 from torch.utils.data.dataset import Subset
 
@@ -7,21 +8,15 @@ from predtuner.torchapp import TorchApp
 from predtuner.torchutil import accuracy
 from torch.nn import Conv2d, Linear
 from torch.utils.data.dataloader import DataLoader
-from torchvision import transforms
-from torchvision.datasets import CIFAR10
-from torchvision.models.vgg import vgg16
+from model_zoo import VGG16Cifar10, CIFAR
 
 
 class TestTorchApp(unittest.TestCase):
     def setUp(self):
-        normalize = transforms.Normalize(
-            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
-        )
-        transform = transforms.Compose([transforms.ToTensor(), normalize])
-
-        dataset = CIFAR10("/tmp/cifar10", download=True, transform=transform)
+        dataset = CIFAR.from_file("model_data/cifar10/input.bin", "model_data/cifar10/labels.bin")
         self.dataset = Subset(dataset, range(100))
-        self.module = vgg16(pretrained=True)
+        self.module = VGG16Cifar10()
+        self.module.load_state_dict(torch.load("model_data/vgg16_cifar10.pth.tar"))
 
     def get_app(self):
         return TorchApp(
@@ -36,6 +31,7 @@ class TestTorchApp(unittest.TestCase):
     def test_init(self):
         app = self.get_app()
         n_knobs = {op: len(ks) for op, ks in app.op_knobs.items()}
+        self.assertEqual(len(n_knobs), 34)
         for op_name, op in app.midx.name_to_module.items():
             if isinstance(op, Conv2d):
                 nknob = 56
@@ -45,9 +41,10 @@ class TestTorchApp(unittest.TestCase):
                 nknob = 1
             self.assertEqual(n_knobs[op_name], nknob)
 
-    # def test_baseline_qos(self):
-    #     app = self.get_app()
-    #     qos, _ = app.measure_qos_perf({}, False)
+    def test_baseline_qos(self):
+        app = self.get_app()
+        qos, _ = app.measure_qos_perf({}, False)
+        self.assertAlmostEqual(qos, 0.88)
 
     def test_tuning(self):
         app = TorchApp(
-- 
GitLab