diff --git a/hpvm/test/epoch_dnn/assets/yolo/atr_dataset.tar.gz b/hpvm/test/epoch_dnn/assets/yolo/atr_dataset.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..9f8f6d5c6b127935600427a08312ecc54b7b2524
Binary files /dev/null and b/hpvm/test/epoch_dnn/assets/yolo/atr_dataset.tar.gz differ
diff --git a/hpvm/test/epoch_dnn/assets/yolo/scales.txt b/hpvm/test/epoch_dnn/assets/yolo/scales.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2e4bc014c3d2d092a5a6c2187be5b8d8f9a66e87
--- /dev/null
+++ b/hpvm/test/epoch_dnn/assets/yolo/scales.txt
@@ -0,0 +1,50 @@
+input:	0.007874015748031496
+conv1:	0.06992515383252429
+add1:	0.06992515383252429
+batch1:	0.2993613302727509
+add2:	0.2993613302727509
+relu1:	0.2760873789424184
+pool1:	0.2760873789424184
+conv2:	1.7186674777227626
+add3:	1.7186674777227626
+batch2:	0.28008715327870537
+add4:	0.28008715327870537
+relu2:	0.28008715327870537
+pool2:	0.28008715327870537
+conv3:	2.2675250452397657
+add5:	2.2675250452397657
+batch3:	0.31653867929469365
+add6:	0.31653867929469365
+relu3:	0.24789561100538912
+pool3:	0.24789561100538912
+conv4:	2.9807093186696476
+add7:	2.9807093186696476
+batch4:	0.21296565215572236
+add8:	0.21296565215572236
+relu4:	0.1663890279110257
+pool4:	0.1663890279110257
+conv5:	2.988272911994118
+add9:	2.988272911994118
+batch5:	0.21199497273731988
+add10:	0.21199497273731988
+relu5:	0.21199497273731988
+pool5:	0.21199497273731988
+conv6:	3.519411158090996
+add11:	3.519411158090996
+batch6:	0.19962375980710337
+add12:	0.19962375980710337
+relu6:	0.19962375980710337
+pool6:	0.19962375980710337
+add13:	0.19962375980710337
+conv7:	17.843374345294812
+add14:	17.843374345294812
+batch7:	0.7490898030888518
+add15:	0.7490898030888518
+relu7:	0.7490898030888518
+conv8:	57.37602537549188
+add16:	57.37602537549188
+batch8:	0.30873182541207267
+add17:	0.30873182541207267
+relu8:	0.2930820529042543
+conv9:	20.397035890622195
+add18:	20.397035890622195
diff --git a/hpvm/test/epoch_dnn/torch_dnn/yolo/__init__.py b/hpvm/test/epoch_dnn/torch_dnn/yolo/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3477ac9e6280625b8e0fcadd6b239111541d346
--- /dev/null
+++ b/hpvm/test/epoch_dnn/torch_dnn/yolo/__init__.py
@@ -0,0 +1,3 @@
+from .dataset import CompressedATRDataset, DefaultTransforms
+from .loss import RegionLoss
+from .model import TinyYoloPL
diff --git a/hpvm/test/epoch_dnn/torch_dnn/yolo/dataset.py b/hpvm/test/epoch_dnn/torch_dnn/yolo/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..98a99d8500d6fe0ef7f2143856cc7c8c99564221
--- /dev/null
+++ b/hpvm/test/epoch_dnn/torch_dnn/yolo/dataset.py
@@ -0,0 +1,130 @@
+import logging
+import tarfile
+from pathlib import Path
+from typing import Optional, Union
+
+import imgaug.augmenters as iaa
+import numpy as np
+import torch
+import torchvision.transforms as transforms
+from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
+from PIL import Image
+from torch.utils.data import Dataset
+
+logger = logging.getLogger(__name__)
+PathLike = Union[Path, str]
+
+
+class CompressedATRDataset(Dataset):
+    loaded_size = 640, 480
+    output_size = 640, 640
+
+    def __init__(
+        self,
+        tar_dataset: PathLike,
+        transform=None,
+        n_boxes: int = 10,
+        extract_to: Optional[PathLike] = None,
+    ):
+        from tempfile import TemporaryDirectory
+
+        self.tar_dataset = Path(tar_dataset)
+        if extract_to is None:
+            temp_dir = TemporaryDirectory()
+            temp_dir.cleanup()  # Remove the empty dir now; extractall below recreates the same unique path
+            self.temp_dir = Path(temp_dir.name)
+        else:
+            self.temp_dir = Path(extract_to)
+        tar = tarfile.open(self.tar_dataset)
+        tar.extractall(path=self.temp_dir)
+        tar.close()
+        self.image_files = list(self.temp_dir.glob("**/*.png"))
+        logger.info("Loaded %d images", len(self.image_files))
+        self.transform = transform
+        self.n_ret_boxes = n_boxes
+
+    def __getitem__(self, index):
+        image_path = self.image_files[index]
+        image = Image.open(image_path).convert("RGB")
+        assert image.size == self.loaded_size
+        boxes_tensor = self._read_boxes(image_path.with_suffix(".txt"), image)
+        image_np = np.array(image)
+        if self.transform is not None:
+            image_np, boxes_tensor = self.transform(image_np, boxes_tensor)
+        image_tensor = transforms.ToTensor()(image_np)
+        return image_tensor, boxes_tensor, image_path.as_posix()
+
+    def _read_boxes(self, path: Path, image: Image.Image):
+        boxes = np.loadtxt(path).reshape(-1, 5)
+        boxes[:, [1, 3]] /= image.width
+        boxes[:, [2, 4]] /= image.height
+        assert boxes.shape[0] <= self.n_ret_boxes
+        n_padding = self.n_ret_boxes - boxes.shape[0]
+        padding_tensor = np.zeros((n_padding, 5), dtype=float)
+        padding_tensor[:, 0] = -1
+        boxes = np.concatenate((boxes, padding_tensor), axis=0)
+        return torch.tensor(boxes, dtype=torch.float)
+
+    def __len__(self):
+        return len(self.image_files)
+
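+# A minimal usage sketch (illustrative; the tarball path refers to the asset shipped
+# with this test, and passing DefaultTransforms is optional):
+#
+#   dataset = CompressedATRDataset("assets/yolo/atr_dataset.tar.gz", transform=DefaultTransforms())
+#   image, boxes, path = dataset[0]  # image tensor, box annotations, source image path
+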
+
+class DefaultTransforms:
+    def __init__(self):
+        self.augmentations = iaa.PadToAspectRatio(
+            1.0, position="center-center"
+        ).to_deterministic()
+
+    def __call__(self, img, boxes):
+        # Convert xywh to xyxy
+        boxes = np.array(boxes)
+        boxes[:, 1:] = xywh2xyxy_np(boxes[:, 1:])
+
+        # Convert bounding boxes to imgaug
+        bounding_boxes = BoundingBoxesOnImage(
+            [BoundingBox(*box[1:], label=box[0]) for box in boxes], shape=img.shape
+        )
+
+        # Apply augmentations
+        img, bounding_boxes = self.augmentations(
+            image=img, bounding_boxes=bounding_boxes
+        )
+
+        # Clip out of image boxes
+        bounding_boxes = bounding_boxes.clip_out_of_image()
+
+        # Convert bounding boxes back to numpy
+        boxes = np.zeros((len(bounding_boxes), 5))
+        for box_idx, box in enumerate(bounding_boxes):
+            # Extract coordinates for unpadded + unscaled image
+            x1 = box.x1
+            y1 = box.y1
+            x2 = box.x2
+            y2 = box.y2
+
+            # Returns (x, y, w, h)
+            boxes[box_idx, 0] = box.label
+            boxes[box_idx, 1] = (x1 + x2) / 2
+            boxes[box_idx, 2] = (y1 + y2) / 2
+            boxes[box_idx, 3] = x2 - x1
+            boxes[box_idx, 4] = y2 - y1
+
+        return img, boxes
+
+
+def xywh2xyxy_np(x: np.ndarray):
+    y: np.ndarray = np.zeros_like(x)
+    y[..., 0] = x[..., 0] - x[..., 2] / 2
+    y[..., 1] = x[..., 1] - x[..., 3] / 2
+    y[..., 2] = x[..., 0] + x[..., 2] / 2
+    y[..., 3] = x[..., 1] + x[..., 3] / 2
+    return y
+
+
+def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
+    # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+    y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw  # top left x
+    y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh  # top left y
+    y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw  # bottom right x
+    y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh  # bottom right y
+    return y
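+
+
+# Worked example for xywhn2xyxy (illustrative): a normalized box centered at
+# (0.5, 0.5) with size (0.25, 0.5) on a 640x640 image maps to pixel corners
+#   xywhn2xyxy(np.array([[0.5, 0.5, 0.25, 0.5]]))  ->  [[240., 160., 400., 480.]]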
diff --git a/hpvm/test/epoch_dnn/torch_dnn/yolo/loss.py b/hpvm/test/epoch_dnn/torch_dnn/yolo/loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..792967a81cc5e67e4b7795f33f907e4003e72621
--- /dev/null
+++ b/hpvm/test/epoch_dnn/torch_dnn/yolo/loss.py
@@ -0,0 +1,499 @@
+#
+#   Darknet RegionLoss
+#   Copyright EAVISE
+#
+
+import logging
+import math
+from distutils.version import LooseVersion
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+try:
+    import pandas as pd
+except ModuleNotFoundError:
+    pd = None
+
+
+__all__ = ["RegionLoss"]
+log = logging.getLogger(__name__)
+torchversion = LooseVersion(torch.__version__)
+version120 = LooseVersion("1.2.0")
+
+
+class RegionLoss(nn.modules.loss._Loss):
+    """Computes region loss from darknet network output and target annotation (yoloV2).
+
+    Args:
+        num_classes (int): number of classes to detect
+        anchors (list): 2D list representing anchor boxes (see :class:`lightnet.network.Darknet`)
+        stride (optional, int): The downsampling factor of the network (input_dimension / output_dimension); Default **32**
+        seen (optional, torch.Tensor): How many images the network has already been trained on; Default **0**
+        coord_scale (optional, float): weight of bounding box coordinates; Default **1.0**
+        noobject_scale (optional, float): weight of regions without target boxes; Default **1.0**
+        object_scale (optional, float): weight of regions with target boxes; Default **5.0**
+        class_scale (optional, float): weight of categorical predictions; Default **1.0**
+        thresh (optional, float): minimum iou between a predicted box and ground truth for them to be considered matching; Default **0.6**
+        coord_prefill (optional, int): For how many training samples the network will prefill the target coordinates, biasing the network to predict box centers at **.5,.5**; Default **12800**
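+
+    Example:
+        An illustrative construction; the anchors shown are the Tiny-YOLOv2 defaults
+        from ``model.py``, not values this loss requires::
+
+            anchors = [(1.08, 1.19), (3.42, 4.41), (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)]
+            criterion = RegionLoss(num_classes=1, anchors=anchors, stride=32)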
+    """
+
+    def __init__(
+        self,
+        num_classes,
+        anchors,
+        stride=32,
+        seen=0,
+        coord_scale=1.0,
+        noobject_scale=1.0,
+        object_scale=5.0,
+        class_scale=1.0,
+        thresh=0.6,
+        coord_prefill=12800,
+    ):
+        super().__init__()
+        self.num_classes = num_classes
+        self.stride = stride
+        self.num_anchors = len(anchors)
+        self.anchor_step = len(anchors[0])
+        self.anchors = torch.tensor(anchors, dtype=torch.float, requires_grad=False)
+        self.register_buffer("seen", torch.tensor(seen))
+
+        self.coord_scale = coord_scale
+        self.noobject_scale = noobject_scale
+        self.object_scale = object_scale
+        self.class_scale = class_scale
+        self.thresh = thresh
+        self.coord_prefill = coord_prefill
+
+        self.mse = nn.MSELoss(reduction="sum")
+        self.cel = nn.CrossEntropyLoss(reduction="sum")
+
+        self.loss_total = torch.tensor(0.0)
+        self.loss_conf = torch.tensor(0.0)
+        self.loss_coord = torch.tensor(0.0)
+        self.loss_class = torch.tensor(0.0)
+
+    @property
+    def values(self):
+        """Return detached sub-losses in a dictionary.
+
+        Note:
+            You can access the individual loss values directly as ``object.loss_<name>`` as well. |br|
+            This will return the actual loss tensor with its attached computational graph and gives you full freedom for modifying this loss prior to the backward pass.
+        """
+        return {
+            "total": self.loss_total.detach(),
+            "conf": self.loss_conf.detach(),
+            "coord": self.loss_coord.detach(),
+            "class": self.loss_class.detach(),
+        }
+
+    @property
+    def loss(self):
+        log.warning('The "loss" attribute is deprecated in favor of "loss_total"')
+        return self.loss_total
+
+    def extra_repr(self):
+        repr_str = f"classes={self.num_classes}, stride={self.stride}, threshold={self.thresh}, seen={self.seen.item()}\n"
+        repr_str += f"coord_scale={self.coord_scale}, object_scale={self.object_scale}, noobject_scale={self.noobject_scale}, class_scale={self.class_scale}\n"
+        repr_str += f"anchors="
+        for a in self.anchors:
+            repr_str += f"[{a[0]:.5g}, {a[1]:.5g}] "
+        return repr_str
+
+    def forward(self, output, target, seen=None):
+        """ Compute Region loss.
+
+        Args:
+            output (torch.autograd.Variable): Output from the network
+            target (brambox annotation dataframe or torch.Tensor): Brambox annotations or tensor containing the annotation targets (see :class:`lightnet.data.BramboxToTensor`)
+            seen (int, optional): How many images the network has already been trained on; Default **Add batch_size to previous seen value**
+
+        Note:
+            If using a target tensor, it should have the dimensions `[num_batch, num_anno, 5]` and following format per image:
+
+            .. math::
+
+                \\begin{bmatrix}
+                    class\\_idx & x\\_center & y\\_center & width & height \\\\
+                    class\\_idx & x\\_center & y\\_center & width & height \\\\
+                    ... \\\\
+                    -1 & 0 & 0 & 0 & 0 \\\\
+                    -1 & 0 & 0 & 0 & 0 \\\\
+                    ...
+                \\end{bmatrix}
+
+            With all coordinates being relative to the image size. |br|
+            Since the annotation tensors of all images in a batch must have the same length, you can pad them with `[-1, 0, 0, 0, 0]`.
+
+        Note:
+            Besides being easier to work with, brambox dataframes have the added benefit that
+            this loss function will also consider the ``ignore`` flag of annotations and ignore detections that match with it.
+            This allows you to have annotations that will not influence the loss in any way,
+            as opposed to having them removed and counting them as false detections.
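+
+        Example:
+            An illustrative padded target tensor for a batch of one image with two
+            annotations (class indices and box values here are made up)::
+
+                target = torch.tensor([[
+                    [ 0.0, 0.50, 0.50, 0.20, 0.30],   # class 0
+                    [ 1.0, 0.25, 0.75, 0.10, 0.10],   # class 1
+                    [-1.0, 0.00, 0.00, 0.00, 0.00],   # padding row
+                ]])
+                loss = region_loss(output, target)   # region_loss = RegionLoss(...); output = raw network prediction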
+        """
+        # Parameters
+        nB = output.data.size(0)
+        nA = self.num_anchors
+        nC = self.num_classes
+        nH = output.data.size(2)
+        nW = output.data.size(3)
+        nPixels = nH * nW
+        device = output.device
+        if seen is not None:
+            self.seen = torch.tensor(seen)
+        elif self.training:
+            self.seen += nB
+
+        # Get x,y,w,h,conf,cls
+        output = output.view(nB, nA, -1, nPixels)
+        coord = torch.zeros_like(output[:, :, :4])
+        coord[:, :, :2] = output[:, :, :2].sigmoid()  # tx,ty
+        coord[:, :, 2:4] = output[:, :, 2:4]  # tw,th
+        conf = output[:, :, 4].sigmoid()
+        if nC > 1:
+            cls = (
+                output[:, :, 5:]
+                .contiguous()
+                .view(nB * nA, nC, nPixels)
+                .transpose(1, 2)
+                .contiguous()
+                .view(-1, nC)
+            )
+
+        # Create prediction boxes
+        pred_boxes = torch.FloatTensor(nB * nA * nPixels, 4)
+        lin_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).view(nPixels).to(device)
+        lin_y = (
+            torch.linspace(0, nH - 1, nH)
+            .view(nH, 1)
+            .repeat(1, nW)
+            .view(nPixels)
+            .to(device)
+        )
+        anchor_w = self.anchors[:, 0].contiguous().view(nA, 1).to(device)
+        anchor_h = self.anchors[:, 1].contiguous().view(nA, 1).to(device)
+
+        pred_boxes[:, 0] = (coord[:, :, 0].detach() + lin_x).view(-1)
+        pred_boxes[:, 1] = (coord[:, :, 1].detach() + lin_y).view(-1)
+        pred_boxes[:, 2] = (coord[:, :, 2].detach().exp() * anchor_w).view(-1)
+        pred_boxes[:, 3] = (coord[:, :, 3].detach().exp() * anchor_h).view(-1)
+        pred_boxes = pred_boxes.cpu()
+
+        # Get target values
+        coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = self.build_targets(
+            pred_boxes, target, nB, nH, nW
+        )
+        coord_mask = coord_mask.expand_as(tcoord).to(device).sqrt()
+        conf_mask = conf_mask.to(device).sqrt()
+        tcoord = tcoord.to(device)
+        tconf = tconf.to(device)
+        if nC > 1:
+            tcls = tcls[cls_mask].view(-1).long().to(device)
+            cls_mask = cls_mask.view(-1, 1).repeat(1, nC).to(device)
+            cls = cls[cls_mask].view(-1, nC)
+
+        # Compute losses
+        self.loss_coord = (
+            self.coord_scale
+            * self.mse(coord * coord_mask, tcoord * coord_mask)
+            / (2 * nB)
+        )
+        self.loss_conf = self.mse(conf * conf_mask, tconf * conf_mask) / (2 * nB)
+        if nC > 1:
+            if tcls.numel() > 0:
+                self.loss_class = self.class_scale * self.cel(cls, tcls) / nB
+            else:
+                self.loss_class = torch.tensor(0.0, device=device)
+        else:
+            self.loss_class = torch.tensor(0.0, device=device)
+
+        self.loss_total = self.loss_coord + self.loss_conf + self.loss_class
+        return self.loss_total
+
+    def build_targets(self, pred_boxes, ground_truth, nB, nH, nW):
+        """Compare prediction boxes and targets, convert targets to network output tensors"""
+        if torch.is_tensor(ground_truth):
+            return self.__build_targets_tensor(pred_boxes, ground_truth, nB, nH, nW)
+        elif pd is not None and isinstance(ground_truth, pd.DataFrame):
+            return self.__build_targets_brambox(pred_boxes, ground_truth, nB, nH, nW)
+        else:
+            raise TypeError(f"Unkown ground truth format [{type(ground_truth)}]")
+
+    def __build_targets_tensor(self, pred_boxes, ground_truth, nB, nH, nW):
+        """Compare prediction boxes and ground truths, convert ground truths to network output tensors"""
+        # Parameters
+        nT = ground_truth.size(1)
+        nA = self.num_anchors
+        nAnchors = nA * nH * nW
+        nPixels = nH * nW
+
+        # Tensors
+        coord_mask = torch.zeros(nB, nA, nH, nW, requires_grad=False)
+        conf_mask = (
+            torch.ones(nB, nA, nH, nW, requires_grad=False) * self.noobject_scale
+        )
+        if torchversion >= version120:
+            cls_mask = torch.zeros(
+                nB, nA, nH, nW, dtype=torch.bool, requires_grad=False
+            )
+        else:
+            cls_mask = torch.zeros(nB, nA, nH, nW, requires_grad=False).byte()
+        tcoord = torch.zeros(nB, nA, 4, nH, nW, requires_grad=False)
+        tconf = torch.zeros(nB, nA, nH, nW, requires_grad=False)
+        tcls = torch.zeros(nB, nA, nH, nW, requires_grad=False)
+
+        if self.training and self.seen < self.coord_prefill:
+            coord_mask.fill_(math.sqrt(0.01 / self.coord_scale))
+            if self.anchor_step == 4:
+                tcoord[:, :, 0] = (
+                    self.anchors[:, 2]
+                    .contiguous()
+                    .view(1, nA, 1, 1)
+                    .repeat(nB, 1, 1, nPixels)
+                )
+                tcoord[:, :, 1] = (
+                    self.anchors[:, 3]
+                    .contiguous()
+                    .view(1, nA, 1, 1)
+                    .repeat(nB, 1, 1, nPixels)
+                )
+            else:
+                tcoord[:, :, 0].fill_(0.5)
+                tcoord[:, :, 1].fill_(0.5)
+
+        # Anchors
+        if self.anchor_step == 4:
+            anchors = self.anchors.clone()
+            anchors[:, :2] = 0
+        else:
+            anchors = torch.cat([torch.zeros_like(self.anchors), self.anchors], 1)
+
+        # Loop over GT
+        for b in range(nB):
+            gt = ground_truth[b][
+                (ground_truth[b, :, 0] >= 0)[:, None].expand_as(ground_truth[b])
+            ].view(-1, 5)
+            if gt.numel() == 0:  # No gt for this image
+                continue
+
+            # Build up tensors
+            cur_pred_boxes = pred_boxes[b * nAnchors : (b + 1) * nAnchors]
+            gt = gt[:, 1:]
+            gt[:, ::2] *= nW
+            gt[:, 1::2] *= nH
+
+            # Set confidence mask of matching detections to 0
+            iou_gt_pred = bbox_ious(gt, cur_pred_boxes)
+            mask = (iou_gt_pred > self.thresh).sum(0) >= 1
+            conf_mask[b][mask.view_as(conf_mask[b])] = 0
+
+            # Find best anchor for each gt
+            iou_gt_anchors = bbox_wh_ious(gt, anchors)
+            _, best_anchors = iou_gt_anchors.max(1)
+
+            # Set masks and target values for each gt
+            nGT = gt.shape[0]
+            gi = gt[:, 0].clamp(0, nW - 1).long()
+            gj = gt[:, 1].clamp(0, nH - 1).long()
+
+            conf_mask[b, best_anchors, gj, gi] = self.object_scale
+            tconf[b, best_anchors, gj, gi] = iou_gt_pred.view(nGT, nA, nH, nW)[
+                torch.arange(nGT), best_anchors, gj, gi
+            ]
+            coord_mask[b, best_anchors, gj, gi] = 2 - (gt[:, 2] * gt[:, 3]) / nPixels
+            tcoord[b, best_anchors, 0, gj, gi] = gt[:, 0] - gi.float()
+            tcoord[b, best_anchors, 1, gj, gi] = gt[:, 1] - gj.float()
+            tcoord[b, best_anchors, 2, gj, gi] = (
+                gt[:, 2] / self.anchors[best_anchors, 0]
+            ).log()
+            tcoord[b, best_anchors, 3, gj, gi] = (
+                gt[:, 3] / self.anchors[best_anchors, 1]
+            ).log()
+            cls_mask[b, best_anchors, gj, gi] = 1
+            tcls[b, best_anchors, gj, gi] = ground_truth[b, torch.arange(nGT), 0]
+
+        return (
+            coord_mask.view(nB, nA, 1, nPixels),
+            conf_mask.view(nB, nA, nPixels),
+            cls_mask.view(nB, nA, nPixels),
+            tcoord.view(nB, nA, 4, nPixels),
+            tconf.view(nB, nA, nPixels),
+            tcls.view(nB, nA, nPixels),
+        )
+
+    def __build_targets_brambox(self, pred_boxes, ground_truth, nB, nH, nW):
+        """Compare prediction boxes and ground truths, convert ground truths to network output tensors"""
+        # Parameters
+        nA = self.num_anchors
+        nAnchors = nA * nH * nW
+        nPixels = nH * nW
+
+        # Tensors
+        coord_mask = torch.zeros(nB, nA, nH, nW, requires_grad=False)
+        conf_mask = (
+            torch.ones(nB, nA, nH, nW, requires_grad=False) * self.noobject_scale
+        )
+        if torchversion >= version120:
+            cls_mask = torch.zeros(
+                nB, nA, nH, nW, dtype=torch.bool, requires_grad=False
+            )
+        else:
+            cls_mask = torch.zeros(nB, nA, nH, nW, requires_grad=False).byte()
+        tcoord = torch.zeros(nB, nA, 4, nH, nW, requires_grad=False)
+        tconf = torch.zeros(nB, nA, nH, nW, requires_grad=False)
+        tcls = torch.zeros(nB, nA, nH, nW, requires_grad=False)
+
+        if self.training and self.seen < self.coord_prefill:
+            coord_mask.fill_(math.sqrt(0.01 / self.coord_scale))
+            if self.anchor_step == 4:
+                tcoord[:, :, 0] = (
+                    self.anchors[:, 2]
+                    .contiguous()
+                    .view(1, nA, 1, 1)
+                    .repeat(nB, 1, 1, nPixels)
+                )
+                tcoord[:, :, 1] = (
+                    self.anchors[:, 3]
+                    .contiguous()
+                    .view(1, nA, 1, 1)
+                    .repeat(nB, 1, 1, nPixels)
+                )
+            else:
+                tcoord[:, :, 0].fill_(0.5)
+                tcoord[:, :, 1].fill_(0.5)
+
+        # Anchors
+        if self.anchor_step == 4:
+            anchors = self.anchors.clone()
+            anchors[:, :2] = 0
+        else:
+            anchors = torch.cat([torch.zeros_like(self.anchors), self.anchors], 1)
+
+        # Loop over GT
+        for b, gt_filtered in ground_truth.groupby("batch_number", sort=False):
+            cur_pred_boxes = pred_boxes[b * nAnchors : (b + 1) * nAnchors]
+
+            # Create ground_truth tensor
+            gt = torch.empty((gt_filtered.shape[0], 4), requires_grad=False)
+            gt[:, 2] = torch.from_numpy(gt_filtered.width.values).float() / self.stride
+            gt[:, 3] = torch.from_numpy(gt_filtered.height.values).float() / self.stride
+            gt[:, 0] = torch.from_numpy(
+                gt_filtered.x_top_left.values
+            ).float() / self.stride + (gt[:, 2] / 2)
+            gt[:, 1] = torch.from_numpy(
+                gt_filtered.y_top_left.values
+            ).float() / self.stride + (gt[:, 3] / 2)
+
+            # Set confidence mask of matching detections to 0
+            iou_gt_pred = bbox_ious(gt, cur_pred_boxes)
+            mask = (iou_gt_pred > self.thresh).sum(0) >= 1
+            conf_mask[b][mask.view_as(conf_mask[b])] = 0
+
+            # Find best anchor for each gt
+            iou_gt_anchors = bbox_wh_ious(gt, anchors)
+            _, best_anchors = iou_gt_anchors.max(1)
+
+            # Set masks and target values for each gt
+            nGT = gt.shape[0]
+            gi = gt[:, 0].clamp(0, nW - 1).long()
+            gj = gt[:, 1].clamp(0, nH - 1).long()
+
+            conf_mask[b, best_anchors, gj, gi] = self.object_scale
+            tconf[b, best_anchors, gj, gi] = iou_gt_pred.view(nGT, nA, nH, nW)[
+                torch.arange(nGT), best_anchors, gj, gi
+            ]
+            coord_mask[b, best_anchors, gj, gi] = 2 - (gt[:, 2] * gt[:, 3]) / nPixels
+            tcoord[b, best_anchors, 0, gj, gi] = gt[:, 0] - gi.float()
+            tcoord[b, best_anchors, 1, gj, gi] = gt[:, 1] - gj.float()
+            tcoord[b, best_anchors, 2, gj, gi] = (
+                gt[:, 2] / self.anchors[best_anchors, 0]
+            ).log()
+            tcoord[b, best_anchors, 3, gj, gi] = (
+                gt[:, 3] / self.anchors[best_anchors, 1]
+            ).log()
+            cls_mask[b, best_anchors, gj, gi] = 1
+            tcls[b, best_anchors, gj, gi] = torch.from_numpy(
+                gt_filtered.class_id.values
+            ).float()
+
+            # Set masks of ignored to zero
+            if gt_filtered.ignore.any():
+                if torchversion >= version120:
+                    ignore_mask = torch.from_numpy(gt_filtered.ignore.values)
+                else:
+                    ignore_mask = torch.from_numpy(
+                        gt_filtered.ignore.values.astype(np.uint8)
+                    )
+                gi = gi[ignore_mask]
+                gj = gj[ignore_mask]
+                best_anchors = best_anchors[ignore_mask]
+
+                conf_mask[b, best_anchors, gj, gi] = 0
+                coord_mask[b, best_anchors, gj, gi] = 0
+                cls_mask[b, best_anchors, gj, gi] = 0
+
+        return (
+            coord_mask.view(nB, nA, 1, nPixels),
+            conf_mask.view(nB, nA, nPixels),
+            cls_mask.view(nB, nA, nPixels),
+            tcoord.view(nB, nA, 4, nPixels),
+            tconf.view(nB, nA, nPixels),
+            tcls.view(nB, nA, nPixels),
+        )
+
+
+def bbox_ious(boxes1, boxes2):
+    """Compute IOU between all boxes from ``boxes1`` with all boxes from ``boxes2``.
+
+    Args:
+        boxes1 (torch.Tensor): List of bounding boxes
+        boxes2 (torch.Tensor): List of bounding boxes
+
+    Returns:
+        torch.Tensor[len(boxes1) X len(boxes2)]: IOU values
+
+    Note:
+        Tensor format: [[xc, yc, w, h],...]
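+
+    Example:
+        Worked by hand (intersection 1, union 4 + 4 - 1 = 7, so IOU = 1/7)::
+
+            boxes1 = torch.tensor([[2.0, 2.0, 2.0, 2.0]])  # spans (1, 1)-(3, 3)
+            boxes2 = torch.tensor([[3.0, 3.0, 2.0, 2.0]])  # spans (2, 2)-(4, 4)
+            bbox_ious(boxes1, boxes2)                      # tensor([[0.1429]])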
+    """
+    b1x1, b1y1 = (boxes1[:, :2] - (boxes1[:, 2:4] / 2)).split(1, 1)
+    b1x2, b1y2 = (boxes1[:, :2] + (boxes1[:, 2:4] / 2)).split(1, 1)
+    b2x1, b2y1 = (boxes2[:, :2] - (boxes2[:, 2:4] / 2)).split(1, 1)
+    b2x2, b2y2 = (boxes2[:, :2] + (boxes2[:, 2:4] / 2)).split(1, 1)
+
+    dx = (b1x2.min(b2x2.t()) - b1x1.max(b2x1.t())).clamp(min=0)
+    dy = (b1y2.min(b2y2.t()) - b1y1.max(b2y1.t())).clamp(min=0)
+    intersections = dx * dy
+
+    areas1 = (b1x2 - b1x1) * (b1y2 - b1y1)
+    areas2 = (b2x2 - b2x1) * (b2y2 - b2y1)
+    unions = (areas1 + areas2.t()) - intersections
+
+    return intersections / unions
+
+
+def bbox_wh_ious(boxes1, boxes2):
+    """Shorter version of :func:`lightnet.network.loss._regionloss.bbox_ious`
+    for when we are only interested in W/H of the bounding boxes and not X/Y.
+
+    Args:
+        boxes1 (torch.Tensor): List of bounding boxes
+        boxes2 (torch.Tensor): List of bounding boxes
+
+    Returns:
+        torch.Tensor[len(boxes1) X len(boxes2)]: IOU values when discarding X/Y offsets (aka. as if they were zero)
+
+    Note:
+        Tensor format: [[xc, yc, w, h],...]
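+
+    Example:
+        Worked by hand (only W/H matter: intersection 2*2 = 4, union 4 + 16 - 4 = 16)::
+
+            boxes1 = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
+            boxes2 = torch.tensor([[5.0, 5.0, 4.0, 4.0]])  # X/Y offsets are ignored
+            bbox_wh_ious(boxes1, boxes2)                   # tensor([[0.2500]])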
+    """
+    b1w = boxes1[:, 2].unsqueeze(1)
+    b1h = boxes1[:, 3].unsqueeze(1)
+    b2w = boxes2[:, 2]
+    b2h = boxes2[:, 3]
+
+    intersections = b1w.min(b2w) * b1h.min(b2h)
+    unions = (b1w * b1h) + (b2w * b2h) - intersections
+
+    return intersections / unions
diff --git a/hpvm/test/epoch_dnn/torch_dnn/yolo/model.py b/hpvm/test/epoch_dnn/torch_dnn/yolo/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4852a0f3abc44017b3cd87f427594cc06c9ec3f
--- /dev/null
+++ b/hpvm/test/epoch_dnn/torch_dnn/yolo/model.py
@@ -0,0 +1,117 @@
+from collections import OrderedDict
+from collections.abc import Iterable
+
+import lightnet.network as lnn
+import pytorch_lightning as pl
+import torch
+import torch.nn as nn
+
+from .loss import RegionLoss
+
+DEFAULT_ANCHORS = [
+    (1.08, 1.19),
+    (3.42, 4.41),
+    (6.63, 11.38),
+    (9.42, 5.11),
+    (16.62, 10.52),
+]
+
+
+class TinyYoloV2(lnn.module.Darknet):
+    """Tiny Yolo v2 implementation :cite:`yolo_v2`.
+
+    Args:
+        num_classes (Number, optional): Number of classes; Default **20**
+        input_channels (Number, optional): Number of input channels; Default **3**
+        anchors (list, optional): 2D list with anchor values; Default **Tiny yolo v2 anchors (VOC)**
+
+    Attributes:
+        self.stride: Subsampling factor of the network (input_dim / output_dim)
+        self.inner_stride: Maximal internal subsampling factor of the network (input dimension should be a multiple of this)
+        self.remap_darknet: Remapping rules for weights from the :class:`~lightnet.models.Darknet` model.
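+
+    Example:
+        Illustrative forward pass; the output shape assumes the default 5 anchors,
+        a 640x640 input, and the sequential forward provided by the lightnet base class::
+
+            net = TinyYoloV2(num_classes=1)
+            out = net(torch.rand(1, 3, 640, 640))
+            # out.shape == (1, 5 * (5 + 1), 20, 20)   # anchors * (5 + classes), stride 32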
+    """
+
+    stride = 32
+    inner_stride = 32
+    remap_darknet = [
+        (r"^layers.0.(\d+_)", r"layers.\1"),  # All base layers (1-13)
+    ]
+
+    def __init__(self, num_classes, input_channels=3, anchors=DEFAULT_ANCHORS):
+        super().__init__()
+        if not isinstance(anchors, Iterable) or not isinstance(anchors[0], Iterable):
+            raise TypeError("Anchors need to be a 2D list of numbers")
+
+        # Parameters
+        self.num_classes = num_classes
+        self.input_channels = input_channels
+        self.anchors = anchors
+
+        # Network
+        # fmt: off
+        momentum = 0.01
+        self.layers = nn.Sequential(
+            OrderedDict([
+                ('1_convbatch',     lnn.layer.Conv2dBatchReLU(input_channels, 16, 3, 1, 1, momentum=momentum)),
+                ('2_max',           nn.MaxPool2d(2, 2)),
+                ('3_convbatch',     lnn.layer.Conv2dBatchReLU(16, 32, 3, 1, 1, momentum=momentum)),
+                ('4_max',           nn.MaxPool2d(2, 2)),
+                ('5_convbatch',     lnn.layer.Conv2dBatchReLU(32, 64, 3, 1, 1, momentum=momentum)),
+                ('6_max',           nn.MaxPool2d(2, 2)),
+                ('7_convbatch',     lnn.layer.Conv2dBatchReLU(64, 128, 3, 1, 1, momentum=momentum)),
+                ('8_max',           nn.MaxPool2d(2, 2)),
+                ('9_convbatch',     lnn.layer.Conv2dBatchReLU(128, 256, 3, 1, 1, momentum=momentum)),
+                ('10_max',          nn.MaxPool2d(2, 2)),
+                ('11_convbatch',    lnn.layer.Conv2dBatchReLU(256, 512, 3, 1, 1, momentum=momentum)),
+                ('12_max',          lnn.layer.PaddedMaxPool2d(2, 1, (0, 1, 0, 1))),
+                ('13_convbatch',    lnn.layer.Conv2dBatchReLU(512, 1024, 3, 1, 1, momentum=momentum)),
+                ('14_convbatch',    lnn.layer.Conv2dBatchReLU(1024, 1024, 3, 1, 1, momentum=momentum)),
+                ('15_conv',         nn.Conv2d(1024, len(self.anchors)*(5+self.num_classes), 1, 1, 0)),
+            ])
+        )
+        # fmt: on
+
+
+class TinyYoloPL(pl.LightningModule):
+    def __init__(self, num_classes, stride=32, anchors=DEFAULT_ANCHORS):
+        super().__init__()
+        self.num_classes = num_classes
+        self.anchors = anchors
+        self.stride = stride
+        self.network = TinyYoloV2(num_classes, anchors=anchors)
+        self.loss = RegionLoss(
+            num_classes=self.network.num_classes,
+            anchors=self.network.anchors,
+            stride=self.network.stride,
+        )
+
+    def forward(self, image):
+        prediction = self.network(image)
+        return prediction
+
+    def training_step(self, batch, batch_idx):
+        images, targets, _ = batch  # CompressedATRDataset yields (image, boxes, path)
+        prediction = self.network(images)
+        loss = self.loss(prediction, targets.cpu())
+        self.log("train_loss", loss)
+        return loss
+
+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+        return optimizer
+
+    def validation_step(self, val_batch, batch_idx):
+        images, target, _ = val_batch  # CompressedATRDataset yields (image, boxes, path)
+        prediction = self(images)
+
+        loss = self.loss(prediction, target.cpu())
+        self.log("val_loss", loss)
+
+    def test_step(self, test_batch, batch_idx):
+        # TODO: Add mAP and other standard obj detection metrics
+        images, target, _ = test_batch  # CompressedATRDataset yields (image, boxes, path)
+        prediction = self(images)
+
+        loss = self.loss(prediction, target.cpu())
+        self.log("test_loss", loss)
+
+        return loss
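+
+
+# A minimal training sketch (illustrative only; the tarball path, batch size and
+# class count are assumptions, not values fixed by this module):
+#
+#   from torch.utils.data import DataLoader
+#   from .dataset import CompressedATRDataset
+#
+#   model = TinyYoloPL(num_classes=1)
+#   loader = DataLoader(CompressedATRDataset("assets/yolo/atr_dataset.tar.gz"), batch_size=8)
+#   pl.Trainer(max_epochs=1).fit(model, loader)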