Commit e8455766 authored by Yifan Zhao

Adding yolo model and its assets (sans the checkpoint)

parent d4df33b8
input: 0.007874015748031496
conv1: 0.06992515383252429
add1: 0.06992515383252429
batch1: 0.2993613302727509
add2: 0.2993613302727509
relu1: 0.2760873789424184
pool1: 0.2760873789424184
conv2: 1.7186674777227626
add3: 1.7186674777227626
batch2: 0.28008715327870537
add4: 0.28008715327870537
relu2: 0.28008715327870537
pool2: 0.28008715327870537
conv3: 2.2675250452397657
add5: 2.2675250452397657
batch3: 0.31653867929469365
add6: 0.31653867929469365
relu3: 0.24789561100538912
pool3: 0.24789561100538912
conv4: 2.9807093186696476
add7: 2.9807093186696476
batch4: 0.21296565215572236
add8: 0.21296565215572236
relu4: 0.1663890279110257
pool4: 0.1663890279110257
conv5: 2.988272911994118
add9: 2.988272911994118
batch5: 0.21199497273731988
add10: 0.21199497273731988
relu5: 0.21199497273731988
pool5: 0.21199497273731988
conv6: 3.519411158090996
add11: 3.519411158090996
batch6: 0.19962375980710337
add12: 0.19962375980710337
relu6: 0.19962375980710337
pool6: 0.19962375980710337
add13: 0.19962375980710337
conv7: 17.843374345294812
add14: 17.843374345294812
batch7: 0.7490898030888518
add15: 0.7490898030888518
relu7: 0.7490898030888518
conv8: 57.37602537549188
add16: 57.37602537549188
batch8: 0.30873182541207267
add17: 0.30873182541207267
relu8: 0.2930820529042543
conv9: 20.397035890622195
add18: 20.397035890622195
from .dataset import CompressedATRDataset, DefaultTransforms
from .loss import RegionLoss
from .model import TinyYoloPL
import logging
import tarfile
from pathlib import Path
from typing import Optional, Union
import imgaug.augmenters as iaa
import numpy as np
import torch
import torchvision.transforms as transforms
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from PIL import Image
from torch.utils.data import Dataset
logger = logging.getLogger(__name__)
PathLike = Union[Path, str]
class CompressedATRDataset(Dataset):
loaded_size = 640, 480
output_size = 640, 640
def __init__(
self,
tar_dataset: PathLike,
transform=None,
n_boxes: int = 10,
        extract_to: Optional[PathLike] = None,
):
from tempfile import TemporaryDirectory
self.tar_dataset = Path(tar_dataset)
if extract_to is None:
temp_dir = TemporaryDirectory()
            temp_dir.cleanup()  # Delete the empty dir now; tar.extractall below re-creates the path
self.temp_dir = Path(temp_dir.name)
else:
self.temp_dir = Path(extract_to)
tar = tarfile.open(self.tar_dataset)
tar.extractall(path=self.temp_dir)
tar.close()
self.image_files = list(self.temp_dir.glob("**/*.png"))
logger.info("Loaded %d images", len(self.image_files))
self.transform = transform
self.n_ret_boxes = n_boxes
def __getitem__(self, index):
image_path = self.image_files[index]
image = Image.open(image_path).convert("RGB")
assert image.size == self.loaded_size
boxes_tensor = self._read_boxes(image_path.with_suffix(".txt"), image)
        image_np = np.array(image)
        if self.transform is not None:
            image_np, boxes_tensor = self.transform(image_np, boxes_tensor)
        image_tensor = transforms.ToTensor()(image_np)
return image_tensor, boxes_tensor, image_path.as_posix()
def _read_boxes(self, path: Path, image: Image.Image):
boxes = np.loadtxt(path).reshape(-1, 5)
boxes[:, [1, 3]] /= image.width
boxes[:, [2, 4]] /= image.height
        assert boxes.shape[0] <= self.n_ret_boxes
        n_padding = self.n_ret_boxes - boxes.shape[0]
padding_tensor = np.zeros((n_padding, 5), dtype=float)
padding_tensor[:, 0] = -1
boxes = np.concatenate((boxes, padding_tensor), axis=0)
return torch.tensor(boxes, dtype=torch.float)
def __len__(self):
return len(self.image_files)
class DefaultTransforms:
def __init__(self):
self.augmentations = iaa.PadToAspectRatio(
1.0, position="center-center"
).to_deterministic()
def __call__(self, img, boxes):
# Convert xywh to xyxy
boxes = np.array(boxes)
boxes[:, 1:] = xywh2xyxy_np(boxes[:, 1:])
# Convert bounding boxes to imgaug
bounding_boxes = BoundingBoxesOnImage(
[BoundingBox(*box[1:], label=box[0]) for box in boxes], shape=img.shape
)
# Apply augmentations
img, bounding_boxes = self.augmentations(
image=img, bounding_boxes=bounding_boxes
)
# Clip out of image boxes
bounding_boxes = bounding_boxes.clip_out_of_image()
# Convert bounding boxes back to numpy
boxes = np.zeros((len(bounding_boxes), 5))
for box_idx, box in enumerate(bounding_boxes):
# Extract coordinates for unpadded + unscaled image
x1 = box.x1
y1 = box.y1
x2 = box.x2
y2 = box.y2
# Returns (x, y, w, h)
boxes[box_idx, 0] = box.label
boxes[box_idx, 1] = (x1 + x2) / 2
boxes[box_idx, 2] = (y1 + y2) / 2
boxes[box_idx, 3] = x2 - x1
boxes[box_idx, 4] = y2 - y1
return img, boxes
def xywh2xyxy_np(x: np.ndarray):
y: np.ndarray = np.zeros_like(x)
y[..., 0] = x[..., 0] - x[..., 2] / 2
y[..., 1] = x[..., 1] - x[..., 3] / 2
y[..., 2] = x[..., 0] + x[..., 2] / 2
y[..., 3] = x[..., 1] + x[..., 3] / 2
return y
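# Worked example (illustrative values): a normalized box [xc, yc, w, h] =
# [0.5, 0.5, 0.2, 0.4] maps to corners [x1, y1, x2, y2] = [0.4, 0.3, 0.6, 0.7],
# since x1 = 0.5 - 0.2 / 2, y1 = 0.5 - 0.4 / 2, and so on.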
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
# Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x
y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y
y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x
y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y
return y
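# Usage sketch (not part of the original file): a minimal, hypothetical example of
# wiring CompressedATRDataset into a DataLoader. The archive path is a placeholder;
# pass transform=DefaultTransforms() to pad images to a square aspect ratio first.
if __name__ == "__main__":
    from torch.utils.data import DataLoader

    dataset = CompressedATRDataset("path/to/atr_images.tar")  # hypothetical path
    loader = DataLoader(dataset, batch_size=4, shuffle=True)
    images, boxes, paths = next(iter(loader))
    # images: [4, 3, 480, 640]; boxes: [4, 10, 5], rows padded with [-1, 0, 0, 0, 0]
    print(images.shape, boxes.shape, len(paths))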
#
# Darknet RegionLoss
# Copyright EAVISE
#
import logging
import math
from distutils.version import LooseVersion
import numpy as np
import torch
import torch.nn as nn
try:
import pandas as pd
except ModuleNotFoundError:
pd = None
__all__ = ["RegionLoss"]
log = logging.getLogger(__name__)
torchversion = LooseVersion(torch.__version__)
version120 = LooseVersion("1.2.0")
class RegionLoss(nn.modules.loss._Loss):
"""Computes region loss from darknet network output and target annotation (yoloV2).
Args:
num_classes (int): number of classes to detect
anchors (list): 2D list representing anchor boxes (see :class:`lightnet.network.Darknet`)
stride (optional, int): The downsampling factor of the network (input_dimension / output_dimension); Default **32**
seen (optional, torch.Tensor): How many images the network has already been trained on; Default **0**
coord_scale (optional, float): weight of bounding box coordinates; Default **1.0**
noobject_scale (optional, float): weight of regions without target boxes; Default **1.0**
object_scale (optional, float): weight of regions with target boxes; Default **5.0**
class_scale (optional, float): weight of categorical predictions; Default **1.0**
thresh (optional, float): minimum iou between a predicted box and ground truth for them to be considered matching; Default **0.6**
        coord_prefill (optional, int): This parameter controls for how many training samples the network will prefill the target coordinates, biasing the network to predict the center at **.5,.5**; Default **12800**
"""
def __init__(
self,
num_classes,
anchors,
stride=32,
seen=0,
coord_scale=1.0,
noobject_scale=1.0,
object_scale=5.0,
class_scale=1.0,
thresh=0.6,
coord_prefill=12800,
):
super().__init__()
self.num_classes = num_classes
self.stride = stride
self.num_anchors = len(anchors)
self.anchor_step = len(anchors[0])
self.anchors = torch.tensor(anchors, dtype=torch.float, requires_grad=False)
self.register_buffer("seen", torch.tensor(seen))
self.coord_scale = coord_scale
self.noobject_scale = noobject_scale
self.object_scale = object_scale
self.class_scale = class_scale
self.thresh = thresh
self.coord_prefill = coord_prefill
self.mse = nn.MSELoss(reduction="sum")
self.cel = nn.CrossEntropyLoss(reduction="sum")
self.loss_total = torch.tensor(0.0)
self.loss_conf = torch.tensor(0.0)
self.loss_coord = torch.tensor(0.0)
self.loss_class = torch.tensor(0.0)
@property
def values(self):
"""Return detached sub-losses in a dictionary.
Note:
You can access the individual loss values directly as ``object.loss_<name>`` as well. |br|
This will return the actual loss tensor with its attached computational graph and gives you full freedom for modifying this loss prior to the backward pass.
"""
return {
"total": self.loss_total.detach(),
"conf": self.loss_conf.detach(),
"coord": self.loss_coord.detach(),
"class": self.loss_class.detach(),
}
@property
def loss(self):
        log.warning('The "loss" attribute is deprecated in favor of "loss_total"')
return self.loss_total
def extra_repr(self):
repr_str = f"classes={self.num_classes}, stride={self.stride}, threshold={self.thresh}, seen={self.seen.item()}\n"
repr_str += f"coord_scale={self.coord_scale}, object_scale={self.object_scale}, noobject_scale={self.noobject_scale}, class_scale={self.class_scale}\n"
repr_str += f"anchors="
for a in self.anchors:
repr_str += f"[{a[0]:.5g}, {a[1]:.5g}] "
return repr_str
def forward(self, output, target, seen=None):
""" Compute Region loss.
Args:
output (torch.autograd.Variable): Output from the network
target (brambox annotation dataframe or torch.Tensor): Brambox annotations or tensor containing the annotation targets (see :class:`lightnet.data.BramboxToTensor`)
seen (int, optional): How many images the network has already been trained on; Default **Add batch_size to previous seen value**
Note:
If using a target tensor, it should have the dimensions `[num_batch, num_anno, 5]` and following format per image:
.. math::
\\begin{bmatrix}
class\\_idx & x\\_center & y\\_center & width & height \\\\
class\\_idx & x\\_center & y\\_center & width & height \\\\
... \\\\
-1 & 0 & 0 & 0 & 0 \\\\
-1 & 0 & 0 & 0 & 0 \\\\
...
\\end{bmatrix}
With all coordinates being relative to the image size. |br|
            Since the annotations of all images in a batch must have the same length, you can pad them with: `[-1, 0, 0, 0, 0]`.
Note:
Besides being easier to work with, brambox dataframes have the added benefit that
this loss function will also consider the ``ignore`` flag of annotations and ignore detections that match with it.
This allows you to have annotations that will not influence the loss in any way,
as opposed to having them removed and counting them as false detections.
"""
# Parameters
nB = output.data.size(0)
nA = self.num_anchors
nC = self.num_classes
nH = output.data.size(2)
nW = output.data.size(3)
nPixels = nH * nW
device = output.device
if seen is not None:
self.seen = torch.tensor(seen)
elif self.training:
self.seen += nB
# Get x,y,w,h,conf,cls
output = output.view(nB, nA, -1, nPixels)
coord = torch.zeros_like(output[:, :, :4])
coord[:, :, :2] = output[:, :, :2].sigmoid() # tx,ty
coord[:, :, 2:4] = output[:, :, 2:4] # tw,th
conf = output[:, :, 4].sigmoid()
if nC > 1:
cls = (
output[:, :, 5:]
.contiguous()
.view(nB * nA, nC, nPixels)
.transpose(1, 2)
.contiguous()
.view(-1, nC)
)
# Create prediction boxes
pred_boxes = torch.FloatTensor(nB * nA * nPixels, 4)
lin_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).view(nPixels).to(device)
lin_y = (
torch.linspace(0, nH - 1, nH)
.view(nH, 1)
.repeat(1, nW)
.view(nPixels)
.to(device)
)
anchor_w = self.anchors[:, 0].contiguous().view(nA, 1).to(device)
anchor_h = self.anchors[:, 1].contiguous().view(nA, 1).to(device)
pred_boxes[:, 0] = (coord[:, :, 0].detach() + lin_x).view(-1)
pred_boxes[:, 1] = (coord[:, :, 1].detach() + lin_y).view(-1)
pred_boxes[:, 2] = (coord[:, :, 2].detach().exp() * anchor_w).view(-1)
pred_boxes[:, 3] = (coord[:, :, 3].detach().exp() * anchor_h).view(-1)
pred_boxes = pred_boxes.cpu()
# Get target values
coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = self.build_targets(
pred_boxes, target, nB, nH, nW
)
coord_mask = coord_mask.expand_as(tcoord).to(device).sqrt()
conf_mask = conf_mask.to(device).sqrt()
tcoord = tcoord.to(device)
tconf = tconf.to(device)
if nC > 1:
tcls = tcls[cls_mask].view(-1).long().to(device)
cls_mask = cls_mask.view(-1, 1).repeat(1, nC).to(device)
cls = cls[cls_mask].view(-1, nC)
# Compute losses
self.loss_coord = (
self.coord_scale
* self.mse(coord * coord_mask, tcoord * coord_mask)
/ (2 * nB)
)
self.loss_conf = self.mse(conf * conf_mask, tconf * conf_mask) / (2 * nB)
if nC > 1:
if tcls.numel() > 0:
self.loss_class = self.class_scale * self.cel(cls, tcls) / nB
else:
self.loss_class = torch.tensor(0.0, device=device)
else:
self.loss_class = torch.tensor(0.0, device=device)
self.loss_total = self.loss_coord + self.loss_conf + self.loss_class
return self.loss_total
def build_targets(self, pred_boxes, ground_truth, nB, nH, nW):
"""Compare prediction boxes and targets, convert targets to network output tensors"""
if torch.is_tensor(ground_truth):
return self.__build_targets_tensor(pred_boxes, ground_truth, nB, nH, nW)
elif pd is not None and isinstance(ground_truth, pd.DataFrame):
return self.__build_targets_brambox(pred_boxes, ground_truth, nB, nH, nW)
else:
            raise TypeError(f"Unknown ground truth format [{type(ground_truth)}]")
def __build_targets_tensor(self, pred_boxes, ground_truth, nB, nH, nW):
"""Compare prediction boxes and ground truths, convert ground truths to network output tensors"""
# Parameters
nT = ground_truth.size(1)
nA = self.num_anchors
nAnchors = nA * nH * nW
nPixels = nH * nW
# Tensors
coord_mask = torch.zeros(nB, nA, nH, nW, requires_grad=False)
conf_mask = (
torch.ones(nB, nA, nH, nW, requires_grad=False) * self.noobject_scale
)
if torchversion >= version120:
cls_mask = torch.zeros(
nB, nA, nH, nW, dtype=torch.bool, requires_grad=False
)
else:
cls_mask = torch.zeros(nB, nA, nH, nW, requires_grad=False).byte()
tcoord = torch.zeros(nB, nA, 4, nH, nW, requires_grad=False)
tconf = torch.zeros(nB, nA, nH, nW, requires_grad=False)
tcls = torch.zeros(nB, nA, nH, nW, requires_grad=False)
if self.training and self.seen < self.coord_prefill:
coord_mask.fill_(math.sqrt(0.01 / self.coord_scale))
if self.anchor_step == 4:
tcoord[:, :, 0] = (
self.anchors[:, 2]
.contiguous()
.view(1, nA, 1, 1)
.repeat(nB, 1, 1, nPixels)
)
tcoord[:, :, 1] = (
self.anchors[:, 3]
.contiguous()
.view(1, nA, 1, 1)
.repeat(nB, 1, 1, nPixels)
)
else:
tcoord[:, :, 0].fill_(0.5)
tcoord[:, :, 1].fill_(0.5)
# Anchors
if self.anchor_step == 4:
anchors = self.anchors.clone()
anchors[:, :2] = 0
else:
anchors = torch.cat([torch.zeros_like(self.anchors), self.anchors], 1)
# Loop over GT
for b in range(nB):
gt = ground_truth[b][
(ground_truth[b, :, 0] >= 0)[:, None].expand_as(ground_truth[b])
].view(-1, 5)
if gt.numel() == 0: # No gt for this image
continue
# Build up tensors
cur_pred_boxes = pred_boxes[b * nAnchors : (b + 1) * nAnchors]
gt = gt[:, 1:]
gt[:, ::2] *= nW
gt[:, 1::2] *= nH
# Set confidence mask of matching detections to 0
iou_gt_pred = bbox_ious(gt, cur_pred_boxes)
mask = (iou_gt_pred > self.thresh).sum(0) >= 1
conf_mask[b][mask.view_as(conf_mask[b])] = 0
# Find best anchor for each gt
iou_gt_anchors = bbox_wh_ious(gt, anchors)
_, best_anchors = iou_gt_anchors.max(1)
# Set masks and target values for each gt
nGT = gt.shape[0]
gi = gt[:, 0].clamp(0, nW - 1).long()
gj = gt[:, 1].clamp(0, nH - 1).long()
conf_mask[b, best_anchors, gj, gi] = self.object_scale
tconf[b, best_anchors, gj, gi] = iou_gt_pred.view(nGT, nA, nH, nW)[
torch.arange(nGT), best_anchors, gj, gi
]
coord_mask[b, best_anchors, gj, gi] = 2 - (gt[:, 2] * gt[:, 3]) / nPixels
tcoord[b, best_anchors, 0, gj, gi] = gt[:, 0] - gi.float()
tcoord[b, best_anchors, 1, gj, gi] = gt[:, 1] - gj.float()
tcoord[b, best_anchors, 2, gj, gi] = (
gt[:, 2] / self.anchors[best_anchors, 0]
).log()
tcoord[b, best_anchors, 3, gj, gi] = (
gt[:, 3] / self.anchors[best_anchors, 1]
).log()
cls_mask[b, best_anchors, gj, gi] = 1
tcls[b, best_anchors, gj, gi] = ground_truth[b, torch.arange(nGT), 0]
return (
coord_mask.view(nB, nA, 1, nPixels),
conf_mask.view(nB, nA, nPixels),
cls_mask.view(nB, nA, nPixels),
tcoord.view(nB, nA, 4, nPixels),
tconf.view(nB, nA, nPixels),
tcls.view(nB, nA, nPixels),
)
def __build_targets_brambox(self, pred_boxes, ground_truth, nB, nH, nW):
"""Compare prediction boxes and ground truths, convert ground truths to network output tensors"""
# Parameters
nA = self.num_anchors
nAnchors = nA * nH * nW
nPixels = nH * nW
# Tensors
coord_mask = torch.zeros(nB, nA, nH, nW, requires_grad=False)
conf_mask = (
torch.ones(nB, nA, nH, nW, requires_grad=False) * self.noobject_scale
)
if torchversion >= version120:
cls_mask = torch.zeros(
nB, nA, nH, nW, dtype=torch.bool, requires_grad=False
)
else:
cls_mask = torch.zeros(nB, nA, nH, nW, requires_grad=False).byte()
tcoord = torch.zeros(nB, nA, 4, nH, nW, requires_grad=False)
tconf = torch.zeros(nB, nA, nH, nW, requires_grad=False)
tcls = torch.zeros(nB, nA, nH, nW, requires_grad=False)
if self.training and self.seen < self.coord_prefill:
coord_mask.fill_(math.sqrt(0.01 / self.coord_scale))
if self.anchor_step == 4:
tcoord[:, :, 0] = (
self.anchors[:, 2]
.contiguous()
.view(1, nA, 1, 1)
.repeat(nB, 1, 1, nPixels)
)
tcoord[:, :, 1] = (
self.anchors[:, 3]
.contiguous()
.view(1, nA, 1, 1)
.repeat(nB, 1, 1, nPixels)
)
else:
tcoord[:, :, 0].fill_(0.5)
tcoord[:, :, 1].fill_(0.5)
# Anchors
if self.anchor_step == 4:
anchors = self.anchors.clone()
anchors[:, :2] = 0
else:
anchors = torch.cat([torch.zeros_like(self.anchors), self.anchors], 1)
# Loop over GT
for b, gt_filtered in ground_truth.groupby("batch_number", sort=False):
cur_pred_boxes = pred_boxes[b * nAnchors : (b + 1) * nAnchors]
# Create ground_truth tensor
gt = torch.empty((gt_filtered.shape[0], 4), requires_grad=False)
gt[:, 2] = torch.from_numpy(gt_filtered.width.values).float() / self.stride
gt[:, 3] = torch.from_numpy(gt_filtered.height.values).float() / self.stride
gt[:, 0] = torch.from_numpy(
gt_filtered.x_top_left.values
).float() / self.stride + (gt[:, 2] / 2)
gt[:, 1] = torch.from_numpy(
gt_filtered.y_top_left.values
).float() / self.stride + (gt[:, 3] / 2)
# Set confidence mask of matching detections to 0
iou_gt_pred = bbox_ious(gt, cur_pred_boxes)
mask = (iou_gt_pred > self.thresh).sum(0) >= 1
conf_mask[b][mask.view_as(conf_mask[b])] = 0
# Find best anchor for each gt
iou_gt_anchors = bbox_wh_ious(gt, anchors)
_, best_anchors = iou_gt_anchors.max(1)
# Set masks and target values for each gt
nGT = gt.shape[0]
gi = gt[:, 0].clamp(0, nW - 1).long()
gj = gt[:, 1].clamp(0, nH - 1).long()
conf_mask[b, best_anchors, gj, gi] = self.object_scale
tconf[b, best_anchors, gj, gi] = iou_gt_pred.view(nGT, nA, nH, nW)[
torch.arange(nGT), best_anchors, gj, gi
]
coord_mask[b, best_anchors, gj, gi] = 2 - (gt[:, 2] * gt[:, 3]) / nPixels
tcoord[b, best_anchors, 0, gj, gi] = gt[:, 0] - gi.float()
tcoord[b, best_anchors, 1, gj, gi] = gt[:, 1] - gj.float()
tcoord[b, best_anchors, 2, gj, gi] = (
gt[:, 2] / self.anchors[best_anchors, 0]
).log()
tcoord[b, best_anchors, 3, gj, gi] = (
gt[:, 3] / self.anchors[best_anchors, 1]
).log()
cls_mask[b, best_anchors, gj, gi] = 1
tcls[b, best_anchors, gj, gi] = torch.from_numpy(
gt_filtered.class_id.values
).float()
# Set masks of ignored to zero
if gt_filtered.ignore.any():
if torchversion >= version120:
ignore_mask = torch.from_numpy(gt_filtered.ignore.values)
else:
ignore_mask = torch.from_numpy(
gt_filtered.ignore.values.astype(np.uint8)
)
gi = gi[ignore_mask]
gj = gj[ignore_mask]
best_anchors = best_anchors[ignore_mask]
conf_mask[b, best_anchors, gj, gi] = 0
coord_mask[b, best_anchors, gj, gi] = 0
cls_mask[b, best_anchors, gj, gi] = 0
return (
coord_mask.view(nB, nA, 1, nPixels),
conf_mask.view(nB, nA, nPixels),
cls_mask.view(nB, nA, nPixels),
tcoord.view(nB, nA, 4, nPixels),
tconf.view(nB, nA, nPixels),
tcls.view(nB, nA, nPixels),
)
def bbox_ious(boxes1, boxes2):
"""Compute IOU between all boxes from ``boxes1`` with all boxes from ``boxes2``.
Args:
boxes1 (torch.Tensor): List of bounding boxes
boxes2 (torch.Tensor): List of bounding boxes
Returns:
torch.Tensor[len(boxes1) X len(boxes2)]: IOU values
Note:
Tensor format: [[xc, yc, w, h],...]
"""
b1x1, b1y1 = (boxes1[:, :2] - (boxes1[:, 2:4] / 2)).split(1, 1)
b1x2, b1y2 = (boxes1[:, :2] + (boxes1[:, 2:4] / 2)).split(1, 1)
b2x1, b2y1 = (boxes2[:, :2] - (boxes2[:, 2:4] / 2)).split(1, 1)
b2x2, b2y2 = (boxes2[:, :2] + (boxes2[:, 2:4] / 2)).split(1, 1)
dx = (b1x2.min(b2x2.t()) - b1x1.max(b2x1.t())).clamp(min=0)
dy = (b1y2.min(b2y2.t()) - b1y1.max(b2y1.t())).clamp(min=0)
intersections = dx * dy
areas1 = (b1x2 - b1x1) * (b1y2 - b1y1)
areas2 = (b2x2 - b2x1) * (b2y2 - b2y1)
unions = (areas1 + areas2.t()) - intersections
return intersections / unions
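# Worked example (illustrative values): two unit squares whose centers are offset
# by 0.5 along x overlap in a 0.5-by-1 strip, so their IOU is 0.5 / (1 + 1 - 0.5) = 1/3.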
def bbox_wh_ious(boxes1, boxes2):
"""Shorter version of :func:`lightnet.network.loss._regionloss.bbox_ious`
for when we are only interested in W/H of the bounding boxes and not X/Y.
Args:
boxes1 (torch.Tensor): List of bounding boxes
boxes2 (torch.Tensor): List of bounding boxes
Returns:
torch.Tensor[len(boxes1) X len(boxes2)]: IOU values when discarding X/Y offsets (aka. as if they were zero)
Note:
Tensor format: [[xc, yc, w, h],...]
"""
b1w = boxes1[:, 2].unsqueeze(1)
b1h = boxes1[:, 3].unsqueeze(1)
b2w = boxes2[:, 2]
b2h = boxes2[:, 3]
intersections = b1w.min(b2w) * b1h.min(b2h)
unions = (b1w * b1h) + (b2w * b2h) - intersections
return intersections / unions
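# Usage sketch (not part of the original file): exercising RegionLoss on random data
# with the padded target layout described in the forward() docstring. The anchors
# match DEFAULT_ANCHORS in model.py; the class count is an illustrative assumption.
if __name__ == "__main__":
    anchors = [(1.08, 1.19), (3.42, 4.41), (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)]
    num_classes = 3
    criterion = RegionLoss(num_classes=num_classes, anchors=anchors, stride=32)
    # Fake network output for a batch of 2 on a 13x13 grid:
    # channel count = num_anchors * (5 + num_classes).
    output = torch.randn(2, len(anchors) * (5 + num_classes), 13, 13)
    # Targets: [batch, max_boxes, 5] rows of [class_idx, xc, yc, w, h] in relative
    # coordinates, padded with [-1, 0, 0, 0, 0].
    target = torch.full((2, 10, 5), -1.0)
    target[:, :, 1:] = 0.0
    target[0, 0] = torch.tensor([0.0, 0.50, 0.50, 0.20, 0.30])
    target[1, 0] = torch.tensor([2.0, 0.25, 0.75, 0.10, 0.10])
    loss = criterion(output, target)
    print(loss.item(), criterion.values)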
from collections import OrderedDict
from collections.abc import Iterable  # Iterable moved out of collections in Python 3.10
import lightnet.network as lnn
import pytorch_lightning as pl
import torch
import torch.nn as nn
from .loss import RegionLoss
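# Note: these default anchors are the Tiny YOLOv2 VOC anchors (per the TinyYoloV2
# docstring below), expressed in units of grid cells on the stride-32 output map.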
DEFAULT_ANCHORS = [
(1.08, 1.19),
(3.42, 4.41),
(6.63, 11.38),
(9.42, 5.11),
(16.62, 10.52),
]
class TinyYoloV2(lnn.module.Darknet):
"""Tiny Yolo v2 implementation :cite:`yolo_v2`.
Args:
num_classes (Number, optional): Number of classes; Default **20**
input_channels (Number, optional): Number of input channels; Default **3**
anchors (list, optional): 2D list with anchor values; Default **Tiny yolo v2 anchors (VOC)**
Attributes:
self.stride: Subsampling factor of the network (input_dim / output_dim)
self.inner_stride: Maximal internal subsampling factor of the network (input dimension should be a multiple of this)
self.remap_darknet: Remapping rules for weights from the :class:`~lightnet.models.Darknet` model.
"""
stride = 32
inner_stride = 32
remap_darknet = [
(r"^layers.0.(\d+_)", r"layers.\1"), # All base layers (1-13)
]
def __init__(self, num_classes, input_channels=3, anchors=DEFAULT_ANCHORS):
super().__init__()
        if not isinstance(anchors, Iterable) or not isinstance(anchors[0], Iterable):
            raise TypeError("Anchors need to be a 2D list of numbers")
# Parameters
self.num_classes = num_classes
self.input_channels = input_channels
self.anchors = anchors
# Network
# fmt: off
momentum = 0.01
self.layers = nn.Sequential(
OrderedDict([
('1_convbatch', lnn.layer.Conv2dBatchReLU(input_channels, 16, 3, 1, 1, momentum=momentum)),
('2_max', nn.MaxPool2d(2, 2)),
('3_convbatch', lnn.layer.Conv2dBatchReLU(16, 32, 3, 1, 1, momentum=momentum)),
('4_max', nn.MaxPool2d(2, 2)),
('5_convbatch', lnn.layer.Conv2dBatchReLU(32, 64, 3, 1, 1, momentum=momentum)),
('6_max', nn.MaxPool2d(2, 2)),
('7_convbatch', lnn.layer.Conv2dBatchReLU(64, 128, 3, 1, 1, momentum=momentum)),
('8_max', nn.MaxPool2d(2, 2)),
('9_convbatch', lnn.layer.Conv2dBatchReLU(128, 256, 3, 1, 1, momentum=momentum)),
('10_max', nn.MaxPool2d(2, 2)),
('11_convbatch', lnn.layer.Conv2dBatchReLU(256, 512, 3, 1, 1, momentum=momentum)),
('12_max', lnn.layer.PaddedMaxPool2d(2, 1, (0, 1, 0, 1))),
('13_convbatch', lnn.layer.Conv2dBatchReLU(512, 1024, 3, 1, 1, momentum=momentum)),
('14_convbatch', lnn.layer.Conv2dBatchReLU(1024, 1024, 3, 1, 1, momentum=momentum)),
('15_conv', nn.Conv2d(1024, len(self.anchors)*(5+self.num_classes), 1, 1, 0)),
])
)
# fmt: on
class TinyYoloPL(pl.LightningModule):
def __init__(self, num_classes, stride=32, anchors=DEFAULT_ANCHORS):
super().__init__()
self.num_classes = num_classes
self.anchors = anchors
self.stride = stride
        self.network = TinyYoloV2(num_classes, anchors=anchors)
self.loss = RegionLoss(
num_classes=self.network.num_classes,
anchors=self.network.anchors,
stride=self.network.stride,
)
def forward(self, image):
prediction = self.network(image)
return prediction
def training_step(self, batch, batch_idx):
_, images, targets = batch
prediction = self.network(images)
loss = self.loss(prediction, targets.cpu())
self.log("train_loss", loss)
return loss
def configure_optimizers(self):
optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
return optimizer
def validation_step(self, val_batch, batch_idx):
_, images, target = val_batch
prediction = self(images)
loss = self.loss(prediction, target)
self.log("val_loss", loss)
def test_step(self, test_batch, batch_idx):
# TODO: Add mAP and other standard obj detection metrics
_, images, target = test_batch
prediction = self(images)
loss = self.loss(prediction, target.cpu())
self.log("test_loss", loss)
return loss
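# Usage sketch (not part of the original file): a minimal, hypothetical training run
# wiring TinyYoloPL to a DataLoader via pytorch_lightning. The random tensors stand
# in for a real datamodule; the (ids, images, targets) batch layout is assumed from
# the unpacking in training_step above.
if __name__ == "__main__":
    from torch.utils.data import DataLoader, TensorDataset

    ids = torch.arange(8)
    images = torch.rand(8, 3, 416, 416)  # 416x416 inputs give a 13x13 output grid
    targets = torch.full((8, 10, 5), -1.0)  # padded [class_idx, xc, yc, w, h] rows
    targets[:, :, 1:] = 0.0
    targets[:, 0] = torch.tensor([0.0, 0.5, 0.5, 0.2, 0.3])  # one box per image
    loader = DataLoader(TensorDataset(ids, images, targets), batch_size=4)

    model = TinyYoloPL(num_classes=3)
    trainer = pl.Trainer(max_epochs=1, accelerator="cpu", logger=False)
    trainer.fit(model, loader)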