diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000000000000000000000000000000000000..2da9be9ac4ecfec68edac79f118cfe0be5e99862
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "examples/stand_count/pytorch-ssd"]
+	path = examples/stand_count/pytorch-ssd
+	url = git@github.com:qfgaohao/pytorch-ssd.git
diff --git a/examples/stand_count/metric.py b/examples/stand_count/metric.py
new file mode 100644
index 0000000000000000000000000000000000000000..101b0792dc29de78b66afe877b56ed91212b9362
--- /dev/null
+++ b/examples/stand_count/metric.py
@@ -0,0 +1,38 @@
+import numpy as np
+import torch
+from mean_average_precision import MeanAveragePrecision2d
+
+
+def calculate_mAP(gt_boxes, pred_boxes, pred_scores, style="pascal"):
+    if style == "pascal":
+        settings = {
+            "iou_thresholds": 0.5,
+            "recall_thresholds": np.arange(0.0, 1.1, 0.1),
+        }
+    elif style == "coco":
+        settings = {
+            "iou_thresholds": np.arange(0.5, 1.0, 0.05),
+            "recall_thresholds": np.arange(0.0, 1.01, 0.01),
+            "mpolicy": "soft",
+        }
+    else:
+        raise ValueError(f"Unrecognized style {style}")
+
+    map_calc = MeanAveragePrecision2d(num_classes=1)
+    torch2np = lambda x: x.detach().cpu().numpy()
+    for gt_boxes_, pred_boxes_, pred_scores_ in zip(gt_boxes, pred_boxes, pred_scores):
+        # Both ground truth and predicted boxes need to be ndarrays.
+        # Ground truth format: [xmin, ymin, xmax, ymax, class_id, difficult, crowd]
+        # Prediction format: [xmin, ymin, xmax, ymax, class_id, confidence]
+        n_gt, n_pred = len(gt_boxes_), len(pred_boxes_)
+        gt_np = np.zeros((n_gt, 7))
+        gt_np[:, :4] = torch2np(gt_boxes_)
+        gt_np[:, 4] = 0  # Class ID
+        pred_np = np.zeros((n_pred, 6))
+        pred_np[:, :4] = torch2np(pred_boxes_)
+        pred_np[:, 4] = 0  # Class ID
+        pred_np[:, 5] = torch2np(pred_scores_)
+        map_calc.add(pred_np, gt_np)
+    # Calculate mAP.
+    mAP = map_calc.value(**settings)["mAP"]
+    return torch.tensor(mAP)
diff --git a/examples/stand_count/pytorch-ssd b/examples/stand_count/pytorch-ssd
new file mode 160000
index 0000000000000000000000000000000000000000..f61ab424d09bf3d4bb3925693579ac0a92541b0d
--- /dev/null
+++ b/examples/stand_count/pytorch-ssd
@@ -0,0 +1 @@
+Subproject commit f61ab424d09bf3d4bb3925693579ac0a92541b0d
diff --git a/examples/stand_count/ssd.py b/examples/stand_count/ssd.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfca965ca1846fd7843ca83ad3fa749a75d0f1ab
--- /dev/null
+++ b/examples/stand_count/ssd.py
@@ -0,0 +1,211 @@
+from pathlib import Path
+from typing import Union
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from torch.nn.modules.module import Module
+from torch.utils.data.dataloader import default_collate
+
+from .vision.nn.multibox_loss import MultiboxLoss
+from .vision.ssd.ssd import SSD, MatchPrior
+from .vision.utils import box_utils
+
+# Common across SSD configs:
+center_variance = 0.1
+size_variance = 0.2
+
+
+class SSDWrapper(Module):
+    def __init__(
+        self,
+        net: SSD,
+        size: int,
+        image_mean: np.ndarray,
+        image_std: float,
+        priors: torch.Tensor,
+    ) -> None:
+        super().__init__()
+        self.net = net.eval()
+        self.input_shape = (1, 3, size, size)
+        mean = torch.as_tensor(image_mean).unsqueeze(-1).unsqueeze(-1)
+        self.register_buffer("mean", mean)
+        self.transform = lambda img: (F.interpolate(img, size) - self.mean) / image_std
+        self.register_buffer("priors", priors)
+        self.pprocessor = BoxPostprocessor()
+        self._scripting = False
+
+    def step0(self, image: torch.Tensor):
+        scores, locations = self.net.forward(self.transform(image))
+        return scores, locations
+
+    def step1(self, scores: torch.Tensor, locations: torch.Tensor):
+        confidences = F.softmax(scores, dim=2)
+        locations = box_utils.convert_locations_to_boxes(
+            locations,
+            self.priors,
+            center_variance,
+            size_variance,
+        )
+        boxes = box_utils.center_form_to_corner_form(locations)
+        return confidences, boxes
+
+    def step2(self, confidences: torch.Tensor, boxes: torch.Tensor):
+        postprocessed = [
+            self.pprocessor(image_scores, image_boxes)
+            for image_scores, image_boxes in zip(confidences, boxes)
+        ]
+        boxes, labels, confidences = zip(*postprocessed)
+        return boxes, labels, confidences
+
+    def forward(self, image: torch.Tensor):
+        scores, locations = self.step0(image)
+        if self.training:
+            return scores, locations
+        confidences, boxes = self.step1(scores, locations)
+        if self._scripting:
+            return confidences, boxes
+        boxes, labels, confidences = self.step2(confidences, boxes)
+        return boxes, labels, confidences, (scores, locations)
+
+
+def get_mobilenetv2_lite(prefix: str) -> SSDWrapper:
+    from .vision.ssd.config import mobilenetv1_ssd_config as config
+    from .vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite
+
+    ssd_model = create_mobilenetv2_ssd_lite(
+        2, onnx_compatible=True, is_test=False
+    ).eval()
+    ssd_model.init_from_pretrained_ssd(Path(prefix) / "mb2-ssd-lite-mp-0_686.pth")
+    wrapper = SSDWrapper(
+        ssd_model, config.image_size, config.image_mean, config.image_std, config.priors
+    )
+    return wrapper
+
+
+def get_vgg16(prefix: str) -> SSDWrapper:
+    from .vision.ssd.config import vgg_ssd_config as config
+    from .vision.ssd.vgg_ssd import create_vgg_ssd
+
+    ssd_model = create_vgg_ssd(2, is_test=False).eval()
+    ssd_model.init_from_pretrained_ssd(Path(prefix) / "vgg16-ssd-mp-0_7726.pth")
+    wrapper = SSDWrapper(
+        ssd_model, config.image_size, config.image_mean, config.image_std, config.priors
+    )
+    return wrapper
+
+
+class LossAndMAP:
+    def __init__(self, priors: torch.Tensor, device: Union[str, torch.device]) -> None:
+        priors = priors.cpu()
+        self.loss_fn = MultiboxLoss(
+            priors,
+            iou_threshold=0.5,
+            neg_pos_ratio=3,
+            center_variance=center_variance,
+            size_variance=size_variance,
+            device=device,
+        )
+        self.dataset_target_tr = MatchPrior(priors, center_variance, size_variance, 0.5)
+        self.device = device
+
+    def __call__(self, model_output, targets):
+        if len(model_output) == 2:
+            # Training outputs.
+            scores, locations = model_output
+            boxes, confidences = [], []
+        else:
+            # Evaluation outputs.
+            boxes, _, confidences, (scores, locations) = model_output
+        matched_labels, matched_boxes, gt_boxes = self._process_targets(targets)
+        reg_loss, cls_loss = self.loss_fn(
+            scores, locations, matched_labels, matched_boxes
+        )
+        return reg_loss, cls_loss, gt_boxes, boxes, confidences
+
+    def avg_reg_loss(self, loss_and_map):
+        reg_losses = list(zip(*loss_and_map))[0]
+        return torch.stack(reg_losses, 0).mean()
+
+    def avg_cls_loss(self, loss_and_map):
+        cls_losses = list(zip(*loss_and_map))[1]
+        return torch.stack(cls_losses, 0).mean()
+
+    def all_losses(self, loss_and_map):
+        return self.avg_reg_loss(loss_and_map) + self.avg_cls_loss(loss_and_map)
+
+    def combine_map(self, loss_and_map):
+        from .metric import calculate_mAP
+
+        flatten = lambda xss: [x for xs in xss for x in xs]
+        _, _, gt_boxes, boxes, confidences = zip(*loss_and_map)
+        gt_boxes = flatten(gt_boxes)
+        boxes, confidences = flatten(boxes), flatten(confidences)
+        return -calculate_mAP(gt_boxes, boxes, confidences)
+
+    def _process_targets(self, targets):
+        gt_boxes, matched_boxes, matched_labels = [], [], []
+        for t in targets:
+            this_boxes, this_labels = t["boxes"], t["labels"]
+            gt_boxes.append(this_boxes)
+            this_boxes_, this_labels_ = self.dataset_target_tr(
+                this_boxes.cpu(), this_labels.cpu()
+            )
+            matched_boxes.append(this_boxes_)
+            matched_labels.append(this_labels_)
+        matched_boxes = default_collate(matched_boxes).to(self.device)
+        matched_labels = default_collate(matched_labels).to(self.device)
+        return matched_labels, matched_boxes, gt_boxes
+
+
+class BoxPostprocessor(Module):
+    def __init__(
+        self,
+        nms_method=None,
+        iou_threshold=0.45,
+        filter_threshold=0.50,
+        candidate_size=200,
+        sigma=0.5,
+    ):
+        super().__init__()
+        self.iou_threshold = iou_threshold
+        self.filter_threshold = filter_threshold
+        self.candidate_size = candidate_size
+        self.nms_method = nms_method
+        self.sigma = sigma
+
+    def forward(self, scores, boxes, top_k=-1, prob_threshold=None):
+        cpu_device = torch.device("cpu")
+        prob_threshold = prob_threshold or self.filter_threshold
+        # This version of NMS is slower on GPU, so move the data to the CPU.
+        boxes = boxes.to(cpu_device)
+        scores = scores.to(cpu_device)
+        picked_box_probs = []
+        picked_labels = []
+        for class_index in range(1, scores.size(1)):
+            probs = scores[:, class_index]
+            mask = probs > prob_threshold
+            probs = probs[mask]
+            if probs.size(0) == 0:
+                continue
+            subset_boxes = boxes[mask, :]
+            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
+            box_probs = box_utils.nms(
+                box_probs,
+                self.nms_method,
+                score_threshold=prob_threshold,
+                iou_threshold=self.iou_threshold,
+                sigma=self.sigma,
+                top_k=top_k,
+                candidate_size=self.candidate_size,
+            )
+            picked_box_probs.append(box_probs)
+            picked_labels.extend([class_index] * box_probs.size(0))
+        if not picked_box_probs:
+            return torch.zeros(0, 4), torch.tensor([]), torch.tensor([])
+        picked_box_probs = torch.cat(picked_box_probs)
+        return (
+            picked_box_probs[:, :4],
+            torch.tensor(picked_labels),
+            picked_box_probs[:, 4],
+        )  # boxes, labels, scores
diff --git a/examples/stand_count/vision b/examples/stand_count/vision
new file mode 120000
index 0000000000000000000000000000000000000000..c69be8baf28e36e160cc6935b5cb768b7754b6a7
--- /dev/null
+++ b/examples/stand_count/vision
@@ -0,0 +1 @@
+pytorch-ssd/vision
\ No newline at end of file
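
Usage sketch for the pieces added above: SSDWrapper.forward returns post-processed detections plus the raw (scores, locations) pair, LossAndMAP turns one batch of outputs into loss terms and detection lists, and combine_map runs the accumulated detections through calculate_mAP. The following is a minimal illustration, not a tested pipeline: it assumes the repository root is on PYTHONPATH so the example imports as a package, that the VGG16 checkpoint has been downloaded into a hypothetical weights/ directory, and that boxes are given in relative [0, 1] corner form to match the prior boxes used by MatchPrior.

import torch

from examples.stand_count.ssd import LossAndMAP, get_vgg16

wrapper = get_vgg16("weights")  # hypothetical directory holding vgg16-ssd-mp-0_7726.pth
wrapper.eval()
loss_and_map = LossAndMAP(wrapper.priors, device="cpu")

# One dummy RGB batch; SSDWrapper.transform resizes it to the SSD input size.
image = torch.rand(1, 3, 480, 640)
# Targets use the {"boxes", "labels"} dict layout read by _process_targets,
# with corner-form boxes in relative coordinates and class 1 as foreground.
targets = [{"boxes": torch.tensor([[0.1, 0.1, 0.5, 0.5]]),
            "labels": torch.tensor([1])}]

with torch.no_grad():
    output = wrapper(image)  # boxes, labels, confidences, (scores, locations)
per_batch = [loss_and_map(output, targets)]  # accumulate one tuple per batch

print(loss_and_map.all_losses(per_batch))    # regression + classification loss
print(-loss_and_map.combine_map(per_batch))  # combine_map returns negated mAP

Note that combine_map negates the mAP, presumably so that a smaller value is uniformly better when the result is consumed like a loss; negate it back for reporting, as above.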