From 87055fed974f4e4e0851d6c03c0859a4d29e1dc9 Mon Sep 17 00:00:00 2001
From: Neta Zmora <neta.zmora@intel.com>
Date: Mon, 4 Mar 2019 01:45:33 +0200
Subject: [PATCH] compress_classifier.py: Fix best_epoch logic

Based on a commit and ideas from @barrh:
https://github.com/NervanaSystems/distiller/pull/150/commits/1623db3cdc3a95ab620e2dc6863cff23a91087bd

The sample application compress_classifier.py logs details about the
best-performing epoch(s) and stores the best epoch in a checkpoint file
named ```best.pth.tar``` by default (if you use the ```--name```
application argument, the checkpoint name is prefixed by ```best```).

Until this fix, a model's performance was judged solely by its Top1
accuracy.  This is a problem when performing gradual pruning of a
pre-trained model, because a model's Top1 accuracy often increases under
light pruning, and that epoch is then registered as the best-performing
training epoch.  However, we are really interested in the best-performing
trained model _after_ the pruning phase is done.  Even during training,
we may be interested in the checkpoint of the best-performing model with
the highest sparsity.

This fix stores a list of the performance results of all the epochs
trained so far.  The list is sorted using a hierarchical key
(sparsity, top1, top5, epoch), so it is ordered first by sparsity,
then by top1, top5, and epoch.

But what if you want to sort by a different metric?  For example, when
quantizing you may want to score the best performance by the total number
of bits used to represent the model parameters and feature-maps.  In that
case you may want to replace ```sparsity``` with this new metric (a sketch
of this idea follows the patch).  Because this is a sample application, we
do not load it with every possible piece of control logic, and anyone can
make local changes to this logic.  To keep your code separated from the
main application logic, we plan to refactor the application code sometime
in the next few months.
---
 .../compress_classifier.py | 32 +++++++++----------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/examples/classifier_compression/compress_classifier.py b/examples/classifier_compression/compress_classifier.py
index 7d49d2e..a4730c3 100755
--- a/examples/classifier_compression/compress_classifier.py
+++ b/examples/classifier_compression/compress_classifier.py
@@ -53,8 +53,6 @@ models, or with the provided sample models:
 import math
 import time
 import os
-import sys
-import random
 import traceback
 import logging
 from collections import OrderedDict
@@ -75,6 +73,7 @@ import distiller.quantization as quantization
 import examples.automated_deep_compression as adc
 from distiller.models import ALL_MODEL_NAMES, create_model
 import parser
+import operator
 
 
 # Logger handle
@@ -95,14 +94,11 @@ def main():
 
     # Log various details about the execution environment. It is sometimes useful
     # to refer to past experiment executions and this information may be useful.
-    apputils.log_execution_env_state(args.compress,
-                                     msglogger.logdir, gitroot=module_path)
+    apputils.log_execution_env_state(args.compress, msglogger.logdir, gitroot=module_path)
     msglogger.debug("Distiller: %s", distiller.__version__)
 
     start_epoch = 0
-    best_epochs = [distiller.MutableNamedTuple({'epoch': 0, 'top1': 0, 'sparsity': 0})
-                   for i in range(args.num_best_scores)]
-
+    perf_scores_history = []
     if args.deterministic:
         # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
         # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
@@ -284,17 +280,19 @@ def main():
         compression_scheduler.on_epoch_end(epoch, optimizer)
 
         # Update the list of top scores achieved so far, and save the checkpoint
-        is_best = top1 > best_epochs[-1].top1
-        if top1 > best_epochs[0].top1:
-            best_epochs[0].epoch = epoch
-            best_epochs[0].top1 = top1
-            # Keep best_epochs sorted such that best_epochs[0] is the lowest top1 in the best_epochs list
-            best_epochs = sorted(best_epochs, key=lambda score: score.top1)
-        for score in reversed(best_epochs):
-            if score.top1 > 0:
-                msglogger.info('==> Best Top1: %.3f on Epoch: %d', score.top1, score.epoch)
+        sparsity = distiller.model_sparsity(model)
+        perf_scores_history.append(distiller.MutableNamedTuple({'sparsity': sparsity, 'top1': top1,
+                                                                'top5': top5, 'epoch': epoch}))
+        # Keep perf_scores_history sorted from best to worst
+        # Sort by sparsity as main sort key, then sort by top1, top5 and epoch
+        perf_scores_history.sort(key=operator.attrgetter('sparsity', 'top1', 'top5', 'epoch'), reverse=True)
+        for score in perf_scores_history[:args.num_best_scores]:
+            msglogger.info('==> Best [Top1: %.3f Top5: %.3f Sparsity: %.2f on epoch: %d]',
+                           score.top1, score.top5, score.sparsity, score.epoch)
+
+        is_best = epoch == perf_scores_history[0].epoch
         apputils.save_checkpoint(epoch, args.arch, model, optimizer, compression_scheduler,
-                                 best_epochs[-1].top1, is_best, args.name, msglogger.logdir)
+                                 perf_scores_history[0].top1, is_best, args.name, msglogger.logdir)
 
     # Finally run results on the test set
     test(test_loader, model, criterion, [pylogger], activations_collectors, args=args)
-- 
GitLab
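
The sketch below is a minimal, standalone illustration of the ranking logic
described in the commit message; it is plain Python with made-up numbers, not
Distiller code, and the ```total_bits``` field is a hypothetical metric name
used only to show how the sort key could be swapped.

```python
# Standalone sketch of the perf_scores_history ranking -- not part of Distiller.
# The Score values are made up; 'total_bits' is a hypothetical metric name.
import operator
from collections import namedtuple

Score = namedtuple('Score', ['sparsity', 'top1', 'top5', 'epoch'])

perf_scores_history = [
    Score(sparsity=0.0,  top1=76.1, top5=92.9, epoch=0),   # dense baseline
    Score(sparsity=35.2, top1=76.4, top5=93.0, epoch=3),   # light pruning, best Top1
    Score(sparsity=62.8, top1=75.9, top5=92.7, epoch=9),   # target sparsity reached
]

# The patch's ordering: sparsity is the primary key, then top1, top5, epoch.
# With reverse=True the highest-sparsity (then most accurate) entry comes first,
# so the lightly-pruned epoch 3 no longer wins just because its Top1 is highest.
perf_scores_history.sort(
    key=operator.attrgetter('sparsity', 'top1', 'top5', 'epoch'), reverse=True)
best = perf_scores_history[0]
print('best: epoch %d  sparsity %.1f  top1 %.2f' % (best.epoch, best.sparsity, best.top1))

# To rank by another metric -- e.g. total bits in a quantization experiment --
# record that field instead of sparsity and make it the leading sort key.
# Negating it keeps reverse=True semantics while preferring *smaller* models:
QScore = namedtuple('QScore', ['total_bits', 'top1', 'top5', 'epoch'])
quant_history = [QScore(2.1e8, 75.8, 92.6, 4), QScore(1.4e8, 75.2, 92.1, 8)]
quant_history.sort(key=lambda s: (-s.total_bits, s.top1, s.top5, s.epoch), reverse=True)
print('best quantized: epoch %d  bits %.2e' % (quant_history[0].epoch, quant_history[0].total_bits))
```

Because accuracy is only a tie-breaker, two checkpoints at the same sparsity
(or bit budget) still rank by Top1, which preserves the old behaviour within a
single compression level.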