From fe27ab90fe4f2ee3e80ead37f3d9d1866d57635e Mon Sep 17 00:00:00 2001
From: Neta Zmora <31280975+nzmora@users.noreply.github.com>
Date: Mon, 27 May 2019 00:05:41 +0300
Subject: [PATCH] Added support for setting the PRNG seed (#269)

Added set_seed() to Distiller and added support for seeding the PRNG when setting --deterministic mode (prior to this change, the seed was always set to zero when running in deterministic mode).
The PRNGs of PyTorch (CPU & CUDA devices), NumPy and Python are seeded.
Added support for ```--seed``` to compress_classifier.py.
---
 distiller/utils.py                            | 27 ++++++++++++++-----
 .../compress_classifier.py                    | 14 +++++-----
 examples/classifier_compression/parser.py     |  2 ++
 3 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/distiller/utils.py b/distiller/utils.py
index f0b24a5..72994ab 100755
--- a/distiller/utils.py
+++ b/distiller/utils.py
@@ -595,18 +595,31 @@ def make_non_parallel_copy(model):
     return new_model
 
 
-def set_deterministic():
-    msglogger.debug('set_deterministic is called')
-    torch.manual_seed(0)
-    random.seed(0)
-    np.random.seed(0)
+def set_seed(seed):
+    """Seed the PRNG for the CPU, Cuda, numpy and Python"""
+    torch.manual_seed(seed)
+    random.seed(seed)
+    np.random.seed(seed)
+
+
+def set_deterministic(seed=0):
+    '''Try to configure the system for reproducible results.
+
+    Experiment reproducibility is sometimes important.  Pete Warden expounded about this
+    in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
+    For Pytorch specifics see: https://pytorch.org/docs/stable/notes/randomness.html#reproducibility
+    '''
+    msglogger.debug('set_deterministic was invoked')
+    if seed is None:
+        seed = 0
+    set_seed(seed)
     torch.backends.cudnn.deterministic = True
     torch.backends.cudnn.benchmark = False
 
 
 def yaml_ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict):
-    """
-    Function to load YAML file using an OrderedDict
+    """Function to load YAML file using an OrderedDict
+
     See: https://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts
     """
     class OrderedLoader(Loader):
diff --git a/examples/classifier_compression/compress_classifier.py b/examples/classifier_compression/compress_classifier.py
index 4fa9385..b4e81f0 100755
--- a/examples/classifier_compression/compress_classifier.py
+++ b/examples/classifier_compression/compress_classifier.py
@@ -102,22 +102,22 @@ def main():
         msglogger.logdir, gitroot=module_path)
     msglogger.debug("Distiller: %s", distiller.__version__)
 
-    start_epoch = 0
-    ending_epoch = args.epochs
-    perf_scores_history = []
-
     if args.evaluate:
         args.deterministic = True
     if args.deterministic:
-        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
-        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
-        distiller.set_deterministic()  # Use a well-known seed, for repeatability of experiments
+        distiller.set_deterministic(args.seed) # For experiment reproducibility
     else:
+        if args.seed is not None:
+            distiller.set_seed(args.seed)
         # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
         # classification models, as the input sizes don't change during the run
         # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
         cudnn.benchmark = True
 
+    start_epoch = 0
+    ending_epoch = args.epochs
+    perf_scores_history = []
+
     if args.cpu or not torch.cuda.is_available():
         # Set GPU index to -1 if using CPU
         args.device = 'cpu'
diff --git a/examples/classifier_compression/parser.py b/examples/classifier_compression/parser.py
index 5bc4870..f19abac 100755
--- a/examples/classifier_compression/parser.py
+++ b/examples/classifier_compression/parser.py
@@ -97,6 +97,8 @@ def get_parser():
                         help='file with extra configuration information')
     parser.add_argument('--deterministic', '--det', action='store_true',
                         help='Ensure deterministic execution for re-producible results.')
+    parser.add_argument('--seed', type=int, default=None,
+                        help='seed the PRNG for CPU, CUDA, numpy, and Python')
     parser.add_argument('--gpus', metavar='DEV_ID', default=None,
                         help='Comma-separated list of GPU device IDs to be used (default is to use all available devices)')
     parser.add_argument('--cpu', action='store_true', default=False,
-- 
GitLab