From fe27ab90fe4f2ee3e80ead37f3d9d1866d57635e Mon Sep 17 00:00:00 2001
From: Neta Zmora <31280975+nzmora@users.noreply.github.com>
Date: Mon, 27 May 2019 00:05:41 +0300
Subject: [PATCH] Added support for setting the PRNG seed (#269)

Added set_seed() to Distiller and added support for seeding the PRNG
when setting --deterministic mode (prior to this change, the seed is
always set to zero when running in deterministic mode.
The PRNGs of Pytorch (CPU & Cuda devices), numpy and Python are set.

Added support for ```--seed``` to classifier_compression.py.
---
 distiller/utils.py                        | 27 ++++++++++++++-----
 .../compress_classifier.py                | 14 +++++-----
 examples/classifier_compression/parser.py |  2 ++
 3 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/distiller/utils.py b/distiller/utils.py
index f0b24a5..72994ab 100755
--- a/distiller/utils.py
+++ b/distiller/utils.py
@@ -595,18 +595,31 @@ def make_non_parallel_copy(model):
     return new_model
 
 
-def set_deterministic():
-    msglogger.debug('set_deterministic is called')
-    torch.manual_seed(0)
-    random.seed(0)
-    np.random.seed(0)
+def set_seed(seed):
+    """Seed the PRNG for the CPU, Cuda, numpy and Python"""
+    torch.manual_seed(seed)
+    random.seed(seed)
+    np.random.seed(seed)
+
+
+def set_deterministic(seed=0):
+    '''Try to configure the system for reproducible results.
+
+    Experiment reproducibility is sometimes important. Pete Warden expounded about this
+    in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
+    For Pytorch specifics see: https://pytorch.org/docs/stable/notes/randomness.html#reproducibility
+    '''
+    msglogger.debug('set_deterministic was invoked')
+    if seed is None:
+        seed = 0
+    set_seed(seed)
     torch.backends.cudnn.deterministic = True
     torch.backends.cudnn.benchmark = False
 
 
 def yaml_ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict):
-    """
-    Function to load YAML file using an OrderedDict
+    """Function to load YAML file using an OrderedDict
+
+    See: https://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts
     """
     class OrderedLoader(Loader):
         pass
diff --git a/examples/classifier_compression/compress_classifier.py b/examples/classifier_compression/compress_classifier.py
index 4fa9385..b4e81f0 100755
--- a/examples/classifier_compression/compress_classifier.py
+++ b/examples/classifier_compression/compress_classifier.py
@@ -102,22 +102,22 @@ def main():
                                          msglogger.logdir, gitroot=module_path)
     msglogger.debug("Distiller: %s", distiller.__version__)
 
-    start_epoch = 0
-    ending_epoch = args.epochs
-    perf_scores_history = []
-
     if args.evaluate:
         args.deterministic = True
     if args.deterministic:
-        # Experiment reproducibility is sometimes important. Pete Warden expounded about this
-        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
-        distiller.set_deterministic() # Use a well-known seed, for repeatability of experiments
+        distiller.set_deterministic(args.seed) # For experiment reproducability
     else:
+        if args.seed is not None:
+            distiller.set_seed(args.seed)
         # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
         # classification models, as the input sizes don't change during the run
         # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
         cudnn.benchmark = True
 
+    start_epoch = 0
+    ending_epoch = args.epochs
+    perf_scores_history = []
+
     if args.cpu or not torch.cuda.is_available():
         # Set GPU index to -1 if using CPU
         args.device = 'cpu'
diff --git a/examples/classifier_compression/parser.py b/examples/classifier_compression/parser.py
index 5bc4870..f19abac 100755
--- a/examples/classifier_compression/parser.py
+++ b/examples/classifier_compression/parser.py
@@ -97,6 +97,8 @@ def get_parser():
                         help='file with extra configuration information')
     parser.add_argument('--deterministic', '--det', action='store_true',
                         help='Ensure deterministic execution for re-producible results.')
+    parser.add_argument('--seed', type=int, default=None,
+                        help='seed the PRNG for CPU, CUDA, numpy, and Python')
     parser.add_argument('--gpus', metavar='DEV_ID', default=None,
                         help='Comma-separated list of GPU device IDs to be used (default is to use all available devices)')
     parser.add_argument('--cpu', action='store_true', default=False,
-- 
GitLab