From 8c07eb1eec7fa25cac4d06db97255cc85ed9dbe1 Mon Sep 17 00:00:00 2001
From: Neta Zmora <neta.zmora@intel.com>
Date: Sun, 17 Feb 2019 10:56:55 +0200
Subject: [PATCH] AMC: added configuration option to set the frequency of
 computing a reward

--amc-reward-frequency

Computing the reward requires running the evaluated network on the Test
dataset (or parts of it) and may involve short-term fine-tuning before the
evaluation (depending on the configuration).
Use this new argument to configure the number of steps/iterations between
reward computation.
---
 examples/automated_deep_compression/ADC.py     | 25 +++++++++++++------------
 .../automated_deep_compression/automl_args.py  |  4 ++--
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/examples/automated_deep_compression/ADC.py b/examples/automated_deep_compression/ADC.py
index 590acf4..e92f03a 100755
--- a/examples/automated_deep_compression/ADC.py
+++ b/examples/automated_deep_compression/ADC.py
@@ -214,11 +214,11 @@ def do_adc_internal(model, args, optimizer_data, validate_fn, save_checkpoint_fn
     amc_cfg = distiller.utils.MutableNamedTuple({
         'protocol': args.amc_protocol,
         'agent_algo': args.amc_agent_algo,
-        'compute_reward_every_step': args.amc_reward_every_step,
         'perform_thinning': perform_thinning,
         'num_ft_epochs': num_ft_epochs,
         'action_range': action_range,
-        'conv_cnt': conv_cnt})
+        'conv_cnt': conv_cnt,
+        'reward_frequency': args.amc_reward_frequency})
 
     #net_wrapper = NetworkWrapper(model, app_args, services)
     #return sample_networks(net_wrapper, services)
@@ -239,13 +239,14 @@ def do_adc_internal(model, args, optimizer_data, validate_fn, save_checkpoint_fn
         raise ValueError("{} is not supported currently".format(args.amc_protocol))
 
     steps_per_episode = conv_cnt
-    amc_cfg.heatup_noise = 0.5
-    amc_cfg.initial_training_noise = 0.5
-    amc_cfg.training_noise_decay = 0.996  # 0.998
-    amc_cfg.num_heatup_epochs = args.amc_heatup_epochs
-    amc_cfg.num_training_epochs = args.amc_training_epochs
-    training_noise_duration = amc_cfg.num_training_epochs * steps_per_episode
-    heatup_duration = amc_cfg.num_heatup_epochs * steps_per_episode
+    if args.amc_agent_algo == "DDPG":
+        amc_cfg.heatup_noise = 0.5
+        amc_cfg.initial_training_noise = 0.5
+        amc_cfg.training_noise_decay = 0.996  # 0.998
+        amc_cfg.num_heatup_epochs = args.amc_heatup_epochs
+        amc_cfg.num_training_epochs = args.amc_training_epochs
+        training_noise_duration = amc_cfg.num_training_epochs * steps_per_episode
+        heatup_duration = amc_cfg.num_heatup_epochs * steps_per_episode
 
     if amc_cfg.agent_algo == "Random-policy":
         return random_agent(DistillerWrapperEnvironment(model, app_args, amc_cfg, services))
@@ -603,10 +604,10 @@ class DistillerWrapperEnvironment(gym.Env):
             self.episode += 1
         else:
             observation = self.get_obs()
-            reward = 0
-            if self.amc_cfg.compute_reward_every_step:
+            if self.amc_cfg.reward_frequency > 0 and self.current_layer_id % self.amc_cfg.reward_frequency == 0:
                 reward, top1, total_macs, total_nnz = self.compute_reward(False)
-
+            else:
+                reward = 0
             self.prev_action = pruning_action
         info = {}
         return observation, reward, self.episode_is_done(), info
diff --git a/examples/automated_deep_compression/automl_args.py b/examples/automated_deep_compression/automl_args.py
index e15907c..a598189 100755
--- a/examples/automated_deep_compression/automl_args.py
+++ b/examples/automated_deep_compression/automl_args.py
@@ -22,8 +22,8 @@ def add_automl_args(argparser, arch_choices=None, enable_pretrained=False):
                        help='The number of epochs for heatup/exploration')
     group.add_argument('--amc-training-epochs', type=int, default=300,
                        help='The number of epochs for training/exploitation')
-    group.add_argument('--amc-reward-every-step', action='store_true', default=False,
-                       help='Compute the reward at every step')
+    group.add_argument('--amc-reward-frequency', type=int, default=-1,
+                       help='Reward computation frequency (measured in agent steps)')
     group.add_argument('--amc-target-density', type=float,
                        help='Target density of the network we are seeking')
     group.add_argument('--amc-agent-algo', choices=["ClippedPPO-continuous",
-- 
GitLab
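
Editor's note: the behavioral core of this patch is the gating condition added to
DistillerWrapperEnvironment.step(). An intermediate reward is computed only on agent
steps whose layer id is a multiple of reward_frequency, and the new default of -1
disables intermediate rewards entirely (the end-of-episode reward path is outside this
hunk and unchanged by it). Below is a minimal, self-contained Python sketch of that
condition; it mirrors the names used in the diff, but the helper function itself is
hypothetical and not part of the patch.

# Sketch only -- not part of the patch. Mirrors the gating condition added to
# DistillerWrapperEnvironment.step(); the function name is hypothetical.

def intermediate_reward_due(reward_frequency, current_layer_id):
    """Return True when a (costly) intermediate reward should be computed on
    this agent step.

    reward_frequency <= 0 (the new default, -1) disables intermediate rewards;
    otherwise a reward is computed whenever the current layer id is a multiple
    of reward_frequency.
    """
    return reward_frequency > 0 and current_layer_id % reward_frequency == 0


# With --amc-reward-frequency=4, only every fourth layer id triggers an
# intermediate reward computation:
assert intermediate_reward_due(4, 8)
assert not intermediate_reward_due(4, 3)
# With the default of -1, intermediate rewards are skipped entirely:
assert not intermediate_reward_due(-1, 8)

Since each reward computation evaluates the compressed model on (part of) the test set
and may involve short-term fine-tuning, a larger --amc-reward-frequency value means
fewer intermediate evaluations per episode, trading reward-signal density for shorter
episode wall-clock time; setting it to 1 reproduces the behavior of the removed
--amc-reward-every-step flag.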