From 8c07eb1eec7fa25cac4d06db97255cc85ed9dbe1 Mon Sep 17 00:00:00 2001
From: Neta Zmora <neta.zmora@intel.com>
Date: Sun, 17 Feb 2019 10:56:55 +0200
Subject: [PATCH] AMC: added a configuration option to set the reward
 computation frequency

--amc-reward-frequency
Computing the reward requires running the evaluated network on the test
dataset (or a subset of it) and may involve short-term fine-tuning before
the evaluation (depending on the configuration), so it is expensive to do
at every step.
Use this new argument to set the number of agent steps between reward
computations.
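
For reference, a minimal sketch of the gating this option enables (the
step_reward helper below is illustrative only and not part of the patch;
the real change is in DistillerWrapperEnvironment.step):

    def step_reward(step_id, reward_frequency, compute_reward):
        # Illustrative: gate the expensive reward evaluation by agent step,
        # mirroring the new reward_frequency check.
        if reward_frequency > 0 and step_id % reward_frequency == 0:
            return compute_reward()  # runs evaluation (+ optional fine-tuning)
        return 0                     # skip the expensive evaluation this step

    # e.g. with --amc-reward-frequency=4, only every 4th step pays the cost:
    # [step_reward(i, 4, lambda: 0.9) for i in range(8)]
    # -> [0.9, 0, 0, 0, 0.9, 0, 0, 0]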
---
 examples/automated_deep_compression/ADC.py    | 25 ++++++++++---------
 .../automated_deep_compression/automl_args.py |  4 +--
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/examples/automated_deep_compression/ADC.py b/examples/automated_deep_compression/ADC.py
index 590acf4..e92f03a 100755
--- a/examples/automated_deep_compression/ADC.py
+++ b/examples/automated_deep_compression/ADC.py
@@ -214,11 +214,11 @@ def do_adc_internal(model, args, optimizer_data, validate_fn, save_checkpoint_fn
     amc_cfg = distiller.utils.MutableNamedTuple({
             'protocol': args.amc_protocol,
             'agent_algo': args.amc_agent_algo,
-            'compute_reward_every_step': args.amc_reward_every_step,
             'perform_thinning': perform_thinning,
             'num_ft_epochs': num_ft_epochs,
             'action_range': action_range,
-            'conv_cnt': conv_cnt})
+            'conv_cnt': conv_cnt,
+            'reward_frequency': args.amc_reward_frequency})
 
     #net_wrapper = NetworkWrapper(model, app_args, services)
     #return sample_networks(net_wrapper, services)
@@ -239,13 +239,14 @@ def do_adc_internal(model, args, optimizer_data, validate_fn, save_checkpoint_fn
         raise ValueError("{} is not supported currently".format(args.amc_protocol))
 
     steps_per_episode = conv_cnt
-    amc_cfg.heatup_noise = 0.5
-    amc_cfg.initial_training_noise = 0.5
-    amc_cfg.training_noise_decay = 0.996  # 0.998
-    amc_cfg.num_heatup_epochs = args.amc_heatup_epochs
-    amc_cfg.num_training_epochs = args.amc_training_epochs
-    training_noise_duration = amc_cfg.num_training_epochs * steps_per_episode
-    heatup_duration = amc_cfg.num_heatup_epochs * steps_per_episode
+    if args.amc_agent_algo == "DDPG":
+        amc_cfg.heatup_noise = 0.5
+        amc_cfg.initial_training_noise = 0.5
+        amc_cfg.training_noise_decay = 0.996  # 0.998
+        amc_cfg.num_heatup_epochs = args.amc_heatup_epochs
+        amc_cfg.num_training_epochs = args.amc_training_epochs
+        training_noise_duration = amc_cfg.num_training_epochs * steps_per_episode
+        heatup_duration = amc_cfg.num_heatup_epochs * steps_per_episode
 
     if amc_cfg.agent_algo == "Random-policy":
         return random_agent(DistillerWrapperEnvironment(model, app_args, amc_cfg, services))
@@ -603,10 +604,10 @@ class DistillerWrapperEnvironment(gym.Env):
             self.episode += 1
         else:
             observation = self.get_obs()
-            reward = 0
-            if self.amc_cfg.compute_reward_every_step:
+            if self.amc_cfg.reward_frequency > 0 and self.current_layer_id % self.amc_cfg.reward_frequency == 0:
                 reward, top1, total_macs, total_nnz = self.compute_reward(False)
-
+            else:
+                reward = 0
         self.prev_action = pruning_action
         info = {}
         return observation, reward, self.episode_is_done(), info
diff --git a/examples/automated_deep_compression/automl_args.py b/examples/automated_deep_compression/automl_args.py
index e15907c..a598189 100755
--- a/examples/automated_deep_compression/automl_args.py
+++ b/examples/automated_deep_compression/automl_args.py
@@ -22,8 +22,8 @@ def add_automl_args(argparser, arch_choices=None, enable_pretrained=False):
                        help='The number of epochs for heatup/exploration')
     group.add_argument('--amc-training-epochs', type=int, default=300,
                        help='The number of epochs for training/exploitation')
-    group.add_argument('--amc-reward-every-step', action='store_true', default=False,
-                       help='Compute the reward at every step')
+    group.add_argument('--amc-reward-frequency', type=int, default=-1,
+                       help='Reward computation frequency in agent steps (non-positive disables intermediate rewards)')
     group.add_argument('--amc-target-density', type=float,
                        help='Target density of the network we are seeking')
     group.add_argument('--amc-agent-algo', choices=["ClippedPPO-continuous",
-- 
GitLab