Commit ac1235a5 authored by Neta Zmora

Update ADC to newest Coach APIs

parent a43b9f10
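
The bulk of the diff below is mechanical: newer Coach releases (distributed on PyPI as `rl-coach`) move every module under the `rl_coach` package, so each bare Coach import gains an `rl_coach.` prefix. A minimal sketch of the pattern, using two modules that appear in this commit:

    # Before: top-level Coach modules (fails against newer Coach releases)
    #   from schedules import ConstantSchedule
    #   from core_types import EnvironmentSteps

    # After: the same classes under the rl_coach namespace
    from rl_coach.schedules import ConstantSchedule
    from rl_coach.core_types import EnvironmentSteps

    # Only the package namespace changed; the classes behave the same.
    noise_schedule = ConstantSchedule(0.5)
    horizon = EnvironmentSteps(100 * 13)  # 100 episodes of 13 steps each

The commit also replaces the environment's hard-coded reward expression with an injected `reward_fn` callable (see ADC.py below).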
examples/automated_deep_compression/ADC.py

@@ -19,6 +19,7 @@ import copy
 import logging
 import numpy as np
 import torch
+import json
 import gym
 from gym import spaces
 import distiller
@@ -27,15 +28,15 @@ from collections import OrderedDict, namedtuple
 from types import SimpleNamespace
 from distiller import normalize_module_name
-from base_parameters import TaskParameters
+from rl_coach.base_parameters import TaskParameters
 # When we import the graph_manager from the ADC_DDPG preset, we implicitly instruct
 # Coach to create and use our CNNEnvironment environment.
 # So Distiller calls Coach, which creates the environment, trains the agent, and ends.
 from examples.automated_deep_compression.presets.ADC_DDPG import graph_manager, agent_params
 # Coach imports
-from schedules import ConstantSchedule, PieceWiseSchedule, ExponentialSchedule
-from core_types import EnvironmentSteps
+from rl_coach.schedules import ConstantSchedule, PieceWiseSchedule, ExponentialSchedule
+from rl_coach.core_types import EnvironmentSteps

 msglogger = logging.getLogger()
@@ -45,6 +46,26 @@ ALMOST_ONE = 0.9999
 USE_COACH = True
 PERFORM_THINNING = True
+
+#reward = -1 * (1-top1/100) * math.log(total_macs/self.dense_model_macs)
+#
+#reward = -1 * (1-top1/100) + math.log(total_macs/self.dense_model_macs)
+#reward = 4*top1/100 - math.log(total_macs)
+#reward = reward * total_macs/213201664
+#reward = reward - 5 * total_macs/213201664
+#reward = -1 * vloss * math.sqrt(math.log(total_macs))
+#reward = top1 / math.log(total_macs)
+#alpha = 0.9
+#reward = -1 * ( (1-alpha)*(top1/100) + 10*alpha*(total_macs/self.dense_model_macs) )
+#alpha = 0.99
+#reward = -1 * ( (1-alpha)*(top1/100) + alpha*(total_macs/self.dense_model_macs) )
+#reward = vloss * math.log(total_macs)
+#reward = -1 * vloss * (total_macs / self.dense_model_macs)
+#reward = top1 * (self.dense_model_macs / total_macs)
+#reward = -1 * math.log(total_macs)
+#reward = -1 * vloss
+
 def do_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn):
     np.random.seed()
@@ -63,7 +84,10 @@ def coach_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn
     # Create a dictionary of parameters that Coach will handover to CNNEnvironment
     # Once it creates it.
-    if False:
+    if True:
+        exploration_noise = 0.5
+        #exploration_noise = 0.25
+        exploitation_decay = 0.996
         graph_manager.env_params.additional_simulator_parameters = {
             'model': model,
             'dataset': dataset,
@@ -71,14 +95,21 @@ def coach_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn
             'data_loader': data_loader,
             'validate_fn': validate_fn,
             'save_checkpoint_fn': save_checkpoint_fn,
-            'exploration_noise': 0.5,
-            'exploitation_decay': 0.996,
-            'action_range': (0.10, 0.95),
+            #'action_range': (0.10, 0.95),
+            'action_range': (0.70, 0.95),
             'onehot_encoding': False,
             'normalize_obs': True,
-            'desired_reduction': None
+            'desired_reduction': None,
+            'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1-top5/100) * math.log(total_macs)
+            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs)
+            #'reward_fn': lambda top1, total_macs: -1 * max(1-top1/100, 0.25) * math.log(total_macs)
+            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs/100000)
+            #'reward_fn': lambda top1, total_macs: top1/100 * total_macs/self.dense_model_macs
         }
     else:
+        exploration_noise = 0.5
+        #exploration_noise = 0.25
+        exploitation_decay = 0.996
         graph_manager.env_params.additional_simulator_parameters = {
             'model': model,
             'dataset': dataset,
@@ -86,18 +117,19 @@ def coach_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn
             'data_loader': data_loader,
             'validate_fn': validate_fn,
             'save_checkpoint_fn': save_checkpoint_fn,
-            'exploration_noise': 0.5,
-            'exploitation_decay': 0.996,
             'action_range': (0.10, 0.95),
-            'onehot_encoding': True,
+            'onehot_encoding': False,
             'normalize_obs': True,
-            'desired_reduction': 2.0e8 # 1.5e8
+            'desired_reduction': 1.5e8,
+            'reward_fn': lambda top1, total_macs: top1/100
+            #'reward_fn': lambda top1, total_macs: min(top1/100, 0.75)
         }
+    #msglogger.debug('Experiment configuarion:\n' + json.dumps(graph_manager.env_params.additional_simulator_parameters, indent=2))
     steps_per_episode = 13
-    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([(ConstantSchedule(0.5),
+    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([(ConstantSchedule(exploration_noise),
                                                                              EnvironmentSteps(100*steps_per_episode)),
-                                                                            (ExponentialSchedule(0.5, 0, 0.996),
+                                                                            (ExponentialSchedule(exploration_noise, 0, exploitation_decay),
                                                                              EnvironmentSteps(300*steps_per_episode))])
     graph_manager.create_graph(task_parameters)
     graph_manager.improve()
@@ -107,8 +139,8 @@ class CNNEnvironment(gym.Env):
     metadata = {'render.modes': ['human']}

     def __init__(self, model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn,
-                 exploration_noise, exploitation_decay, action_range,
-                 onehot_encoding, normalize_obs, desired_reduction):
+                 action_range, onehot_encoding, normalize_obs, desired_reduction,
+                 reward_fn):
         self.pylogger = distiller.data_loggers.PythonLogger(msglogger)
         self.tflogger = distiller.data_loggers.TensorBoardLogger(msglogger.logdir)
@@ -121,6 +153,7 @@ class CNNEnvironment(gym.Env):
         self.onehot_encoding = onehot_encoding
         self.normalize_obs = normalize_obs
         self.max_reward = -1000
+        self.reward_fn = reward_fn

         self.conv_layers, self.dense_model_macs, self.dense_model_size = collect_conv_details(model, dataset)
         self.reset(init_only=True)
@@ -131,8 +164,6 @@ class CNNEnvironment(gym.Env):
         self.debug_stats = {'episode': 0}
         self.action_low = action_range[0]
         self.action_high = action_range[1]
-        self.exploitation_decay = exploitation_decay
-        self.exploration_noise = exploration_noise

         # Gym
         # spaces documentation: https://gym.openai.com/docs/
         self.action_space = spaces.Box(self.action_low, self.action_high, shape=(1,))
@@ -289,7 +320,7 @@ class CNNEnvironment(gym.Env):
                 self.max_reward = reward
                 self.save_checkpoint(is_best=True)
                 msglogger.info("Best reward={} episode={} top1={}".format(reward, self.debug_stats['episode'], top1))
-            self.save_checkpoint(is_best=False)
         else:
             observation = self._get_obs(next_layer_macs)
             if True:
@@ -428,30 +459,8 @@ class CNNEnvironment(gym.Env):
         msglogger.info("Total compute left: %.2f%%" % (total_macs/self.dense_model_macs*100))
         top1, top5, vloss = self.validate_fn(model=self.model, epoch=self.debug_stats['episode'])

-        #reward = -1 * (1 - top1/100)
-        if self.desired_reduction is not None:
-            reward = top1/100
-        else:
-            reward = -1 * (1-top1/100) * math.log(total_macs)
-        #reward = -1 * (1-top1/100) * math.log(total_macs/self.dense_model_macs)
-        #
-        #reward = -1 * (1-top1/100) + math.log(total_macs/self.dense_model_macs)
-        #reward = 4*top1/100 - math.log(total_macs)
-        #reward = reward * total_macs/213201664
-        #reward = reward - 5 * total_macs/213201664
-        #reward = -1 * vloss * math.sqrt(math.log(total_macs))
-        #reward = top1 / math.log(total_macs)
-        #alpha = 0.9
-        #reward = -1 * ( (1-alpha)*(top1/100) + 10*alpha*(total_macs/self.dense_model_macs) )
-        #alpha = 0.99
-        #reward = -1 * ( (1-alpha)*(top1/100) + alpha*(total_macs/self.dense_model_macs) )
-        #reward = vloss * math.log(total_macs)
-        #reward = -1 * vloss * (total_macs / self.dense_model_macs)
-        #reward = top1 * (self.dense_model_macs / total_macs)
-        #reward = -1 * math.log(total_macs)
-        #reward = -1 * vloss
+        reward = self.reward_fn(top1, top5, vloss, total_macs)

         stats = ('Peformance/Validation/',
                  OrderedDict([('Loss', vloss),
                               ('Top1', top1),
@@ -509,19 +518,22 @@ def collect_conv_details(model, dataset):
             conv.name = name
             conv.id = id
             conv_layers[len(conv_layers)] = conv
     return conv_layers, total_macs, total_nnz
+from examples.automated_deep_compression.adc_controlled_envs import *

 def random_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn):
     """Random ADC agent"""
     action_range = (0.0, 1.0)
     env = CNNEnvironment(model, dataset, arch, data_loader,
-                         validate_fn, save_checkpoint_fn, action_range)
+                         validate_fn, save_checkpoint_fn, action_range,
+                         onehot_encoding=False, normalize_obs=False, desired_reduction=None,
+                         reward_fn=lambda top1, total_macs: top1/100)

-    best = [-1000, None]
-    env.action_space = RandomADCActionSpace(action_range[0], action_range[1])
-    for ep in range(100):
+    best_episode = [-1000, None]
+    update_rate = 5
+    env.action_space = RandomADCActionSpace(action_range[0], action_range[1], std=0.35)
+    for ep in range(1000):
         observation = env.reset()
         action_config = []
         for t in range(100):
@@ -531,14 +543,17 @@ def random_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn
             action = env.action_space.sample()
             action_config.append(action)
             observation, reward, done, info = env.step(action)
-            if reward > best[0]:
-                best[0] = reward
-                best[1] = action_config
-                msglogger.info("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
-                msglogger.info("New solution found: episode={} reward={} config={}".format(ep, reward, action_config))
             if done:
                 msglogger.info("Episode finished after {} timesteps".format(t+1))
+                msglogger.info("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
+                msglogger.info("New solution found: episode={} reward={} config={}".format(ep, reward, action_config))
                 break
+        if reward > best_episode[0]:
+            best_episode[0] = reward
+            best_episode[1] = action_config
+        if ep % update_rate == 0:
+            env.action_space.set_cfg(means=best_episode[1], std=0.4)
+            best_episode = [-1000, None]
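
Besides the namespace updates, the substantive change in ADC.py is that reward shaping moves out of the environment: the large block of commented-out reward formulas is deleted, and the reward computation now evaluates an injected callable, `reward = self.reward_fn(top1, top5, vloss, total_macs)`. One caveat visible in the diff: that call site passes four arguments, while the `else` branch of `coach_adc` and `random_adc` supply two-argument lambdas, which would raise a TypeError when invoked. A minimal sketch of the injection pattern (the `Env` class here is a hypothetical stand-in, not Distiller code):

    import math

    class Env:
        # Hypothetical stand-in for CNNEnvironment's reward plumbing.
        def __init__(self, reward_fn):
            self.reward_fn = reward_fn  # injected reward-shaping callable

        def episode_end(self, top1, top5, vloss, total_macs):
            # Mirrors the new ADC.py call site: all four metrics are passed,
            # so any reward_fn handed in must accept four positional arguments.
            return self.reward_fn(top1, top5, vloss, total_macs)

    # The reward used in the `if True:` branch of coach_adc:
    env = Env(lambda top1, top5, vloss, total_macs: -1 * (1 - top5/100) * math.log(total_macs))
    print(env.episode_end(top1=71.2, top5=90.1, vloss=1.23, total_macs=2.1e8))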
 import os

examples/automated_deep_compression/adc_controlled_envs.py

 """This file contains a couple of environments used for debugging ADC reproduction.
 """
 import random
+import numpy as np
+from scipy.stats import truncnorm
 class RandomADCActionSpace(object):
-    def __init__(self, low, high):
-        self.low = low
-        self.high = high
+    def __init__(self, low, high, std):
+        self.clip_low = low
+        self.clip_high = high
+        self.layer = 0
+        self.num_layers = 13
+        #self.means = [high-low] * self.num_layers
+        self.means = [0.9, 0.9, 0.9, 0.9, 0.9, 0.8, 0.8, 0.7, 0.7, 0.6, 0.6, 0.5, 0.5]
+        self.std = std

     def sample(self):
-        return random.uniform(self.low, self.high)
+        return random.uniform(self.clip_low, self.clip_high)
+        action_values_mean = self.means[self.layer]
+        action_values_std = self.std
+        normalized_low = (self.clip_low - action_values_mean) / action_values_std
+        normalized_high = (self.clip_high - action_values_mean) / action_values_std
+        distribution = truncnorm(normalized_low, normalized_high, loc=action_values_mean, scale=action_values_std)
+        action = distribution.rvs(1)
+        # action = np.random.normal(self.means[self.layer], self.std)
+        # action = min(self.clip_high, max(action, self.clip_low))
+        self.layer = (self.layer + 1) % self.num_layers
+        return action
+
+    def set_cfg(self, means, std):
+        self.means = [0.01*m for m in self.means] + [0.99*m for m in means]
+        self.std = std
 class PredictableADCActionSpace(object):
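
The reworked `RandomADCActionSpace` draws per-layer actions from a truncated normal centered on a per-layer mean instead of a flat uniform. `scipy.stats.truncnorm` expects its truncation bounds in standard-deviation units relative to `loc`, which is why the code normalizes `clip_low`/`clip_high` before building the distribution. (As committed, `sample()` appears to keep an early `return random.uniform(...)`, leaving the truncated-normal path below it unreachable until that line is removed.) A self-contained sketch of the sampling step, with illustrative values:

    from scipy.stats import truncnorm

    def sample_truncated(mean, std, low, high):
        # truncnorm takes its bounds in units of std relative to loc,
        # so convert the absolute clip range first.
        a = (low - mean) / std
        b = (high - mean) / std
        return truncnorm(a, b, loc=mean, scale=std).rvs()

    # A layer whose mean keep-ratio is 0.9, clipped to the action range (0.0, 1.0):
    print(sample_truncated(mean=0.9, std=0.35, low=0.0, high=1.0))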

examples/automated_deep_compression/presets/ADC_DDPG.py

-from agents.ddpg_agent import DDPGAgentParameters
-from graph_managers.basic_rl_graph_manager import BasicRLGraphManager
-from graph_managers.graph_manager import ScheduleParameters
-from base_parameters import VisualizationParameters
-from core_types import EnvironmentEpisodes, EnvironmentSteps
-from environments.gym_environment import MujocoInputFilter, GymEnvironmentParameters, MujocoOutputFilter
-from exploration_policies.additive_noise import AdditiveNoiseParameters
-from exploration_policies.truncated_normal import TruncatedNormalParameters
-from schedules import ConstantSchedule, PieceWiseSchedule, ExponentialSchedule
-from memories.memory import MemoryGranularity
-from base_parameters import EmbedderScheme
-from architectures.tensorflow_components.architecture import Dense
+from rl_coach.agents.ddpg_agent import DDPGAgentParameters
+from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
+from rl_coach.graph_managers.graph_manager import ScheduleParameters
+from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
+from rl_coach.environments.gym_environment import MujocoInputFilter, GymEnvironmentParameters, MujocoOutputFilter
+from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.exploration_policies.truncated_normal import TruncatedNormalParameters
+from rl_coach.schedules import ConstantSchedule, PieceWiseSchedule, ExponentialSchedule
+from rl_coach.memories.memory import MemoryGranularity
+from rl_coach.base_parameters import EmbedderScheme
+from rl_coach.architectures.tensorflow_components.architecture import Dense

 ####################
@@ -30,6 +30,7 @@ agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense([30
 agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense([300])]
 agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense([300])]
 agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
+agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
 #agent_params.network_wrappers['critic'].clip_gradients = 100
 #agent_params.network_wrappers['actor'].clip_gradients = 100
@@ -55,6 +56,7 @@ env_params.level = '../automated_deep_compression/ADC.py:CNNEnvironment'
 vis_params = VisualizationParameters()
+vis_params.dump_parameters_documentation = False

 graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                     schedule_params=schedule_params, vis_params=vis_params)
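
For reference, the exploration schedule that `coach_adc` installs on top of this preset holds the noise at `exploration_noise` for the first 100 episodes (13 steps each), then decays it toward zero over another 300 episodes. A rough sketch of the resulting noise curve, assuming `ExponentialSchedule(initial, final, rate)` multiplies the value by `rate` once per environment step:

    steps_per_episode = 13
    exploration_noise = 0.5
    exploitation_decay = 0.996

    def noise_at(step):
        # Piecewise: constant for 100 episodes, then (assumed) exponential decay.
        constant_steps = 100 * steps_per_episode
        if step < constant_steps:
            return exploration_noise
        return exploration_noise * exploitation_decay ** (step - constant_steps)

    for episode in (0, 100, 200, 400):
        print(episode, round(noise_at(episode * steps_per_episode), 4))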