diff --git a/distiller/__init__.py b/distiller/__init__.py
index 6dec91f01ce395aa9c94cc9e3cab84899c102bca..6b0c21fa4da047803752ee32893c620d0efd5380 100755
--- a/distiller/__init__.py
+++ b/distiller/__init__.py
@@ -82,3 +82,19 @@ def model_find_param(model, param_to_find_name):
         if name == param_to_find_name:
             return param
     return None
+
+
+def model_find_module(model, module_to_find):
+    """Given a module name, find the module in the provided model.
+
+    Arguments:
+        model: the model to search
+        module_to_find: the name of the module to look up, as reported by
+            model.named_modules()
+
+    Returns:
+        The module or None, if the module was not found.
+    """
+    # Stop at the first name that matches; fall back to None when none does.
+    matches = (m for name, m in model.named_modules() if name == module_to_find)
+    return next(matches, None)
diff --git a/distiller/data_loggers/logger.py b/distiller/data_loggers/logger.py
index ae3ad55b0b70f73be40c5bc159df878f3ff8211f..796c6c3870cdec901ab8767182be485e8d8c91e4 100755
--- a/distiller/data_loggers/logger.py
+++ b/distiller/data_loggers/logger.py
@@ -70,13 +70,15 @@ class PythonLogger(DataLogger):
 
     def log_training_progress(self, stats_dict, epoch, completed, total, freq):
         stats_dict = stats_dict[1]
-        if epoch>-1:
+        if epoch > -1:
             log = 'Epoch: [{}][{:5d}/{:5d}]    '.format(epoch, completed, int(total))
         else:
             log = 'Test: [{:5d}/{:5d}]    '.format(completed, int(total))
-            #log = 'Test: [{1:5d}/{2:5d}]    '.format(total)
         for name, val in stats_dict.items():
-            log = log + '{name} {val:.6f}    '.format(name=name, val=val)
+            if isinstance(val, int):
+                log = log + '{name} {val}    '.format(name=name, val=distiller.pretty_int(val))
+            else:
+                log = log + '{name} {val:.6f}    '.format(name=name, val=val)
         self.pylogger.info(log)
 
 
diff --git a/distiller/model_summaries.py b/distiller/model_summaries.py
index cf67720376532ae87f05a8f0795f3885fdbe3383..81384024591b4c5f57c47f848d1d1fde5ab08ddd 100755
--- a/distiller/model_summaries.py
+++ b/distiller/model_summaries.py
@@ -56,7 +56,6 @@ def model_summary(model, what, dataset=None):
         total_macs = df['MACs'].sum()
         print(t)
         print("Total MACs: " + "{:,}".format(total_macs))
-
     elif what == 'model':
         # print the simple form of the model
         print(model)
@@ -71,21 +70,20 @@ def model_summary(model, what, dataset=None):
             if len(module._modules) == 0:
                 nodes.append([name, module.__class__.__name__])
         print(tabulate(nodes, headers=['Name', 'Type']))
+    else:
+        raise ValueError("%s is not a supported summary type" % what)
 
 
 def weights_sparsity_summary(model, return_total_sparsity=False, param_dims=[2, 4]):
-
     df = pd.DataFrame(columns=['Name', 'Shape', 'NNZ (dense)', 'NNZ (sparse)',
-                               'Cols (%)','Rows (%)', 'Ch (%)', '2D (%)', '3D (%)',
+                               'Cols (%)', 'Rows (%)', 'Ch (%)', '2D (%)', '3D (%)',
                                'Fine (%)', 'Std', 'Mean', 'Abs-Mean'])
     pd.set_option('precision', 2)
     params_size = 0
     sparse_params_size = 0
-    summary_param_types = ['weight', 'bias']
     for name, param in model.state_dict().items():
         # Extract just the actual parameter's name, which in this context we treat as its "type"
-        curr_param_type = name.split('.')[-1]
-        if param.dim() in param_dims and curr_param_type in summary_param_types:
+        if param.dim() in param_dims and any(type in name for type in ['weight', 'bias']):
             _density = distiller.density(param)
             params_size += torch.numel(param)
             sparse_params_size += param.numel() * _density
diff --git a/distiller/pruning/ranked_structures_pruner.py b/distiller/pruning/ranked_structures_pruner.py
index 683f11822fbefb26351e90e9c832731dfcd973c1..4816410c900b912ceb64852edac7b14a33cbbf8b 100755
--- a/distiller/pruning/ranked_structures_pruner.py
+++ b/distiller/pruning/ranked_structures_pruner.py
@@ -20,6 +20,7 @@ import distiller
 from .pruner import _ParameterPruner
 msglogger = logging.getLogger()
 
+# TODO: support different policies for ranking structures
 class L1RankedStructureParameterPruner(_ParameterPruner):
     """Uses mean L1-norm to rank structures and prune a specified percentage of structures
     """
@@ -28,6 +29,7 @@ class L1RankedStructureParameterPruner(_ParameterPruner):
         self.name = name
         self.reg_regims = reg_regims
 
+
     def set_param_mask(self, param, param_name, zeros_mask_dict, meta):
         if param_name not in self.reg_regims.keys():
             return
@@ -37,7 +39,59 @@ class L1RankedStructureParameterPruner(_ParameterPruner):
         if fraction_to_prune == 0:
             return
 
-        assert group_type == "3D", "Currently only filter ranking is supported"
+        if group_type not in ['3D', 'Channels']:
+            raise ValueError("Currently only filter (3D) and channel ranking is supported")
+        if group_type == "3D":
+            return self.rank_prune_filters(fraction_to_prune, param, param_name, zeros_mask_dict)
+        elif group_type == "Channels":
+            return self.rank_prune_channels(fraction_to_prune, param, param_name, zeros_mask_dict)
+
+    @staticmethod
+    def rank_channels(fraction_to_prune, param):
+        """Rank the channels of a 4D weights tensor by their mean L1-norm.
+
+        Returns (bottomk, channel_mags): the k smallest channel magnitudes and the
+        magnitudes of all the channels, or (None, None) when k rounds down to zero.
+        """
+        n_filters = param.size(0)
+        n_channels = param.size(1)
+        # Make every 2D kernel a row of a 2D view, and sum the absolute values per row
+        kernel_l1 = param.view(-1, param.size(2) * param.size(3)).abs().sum(dim=1)
+        # Regroup so that each row holds the kernel sums of one channel, then average
+        channel_mags = kernel_l1.view(n_filters, n_channels).t().mean(dim=1)
+
+        k = int(fraction_to_prune * channel_mags.size(0))
+        if k != 0:
+            bottomk, _ = torch.topk(channel_mags, k, largest=False, sorted=True)
+            return bottomk, channel_mags
+
+        msglogger.info("Too few channels (%d)- can't prune %.1f%% channels",
+                        n_channels, 100*fraction_to_prune)
+        return None, None
+
+
+    def rank_prune_channels(self, fraction_to_prune, param, param_name, zeros_mask_dict):
+        """Compute a channel-pruning mask for param and store it in zeros_mask_dict[param_name].mask."""
+        bottomk_channels, channel_mags = self.rank_channels(fraction_to_prune, param)
+        if bottomk_channels is None:
+            # None means that fraction_to_prune is too low to prune anything
+            return
+
+        num_filters = param.size(0)
+        num_channels = param.size(1)
+        # Keep (mask value 1) only the channels whose magnitude is above the threshold
+        threshold = bottomk_channels[-1]
+        binary_map = channel_mags.gt(threshold).type(param.data.type())
+        a = binary_map.expand(num_filters, num_channels)
+        c = a.unsqueeze(-1)
+        d = c.expand(num_filters, num_channels, param.size(2) * param.size(3)).contiguous()
+        zeros_mask_dict[param_name].mask = d.view(num_filters, num_channels, param.size(2), param.size(3))
+        msglogger.info("L1RankedStructureParameterPruner - param: %s pruned=%.3f goal=%.3f (%d/%d)", param_name,
+                       distiller.sparsity_ch(zeros_mask_dict[param_name].mask),
+                       fraction_to_prune, len(bottomk_channels), num_channels)
+
+
+    def rank_prune_filters(self, fraction_to_prune, param, param_name, zeros_mask_dict):
         assert param.dim() == 4, "This thresholding is only supported for 4D weights"
         view_filters = param.view(param.size(0), -1)
         filter_mags = view_filters.data.abs().mean(dim=1)
diff --git a/distiller/sensitivity.py b/distiller/sensitivity.py
index 69f8a0db51b3b1976ab75abe826d05725a64c937..8db2773f63cacb5cf31f5db9a39fbc0e2178dc8a 100755
--- a/distiller/sensitivity.py
+++ b/distiller/sensitivity.py
@@ -65,7 +65,8 @@ def perform_sensitivity_analysis(model, net_params, sparsities, test_func, group
     The test_func is expected to execute the model on a test/validation dataset,
     and return the results for top1 and top5 accuracies, and the loss value.
     """
-    assert group in ['element', 'filter']
+    if group not in ['element', 'filter', 'channel']:
+        raise ValueError("group parameter contains an illegal value: {}".format(group))
     sensitivities = OrderedDict()
 
     for param_name in net_params:
@@ -86,12 +87,18 @@ def perform_sensitivity_analysis(model, net_params, sparsities, test_func, group
                 # Element-wise sparasity
                 sparsity_levels = {param_name: sparsity_level}
                 pruner = distiller.pruning.SparsityLevelParameterPruner(name='sensitivity', levels=sparsity_levels)
-            else:
+            elif group == 'filter':
                 # Filter ranking
                 if model.state_dict()[param_name].dim() != 4:
                     continue
                 regims = {param_name: [sparsity_level, '3D']}
                 pruner = distiller.pruning.L1RankedStructureParameterPruner(name='sensitivity', reg_regims=regims)
+            elif group == 'channel':
+                # Channel ranking
+                if model.state_dict()[param_name].dim() != 4:
+                    continue
+                regims = {param_name: [sparsity_level, 'Channels']}
+                pruner = distiller.pruning.L1RankedStructureParameterPruner(name='sensitivity', reg_regims=regims)
 
             policy = distiller.PruningPolicy(pruner, pruner_args=None)
             scheduler = CompressionScheduler(model_cpy)
diff --git a/distiller/thinning.py b/distiller/thinning.py
index de7b6252dc8c056ceda5cfc73e46061c956e5dd6..684d715535b407a3a5d1175974e2422114658e86 100755
--- a/distiller/thinning.py
+++ b/distiller/thinning.py
@@ -15,13 +15,11 @@
 #
 
 """Model thinning support.
-
 Thinning a model is the process of taking a dense network architecture with a parameter model that
 has structure-sparsity (filters or channels) in the weights tensors of convolution layers, and making changes
 in the network architecture and parameters, in order to completely remove the structures.
 The new architecture is smaller (condensed), with less channels and filters in some of the convolution layers.
 Linear and BatchNormalization layers are also adjusted as required.
-
 To perform thinning, we create a SummaryGraph (‘sgraph’) of our model.  We use the ‘sgraph’ to infer the
 data-dependency between the modules in the PyTorch network.  This entire process is not trivial and will be
 documented in a different place.
@@ -42,11 +40,8 @@ ThinningRecipe = namedtuple('ThinningRecipe', ['modules', 'parameters'])
 """A ThinningRecipe is composed of two sets of instructions.
 1. Instructions for setting module attributes (e.g. Conv2d.out_channels).  This set
 is called 'ThinningRecipe.modules'.
-
 2. Information on how to select specific dimensions from parameter tensors.  This
 set is called 'ThinningRecipe.parameters'.
-
-
 ThinningRecipe.modules is a dictionary keyed by the module names (strings).  Values
 are called 'module-directives', and are grouped in another dictionary, whose keys are
 the module attributes.  For example:
@@ -55,7 +50,6 @@ the module attributes.  For example:
         out_channels: 512
     classifier.0:
         in_channels: 22589
-
 ThinningRecipe.parameters is a dictionary keyed by the parameter names (strings).
 Values are called 'parameter directives', and each directive is a list of tuples.
 These tuples can have 2 values, or 4 values.
@@ -78,7 +72,7 @@ def create_graph(dataset, arch):
 
     model = create_model(False, dataset, arch, parallel=False)
     assert model is not None
-    return SummaryGraph(model, dummy_input)
+    return SummaryGraph(model, dummy_input.cuda())
 
 
 def param_name_2_layer_name(param_name):
@@ -99,7 +93,6 @@ def append_module_directive(thinning_recipe, module_name, key, val):
 
 def bn_thinning(thinning_recipe, layers, bn_name, len_thin_features, thin_features):
     """Adjust the sizes of the parameters of a BatchNormalization layer
-
     This function is invoked after the Convolution layer preceeding a BN layer has
     changed dimensions (filters or channels were removed), and the BN layer also
     requires updating as a result.
@@ -123,7 +116,6 @@ def bn_thinning(thinning_recipe, layers, bn_name, len_thin_features, thin_featur
 
 def resnet_cifar_remove_layers(model):
     """Remove layers from ResNet-Cifar
-
     Search for convolution layers which have 100% sparse weight tensors and remove
     them from the model.  This ugly code is specific to ResNet for Cifar, using the
     layer gating mechanism that we added in order to remove layers from the network.
@@ -158,7 +150,6 @@ def remove_channels(model, zeros_mask_dict, arch, dataset, optimizer):
 
 def find_nonzero_channels(param, param_name):
     """Count the number of non-zero channels in a weights tensor.
-
     Non-zero channels are channels that have at least one coefficient that is
     non-zero.  Counting non-zero channels involves some tensor acrobatics.
     """
@@ -213,7 +204,6 @@ def remove_filters(model, zeros_mask_dict, arch, dataset, optimizer):
 
 def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):
     """Create a recipe for removing channels from Convolution layers.
-
     The 4D weights of the model parameters (i.e. the convolution parameters) are
     examined one by one, to determine which has channels that are all zeros.
     For each weights tensor that has at least one zero-channel, we create a
@@ -281,7 +271,6 @@ def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):
 
 def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
     """Create a recipe for removing filters from Convolution layers.
-
     The 4D weights of the model parameters (i.e. the convolution parameters) are
     examined one by one, to determine which has filters that are all zeros.
     For each weights tensor that has at least one zero-filter, we create a
@@ -337,7 +326,7 @@ def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
             if isinstance(layers[successor], torch.nn.modules.Conv2d):
                 # For each of the convolutional layers that follow, we have to reduce the number of input channels.
                 append_module_directive(thinning_recipe, successor, key='in_channels', val=num_nnz_filters)
-                msglogger.info("[recipe] {}: setting in_channels = {}".format(successor, num_nnz_filters))
+                msglogger.debug("[recipe] {}: setting in_channels = {}".format(successor, num_nnz_filters))
 
                 # Now remove channels from the weights tensor of the successor conv
                 append_param_directive(thinning_recipe, successor+'.weight', (1, indices))
@@ -347,7 +336,7 @@ def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
                 fm_size = layers[successor].in_features // layers[layer_name].out_channels
                 in_features = fm_size * num_nnz_filters
                 append_module_directive(thinning_recipe, successor, key='in_features', val=in_features)
-                msglogger.info("[recipe] {}: setting in_features = {}".format(successor, in_features))
+                msglogger.debug("[recipe] {}: setting in_features = {}".format(successor, in_features))
 
                 # Now remove channels from the weights tensor of the successor FC layer:
                 # This is a bit tricky:
@@ -450,7 +439,6 @@ def optimizer_thinning(optimizer, param, dim, indices, new_shape=None):
 
 def execute_thinning_recipe(model, zeros_mask_dict, recipe, optimizer, loaded_from_file=False):
     """Apply a thinning recipe to a model.
-
     This will remove filters and channels, as well as handle batch-normalization parameter
     adjustment, and thinning of weight tensors.
     """
@@ -467,12 +455,12 @@ def execute_thinning_recipe(model, zeros_mask_dict, recipe, optimizer, loaded_fr
                 indices_to_select = val[1]
                 # Check if we're trying to trim a parameter that is already "thin"
                 if running.size(dim_to_trim) != indices_to_select.nelement():
-                    msglogger.info("[thinning] {}: setting {} to {}".
+                    msglogger.debug("[thinning] {}: setting {} to {}".
                                    format(layer_name, attr, indices_to_select.nelement()))
                     setattr(layers[layer_name], attr,
                             torch.index_select(running, dim=dim_to_trim, index=indices_to_select))
             else:
-                msglogger.info("[thinning] {}: setting {} to {}".format(layer_name, attr, val))
+                msglogger.debug("[thinning] {}: setting {} to {}".format(layer_name, attr, val))
                 setattr(layers[layer_name], attr, val)
 
     assert len(recipe.parameters) > 0
@@ -503,7 +491,7 @@ def execute_thinning_recipe(model, zeros_mask_dict, recipe, optimizer, loaded_fr
             else:
                 if param.data.size(dim) != len_indices:
                     param.data = torch.index_select(param.data, dim, indices)
-                    msglogger.info("[thinning] changed param {} shape: {}".format(param_name, len_indices))
+                    msglogger.debug("[thinning] changed param {} shape: {}".format(param_name, len_indices))
                 # We also need to change the dimensions of the gradient tensor.
                 # If have not done a backward-pass thus far, then the gradient will
                 # not exist, and therefore won't need to be re-dimensioned.
diff --git a/distiller/utils.py b/distiller/utils.py
index da90f64d856daee6af59c9a2aa56857e4bcc43d1..17279538e113814dc92955b05790d0e1600ba110 100755
--- a/distiller/utils.py
+++ b/distiller/utils.py
@@ -52,6 +52,10 @@ def size_to_str(torch_size):
     return '('+(', ').join(['%d' % v for v in torch_size])+')'
 
 
+def pretty_int(i):
+    return format(i, ",")  # thousands separators, e.g. 1234567 -> '1,234,567'
+
+
 def normalize_module_name(layer_name):
     """Normalize a module's name.
 
diff --git a/examples/automated_deep_compression/ADC.py b/examples/automated_deep_compression/ADC.py
new file mode 100755
index 0000000000000000000000000000000000000000..cca53e17d4fa0d984e8bb5d1c1027282ba5b7b9c
--- /dev/null
+++ b/examples/automated_deep_compression/ADC.py
@@ -0,0 +1,357 @@
+import random
+import math
+import copy
+import logging
+import numpy as np
+import torch
+import gym
+from gym import spaces
+import distiller
+from apputils import SummaryGraph
+from collections import OrderedDict, namedtuple
+from types import SimpleNamespace
+from distiller import normalize_module_name
+
+from base_parameters import TaskParameters
+from examples.automated_deep_compression.presets.ADC_DDPG import graph_manager
+
+msglogger = logging.getLogger()  # the root logger
+Observation = namedtuple('Observation', ['t', 'n', 'c', 'h', 'w', 'stride', 'k', 'MACs', 'reduced', 'rest', 'prev_a'])
+ALMOST_ONE = 0.9999  # used in place of a pruning fraction of exactly 1.0
+
+# TODO: this is also defined in test_pruning.py
+def create_model_masks(model):
+    """Create a distiller.ParameterMasker for every named parameter of the model.
+
+    Returns a dict that maps each parameter name to its masker.
+    """
+    return {name: distiller.ParameterMasker(name)
+            for name, _ in model.named_parameters()}
+
+
+USE_COACH = True  # when True, do_adc() hands control to graph_manager and skips the random agent
+
+
+def do_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn):
+    """Run ADC on the model: through graph_manager when USE_COACH is set, otherwise
+    with a random agent that plays 10 episodes of at most 100 steps each."""
+    np.random.seed()
+    if USE_COACH:
+        task_parameters = TaskParameters(framework_type="tensorflow",
+                                         experiment_path="./experiments/test")
+        extra_params = {'save_checkpoint_secs': None,
+                        'render': True}
+        task_parameters.__dict__.update(extra_params)
+
+        graph_manager.env_params.additional_simulator_parameters = {
+            'model': model,
+            'dataset': dataset,
+            'arch': arch,
+            'data_loader': data_loader,
+            'validate_fn': validate_fn,
+            'save_checkpoint_fn': save_checkpoint_fn
+        }
+        graph_manager.create_graph(task_parameters)
+        graph_manager.improve()
+        return
+
+    """Random ADC agent"""
+    env = CNNEnvironment(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn)
+    for ep in range(10):
+        observation = env.reset()
+        for t in range(100):
+            env.render(0, 0)
+            msglogger.info("[episode={}:{}] observation = {}".format(ep, t, observation))
+            # take a random action
+            action = env.action_space.sample()
+            observation, reward, done, info = env.step(action)
+            if done:
+                msglogger.info("Episode finished after {} timesteps".format(t+1))
+                break
+
+
+class RandomADCActionSpace(object):  # minimal action space: only sample() is provided
+    def sample(self):
+        return random.uniform(0, 1)  # a random float between 0 and 1
+
+
+def collect_conv_details(model, dataset):
+    """Collect per-layer details of every Conv2d module in `model`.
+
+    Returns a tuple (conv_layers, total_macs): conv_layers is an OrderedDict keyed
+    by the running index of the convolution, and total_macs sums the layers' MACs.
+    Raises ValueError for datasets other than 'imagenet' and 'cifar10'.
+    """
+    if dataset == 'imagenet':
+        dummy_input = torch.randn(1, 3, 224, 224)
+    elif dataset == 'cifar10':
+        dummy_input = torch.randn(1, 3, 32, 32)
+    else:
+        raise ValueError("dataset %s is not supported" % dataset)
+
+    # NOTE: model.cuda() moves the caller's model to the GPU as a side effect.
+    g = SummaryGraph(model.cuda(), dummy_input.cuda())
+    conv_layers = OrderedDict()
+    total_macs = 0
+    for module_idx, (name, m) in enumerate(model.named_modules()):
+        if not isinstance(m, torch.nn.Conv2d):
+            continue
+        # Use the SummaryGraph to obtain details that the module itself lacks
+        conv_op = g.find_op(normalize_module_name(name))
+        assert conv_op is not None
+        ofm_shape = g.param_shape(conv_op['outputs'][0])
+        ifm_shape = g.param_shape(conv_op['inputs'][0])
+        conv = SimpleNamespace(t=len(conv_layers), k=m.kernel_size[0], stride=m.stride,
+                               macs=conv_op['attrs']['MACs'], name=name, id=module_idx,
+                               ofm_h=ofm_shape[2], ofm_w=ofm_shape[3],
+                               ifm_h=ifm_shape[2], ifm_w=ifm_shape[3])
+        total_macs += conv.macs
+        conv_layers[conv.t] = conv
+
+    return conv_layers, total_macs
+
+
+class CNNEnvironment(gym.Env):
+    """Gym environment in which every step handles one Convolution layer of a model."""
+    metadata = {'render.modes': ['human']}
+    STATE_EMBEDDING_LEN = len(Observation._fields)
+
+    def __init__(self, model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn):
+        """Store the arguments, collect the Convolution layers' details and set the Gym spaces."""
+        self.pylogger = distiller.data_loggers.PythonLogger(msglogger)
+        self.tflogger = distiller.data_loggers.TensorBoardLogger(msglogger.logdir)
+
+        self.action_space = RandomADCActionSpace()
+        self.dataset = dataset
+        self.arch = arch
+        self.data_loader = data_loader
+        self.validate_fn = validate_fn
+        self.save_checkpoint_fn = save_checkpoint_fn
+        self.orig_model = model
+        self.conv_layers, self.dense_model_macs = collect_conv_details(model, dataset)
+        self.reset(init_only=True)
+        msglogger.info("Model %s has %d Convolution layers", arch, len(self.conv_layers))
+        msglogger.info("\tTotal MACs: %s" % distiller.pretty_int(self.dense_model_macs))
+        self.debug_stats = {'episode': 0}
+
+        # Gym spaces; the Box below replaces the RandomADCActionSpace assigned above.
+        # spaces documentation: https://gym.openai.com/docs/
+        self.action_space = spaces.Box(0, 1, shape=(1,))
+        self.observation_space = spaces.Box(0, float("inf"), shape=(self.STATE_EMBEDDING_LEN,))
+
+    def reset(self, init_only=False):
+        """Reset the environment.
+        This is invoked by the Agent.
+
+        Restores a deep copy of the original model, new masks and the MACs counters.
+        When init_only is True nothing is returned; otherwise the observation that
+        self.step(0) produces is returned.
+        """
+        msglogger.info("Resetting the environment")
+        self.current_layer_id = 0
+        self.prev_action = 0
+        self.model = copy.deepcopy(self.orig_model)
+        self.zeros_mask_dict = create_model_masks(self.model)
+        self._remaining_macs = self.dense_model_macs
+        self._removed_macs = 0
+
+        if init_only:
+            return
+
+        # NOTE(review): step(0) advances current_layer_id, so the observation returned
+        # here describes the layer at index 1, not index 0 - confirm this is intended.
+        obs, _, _, _, = self.step(0)
+        return obs
+
+    def num_layers(self):
+        """Return the number of Convolution layers collected from the model."""
+        return len(self.conv_layers)
+
+    def current_layer(self):
+        """Return the current layer's details, or None when current_layer_id has no entry."""
+        try:
+            return self.conv_layers[self.current_layer_id]
+        except KeyError:
+            return None
+
+    def episode_is_done(self):
+        """Return True once current_layer_id equals the number of Convolution layers."""
+        return self.current_layer_id == self.num_layers()
+
+    def remaining_macs(self):
+        """Return the amount of MACs remaining in the model's unprocessed
+        Convolution layers.
+        This is expressed as a fraction of the dense model's MACs.
+        """
+        return self._remaining_macs / self.dense_model_macs
+
+    def removed_macs(self):
+        """Return the amount of MACs removed so far.
+        This is expressed as a fraction of the dense model's MACs.
+        """
+        return self._removed_macs / self.dense_model_macs
+
+    def render(self, mode, close):
+        """Provide some feedback to the user about what's going on
+        This is invoked by the Agent.
+        The mode and close arguments are not used.
+        """
+        if self.current_layer_id == 0:
+            msglogger.info("+" + "-" * 50 + "+")
+            msglogger.info("Starting a new episode")
+            msglogger.info("+" + "-" * 50 + "+")
+
+        msglogger.info("Environment: current_layer_id=%d" % self.current_layer_id)
+        distiller.log_weights_sparsity(self.model, -1, loggers=[self.pylogger])
+
+    def step(self, action):
+        """Take a step, given an action.
+        This is invoked by the Agent.
+
+        A positive action is the fraction of channels to remove from the current layer.
+        Returns the tuple (observation, reward, done, info).  The reward is 0 until the
+        episode is done; at that point the reward is computed and a checkpoint is saved.
+        """
+        layer_macs = self.get_macs(self.current_layer())
+        if action > 0:
+            actual_action = self.__remove_channels(self.current_layer_id, action)
+        else:
+            actual_action = 0
+        layer_macs_after_action = self.get_macs(self.current_layer())
+
+        # Update the various counters after taking the step
+        self.current_layer_id += 1
+        next_layer_macs = self.get_macs(self.current_layer())
+        self._removed_macs += (layer_macs - layer_macs_after_action)
+        self._remaining_macs -= next_layer_macs
+
+        # The observation is built before self.prev_action is updated to this step's action
+        if self.episode_is_done():
+            observation = self.get_final_obs()
+            reward = self.compute_reward()
+            # Save the learned-model checkpoint
+            scheduler = distiller.CompressionScheduler(self.model)
+            scheduler.load_state_dict(state={'masks_dict': self.zeros_mask_dict})
+            self.save_checkpoint_fn(epoch=self.debug_stats['episode'], model=self.model, scheduler=scheduler)
+            self.debug_stats['episode'] += 1
+        else:
+            observation = self._get_obs(next_layer_macs)
+            # Intermediate steps get no reward; the else branch below is currently unreachable
+            if True:
+                reward = 0
+            else:
+                reward = self.compute_reward()
+
+        self.prev_action = actual_action
+        info = {}
+        return observation, reward, self.episode_is_done(), info
+
+    def _get_obs(self, macs):
+        """Produce a state embedding (i.e. an observation) for the current layer.
+        The values follow the field order of the Observation namedtuple; macs is
+        divided by the dense model's MACs before it is stored in the embedding.
+        """
+        layer = self.current_layer()
+        conv_module = distiller.model_find_module(self.model, layer.name)
+
+        obs = np.array([layer.t, conv_module.out_channels, conv_module.in_channels,
+                        layer.ifm_h, layer.ifm_w, layer.stride[0], layer.k,
+                        macs/self.dense_model_macs, self.removed_macs(), self.remaining_macs(), self.prev_action])
+
+        assert len(obs) == self.STATE_EMBEDDING_LEN
+        assert (macs/self.dense_model_macs + self.removed_macs() + self.remaining_macs()) <= 1
+        msglogger.info("obs={}".format(Observation._make(obs)))
+        return obs
+
+    def get_final_obs(self):
+        """Return the final state embedding (observation)
+        The final state is reached after we traverse all of the Convolution layers.
+        """
+        obs = np.array([-1, 0, 0,
+                         0, 0, 0, 0,
+                         0, self.removed_macs(), 0, self.prev_action])
+        assert len(obs) == self.STATE_EMBEDDING_LEN
+        return obs
+
+    def get_macs(self, layer):
+        """Return the number of MACs required to compute <layer>'s Convolution (0 if layer is None)"""
+        if layer is None:
+            return 0
+
+        conv_module = distiller.model_find_module(self.model, layer.name)
+        # MACs = volume(OFM) * (#IFM * K^2)
+        return (conv_module.out_channels * layer.ofm_h * layer.ofm_w) * (conv_module.in_channels * layer.k**2)
+
+    def __remove_channels(self, idx, fraction_to_prune, prune_what="channels"):
+        """Physically remove channels and corresponding filters from the model.
+        Returns the sparsity of the masked weights tensor, or 0 if nothing was pruned.
+        """
+        if idx not in range(self.num_layers()):
+            raise ValueError("idx=%d is not in correct range (0-%d)" % (idx, self.num_layers()))
+        if fraction_to_prune < 0:
+            raise ValueError("fraction_to_prune=%f is illegal" % (fraction_to_prune))
+
+        if fraction_to_prune == 0:
+            return 0
+        if fraction_to_prune == 1.0:
+            # For now, prevent the removal of entire layers
+            fraction_to_prune = ALMOST_ONE
+
+        layer = self.conv_layers[idx]
+        conv_pname = layer.name + ".weight"
+        conv_p = distiller.model_find_param(self.model, conv_pname)
+
+        msglogger.info("ADC: removing %.1f%% channels from %s" % (fraction_to_prune*100, conv_pname))
+
+        if prune_what == "channels":
+            calculate_sparsity = distiller.sparsity_ch
+            reg_regims = {conv_pname: [fraction_to_prune, "Channels"]}
+            remove_structures = distiller.remove_channels
+        else:
+            calculate_sparsity = distiller.sparsity_3D
+            reg_regims = {conv_pname: [fraction_to_prune, "3D"]}
+            remove_structures = distiller.remove_filters
+
+        # Create a structure-ranking pruner for the regime selected above
+        pruner = distiller.pruning.L1RankedStructureParameterPruner("adc_pruner", reg_regims)
+        pruner.set_param_mask(conv_p, conv_pname, self.zeros_mask_dict, meta=None)
+
+        if (self.zeros_mask_dict[conv_pname].mask is None or
+            calculate_sparsity(self.zeros_mask_dict[conv_pname].mask) == 0):
+            msglogger.info("__remove_channels: aborting because there are no channels to prune")
+            return 0
+
+        # Use the mask to prune
+        self.zeros_mask_dict[conv_pname].apply_mask(conv_p)
+        actual_sparsity = calculate_sparsity(conv_p)
+        remove_structures(self.model, self.zeros_mask_dict, self.arch, self.dataset, optimizer=None)
+        return actual_sparsity
+
+    def compute_reward(self):
+        """Compute the reward as -1 * validation loss * log(total MACs of the current model).
+
+        The original note on this method reads: "The ADC paper defines reward = -Error".
+        The validation results are also reported through the TensorBoard and Python loggers.
+        """
+        distiller.log_weights_sparsity(self.model, -1, loggers=[self.pylogger])
+
+        top1, top5, vloss = self.validate_fn(model=self.model, epoch=self.debug_stats['episode'])
+        _, total_macs = collect_conv_details(self.model, self.dataset)
+        reward = -1 * vloss * math.log(total_macs)
+        # Other reward formulations that are currently disabled:
+        #   -1 * vloss * math.sqrt(math.log(total_macs))
+        #   top1 / math.log(total_macs)
+        #   -1 * ((1-alpha)*(top1/100) + alpha*(total_macs/self.dense_model_macs)), alpha = 0.99
+        #   -1 * vloss * (total_macs / self.dense_model_macs)
+        #   top1 * (self.dense_model_macs / total_macs)
+        stats = ('Peformance/Validation/',  # NOTE(review): "Peformance" looks like a typo - confirm before renaming
+                 OrderedDict([('Loss', vloss),
+                              ('Top1', top1),
+                              ('Top5', top5),
+                              ('reward', reward),
+                              ('total_macs', int(total_macs)),
+                              ('log(total_macs)', math.log(total_macs))]))
+        distiller.log_training_progress(stats, None, self.debug_stats['episode'], steps_completed=0, total_steps=1,
+                                        log_freq=1, loggers=[self.tflogger, self.pylogger])
+
+        return reward
diff --git a/examples/automated_deep_compression/presets/ADC_DDPG.py b/examples/automated_deep_compression/presets/ADC_DDPG.py
new file mode 100755
index 0000000000000000000000000000000000000000..e7e5a1f5bc1ae27eb06c6b1e0d31c00680fd8487
--- /dev/null
+++ b/examples/automated_deep_compression/presets/ADC_DDPG.py
@@ -0,0 +1,73 @@
+from agents.ddpg_agent import DDPGAgentParameters
+from graph_managers.basic_rl_graph_manager import BasicRLGraphManager
+from graph_managers.graph_manager import ScheduleParameters
+from base_parameters import VisualizationParameters
+from core_types import EnvironmentEpisodes, EnvironmentSteps
+from environments.gym_environment import MujocoInputFilter, GymEnvironmentParameters, MujocoOutputFilter
+from exploration_policies.additive_noise import AdditiveNoiseParameters
+from exploration_policies.truncated_normal import TruncatedNormalParameters
+from schedules import ConstantSchedule, PieceWiseSchedule, ExponentialSchedule
+from memories.memory import MemoryGranularity
+from architectures.tensorflow_components.architecture import Dense
+
+####################
+# Block Scheduling #
+####################
+schedule_params = ScheduleParameters()
+schedule_params.improve_steps = EnvironmentEpisodes(400)
+if True:  # manual toggle: set to False to use the 1-episode evaluation settings in the else branch
+    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
+    schedule_params.evaluation_steps = EnvironmentEpisodes(3)
+else:
+    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(1)
+    schedule_params.evaluation_steps = EnvironmentEpisodes(1)
+schedule_params.heatup_steps = EnvironmentSteps(2)
+
+#####################
+# DDPG Agent Params #
+#####################
+agent_params = DDPGAgentParameters()
+agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense([300])]
+agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense([300])]
+agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense([300])]
+agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense([300])]
+agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = [Dense([300])]
+#agent_params.network_wrappers['critic'].clip_gradients = 100
+#agent_params.network_wrappers['actor'].clip_gradients = 100
+
+agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # Tau, pg. 11 -- NOTE(review): the cited document is not named here
+agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)
+# agent_params.memory.max_size = (MemoryGranularity.Episodes, 2000)
+agent_params.exploration = TruncatedNormalParameters()  # alternative: AdditiveNoiseParameters()
+steps_per_episode = 13  # scales the episode counts below into steps; NOTE(review): 13 presumably matches the target model -- confirm
+agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([(ConstantSchedule(0.5), EnvironmentSteps(100*steps_per_episode)),
+                                                                        (ExponentialSchedule(0.5, 0, 0.95), EnvironmentSteps(350*steps_per_episode))])
+agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
+agent_params.input_filter = MujocoInputFilter()
+agent_params.output_filter = MujocoOutputFilter()
+# agent_params.network_wrappers['actor'].learning_rate = 0.0001
+# agent_params.network_wrappers['critic'].learning_rate = 0.0001
+# These seem like good values for Reward = -Error
+agent_params.network_wrappers['actor'].learning_rate = 0.0001
+agent_params.network_wrappers['critic'].learning_rate = 0.0001
+# agent_params.network_wrappers['actor'].learning_rate = 0.1
+# agent_params.network_wrappers['critic'].learning_rate = 0.1
+# agent_params.network_wrappers['actor'].learning_rate =  0.000001
+# agent_params.network_wrappers['critic'].learning_rate = 0.000001
+
+##############################
+#      Gym                   #
+##############################
+env_params = GymEnvironmentParameters()
+#env_params.level = '/home/cvds_lab/nzmora/pytorch_workspace/distiller/examples/automated_deep_compression/gym_env/distiller_adc/distiller_adc.py:AutomatedDeepCompression'
+# This path works when training from Coach
+#env_params.level = '../distiller/examples/automated_deep_compression/gym_env/distiller_adc/distiller_adc.py:AutomatedDeepCompression'
+# This path works when training from Distiller
+#env_params.level = '../automated_deep_compression/gym_env/distiller_adc/distiller_adc.py:AutomatedDeepCompression'
+env_params.level = '../automated_deep_compression/ADC.py:CNNEnvironment'  # '<relative file path>:<class name>'
+
+
+vis_params = VisualizationParameters()
+
+graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
+                                    schedule_params=schedule_params, vis_params=vis_params)
diff --git a/examples/classifier_compression/compress_classifier.py b/examples/classifier_compression/compress_classifier.py
index a540dd71d85636150e0d0155a38bbb9406a48695..f227b06ce35d09e6e611cef8de437e0e8ab885cc 100755
--- a/examples/classifier_compression/compress_classifier.py
+++ b/examples/classifier_compression/compress_classifier.py
@@ -67,11 +67,11 @@ import torch.backends.cudnn as cudnn
 import torch.optim
 import torch.utils.data
 import torchnet.meter as tnt
+script_dir = os.path.dirname(__file__)
+module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
 try:
     import distiller
 except ImportError:
-    script_dir = os.path.dirname(__file__)
-    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
     sys.path.append(module_path)
     import distiller
 import apputils
@@ -79,6 +79,7 @@ from distiller.data_loggers import TensorBoardLogger, PythonLogger, ActivationSp
 import distiller.quantization as quantization
 from models import ALL_MODEL_NAMES, create_model
 
+
 # Logger handle
 msglogger = None
 
@@ -127,7 +128,7 @@ parser.add_argument('--summary', type=str, choices=SUMMARY_CHOICES,
                     ' | '.join(SUMMARY_CHOICES))
 parser.add_argument('--compress', dest='compress', type=str, nargs='?', action='store',
                     help='configuration file for pruning the model (default is to use hard-coded schedule)')
-parser.add_argument('--sense', dest='sensitivity', choices=['element', 'filter'],
+parser.add_argument('--sense', dest='sensitivity', choices=['element', 'filter', 'channel'],
                     help='test the sensitivity of layers to pruning')
 parser.add_argument('--extras', default=None, type=str,
                     help='file with extra configuration information')
@@ -141,6 +142,7 @@ parser.add_argument('--name', '-n', metavar='NAME', default=None, help='Experime
 parser.add_argument('--out-dir', '-o', dest='output_dir', default='logs', help='Path to dump logs and checkpoints')
 parser.add_argument('--validation-size', '--vs', type=float_range, default=0.1,
                     help='Portion of training dataset to set aside for validation')
+parser.add_argument('--adc', dest='ADC', action='store_true', help='temp HACK')
 
 
 def check_pytorch_version():
@@ -210,7 +212,6 @@ def main():
 
     # Create the model
     model = create_model(args.pretrained, args.dataset, args.arch, device_ids=args.gpus)
-
     compression_scheduler = None
     # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
     # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
@@ -230,6 +231,23 @@ def main():
     msglogger.info('Optimizer Type: %s', type(optimizer))
     msglogger.info('Optimizer Args: %s', optimizer.defaults)
 
+    if args.ADC:
+        HAVE_GYM_INSTALLED = False
+        if not HAVE_GYM_INSTALLED:
+            raise ValueError("ADC is currently experimental and uses non-public Coach features")
+
+        import examples.automated_deep_compression.ADC as ADC
+        train_loader, val_loader, test_loader, _ = apputils.load_data(
+            args.dataset, os.path.expanduser(args.data), args.batch_size,
+            args.workers, args.validation_size, args.deterministic)
+
+        validate_fn = partial(validate, val_loader=test_loader, criterion=criterion,
+                              loggers=[pylogger], print_freq=args.print_freq)
+
+        save_checkpoint_fn = partial(apputils.save_checkpoint, arch=args.arch, name='adc')
+        ADC.do_adc(model, args.dataset, args.arch, val_loader, validate_fn, save_checkpoint_fn)
+        exit()
+
     # This sample application can be invoked to produce various summary reports.
     if args.summary:
         which_summary = args.summary
@@ -264,7 +282,7 @@ def main():
         which_params = [param_name for param_name, _ in model.named_parameters()]
         sensitivity = distiller.perform_sensitivity_analysis(model,
                                                              net_params=which_params,
-                                                             sparsities=np.arange(0.0, 0.50, 0.05) if args.sensitivity == 'filter' else np.arange(0.0, 0.95, 0.05),
+                                                             sparsities=np.arange(0.0, 0.95, 0.05),
                                                              test_func=test_fnc,
                                                              group=args.sensitivity)
         distiller.sensitivities_to_png(sensitivity, 'sensitivity.png')
@@ -358,7 +376,7 @@ def train(train_loader, model, criterion, optimizer, epoch,
         data_time.add(time.time() - end)
 
         target = target.cuda(async=True)
-        input_var = torch.autograd.Variable(inputs)
+        input_var = inputs.cuda()
         target_var = torch.autograd.Variable(target)
 
         # Execute the forward phase, compute the output and measure loss
@@ -487,6 +505,7 @@ class PytorchNoGrad(object):
 
 def get_inference_var(tensor):
     """This is a temporary function to bridge some difference between PyTorch 3.x and 4.x"""
+    tensor = tensor.cuda(async=True)  # copy to the GPU first; NOTE(review): 'async' is reserved in Python 3.7+ (PyTorch 0.4 renamed it non_blocking)
     if torch.__version__ >= '0.4':
         return torch.autograd.Variable(tensor)
     return torch.autograd.Variable(tensor, volatile=True)
@@ -495,10 +514,12 @@ def get_inference_var(tensor):
 if __name__ == '__main__':
     try:
         main()
+    except KeyboardInterrupt:
+        print("\n-- KeyboardInterrupt --")
     except Exception as e:
         if msglogger is not None:
             msglogger.error(traceback.format_exc())
-        raise e
+        raise
     finally:
         if msglogger is not None:
             msglogger.info('')
diff --git a/examples/pruning_filters_for_efficient_convnets/resnet56_cifar_baseline_training.yaml b/examples/pruning_filters_for_efficient_convnets/resnet56_cifar_baseline_training.yaml
index 01c5c7ac6da7ea8d7c9cbf71b0a4815dbde36cce..86923012adecd2b36f7a2e1ce6d69329f73fd625 100755
--- a/examples/pruning_filters_for_efficient_convnets/resnet56_cifar_baseline_training.yaml
+++ b/examples/pruning_filters_for_efficient_convnets/resnet56_cifar_baseline_training.yaml
@@ -1,88 +1,83 @@
 # We used this schedule to train CIFAR10-ResNet56 from scratch
 #
-# time python3 compress_classifier.py --arch resnet56_cifar  ../data.cifar10 -p=50 --lr=0.4 --epochs=180 --compress=../pruning_filters_for_efficient_convnets/resnet56_cifar_baseline_training.yaml -j=1 --deterministic
+# time python3 compress_classifier.py --arch resnet56_cifar  ../../../data.cifar10 -p=50 --lr=0.3 --epochs=180 --compress=../pruning_filters_for_efficient_convnets/resnet56_cifar_baseline_training.yaml -j=1 --deterministic
 #
-# Target: 6.96% error was reported Pruning Filters for Efficient Convnets 
+# Target: 6.96% error was reported in "Pruning Filters for Efficient Convnets"
 #
+# Parameters:
 # +----+-------------------------------------+----------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------+
 # |    | Name                                | Shape          |   NNZ (dense) |   NNZ (sparse) |   Cols (%) |   Rows (%) |   Ch (%) |   2D (%) |   3D (%) |   Fine (%) |     Std |     Mean |   Abs-Mean |
 # |----+-------------------------------------+----------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------|
-# |  0 | module.conv1.weight                 | (16, 3, 3, 3)  |           432 |            432 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.34148 |  0.01379 |    0.14357 |
-# |  1 | module.layer1.0.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06301 |  0.00203 |    0.02347 |
-# |  2 | module.layer1.0.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.05760 |  0.00007 |    0.02742 |
-# |  3 | module.layer1.1.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04780 |  0.00338 |    0.02383 |
-# |  4 | module.layer1.1.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04858 | -0.00358 |    0.02670 |
-# |  5 | module.layer1.2.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.07968 |  0.00273 |    0.04429 |
-# |  6 | module.layer1.2.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.07640 | -0.00262 |    0.04895 |
-# |  7 | module.layer1.3.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.10044 | -0.00384 |    0.05374 |
-# |  8 | module.layer1.3.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.09931 | -0.00360 |    0.06238 |
-# |  9 | module.layer1.4.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.08298 | -0.00024 |    0.05489 |
-# | 10 | module.layer1.4.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.08289 | -0.00766 |    0.05761 |
-# | 11 | module.layer1.5.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.11349 | -0.00590 |    0.08049 |
-# | 12 | module.layer1.5.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.10642 | -0.00195 |    0.07803 |
-# | 13 | module.layer1.6.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.10367 | -0.00788 |    0.07537 |
-# | 14 | module.layer1.6.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.09865 | -0.00195 |    0.07261 |
-# | 15 | module.layer1.7.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.12612 | -0.00886 |    0.09447 |
-# | 16 | module.layer1.7.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.11011 |  0.00163 |    0.08398 |
-# | 17 | module.layer1.8.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.13849 | -0.01522 |    0.10323 |
-# | 18 | module.layer1.8.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.10821 | -0.00555 |    0.08318 |
-# | 19 | module.layer2.0.conv1.weight        | (32, 16, 3, 3) |          4608 |           4608 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.13763 | -0.00246 |    0.10269 |
-# | 20 | module.layer2.0.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.11410 | -0.00401 |    0.08719 |
-# | 21 | module.layer2.0.downsample.0.weight | (32, 16, 1, 1) |           512 |            512 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.25910 |  0.01282 |    0.18712 |
-# | 22 | module.layer2.1.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.09005 | -0.00572 |    0.06956 |
-# | 23 | module.layer2.1.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.08083 | -0.00496 |    0.06368 |
-# | 24 | module.layer2.2.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.07592 | -0.00750 |    0.05929 |
-# | 25 | module.layer2.2.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06707 | -0.00587 |    0.05252 |
-# | 26 | module.layer2.3.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.07829 | -0.00719 |    0.06119 |
-# | 27 | module.layer2.3.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06452 | -0.00374 |    0.05061 |
-# | 28 | module.layer2.4.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06574 | -0.00771 |    0.04972 |
-# | 29 | module.layer2.4.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.05378 | -0.00263 |    0.03984 |
-# | 30 | module.layer2.5.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.07436 | -0.00515 |    0.05701 |
-# | 31 | module.layer2.5.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06059 | -0.00472 |    0.04677 |
-# | 32 | module.layer2.6.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06222 | -0.00527 |    0.04587 |
-# | 33 | module.layer2.6.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04993 | -0.00212 |    0.03606 |
-# | 34 | module.layer2.7.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06119 | -0.00785 |    0.04308 |
-# | 35 | module.layer2.7.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04664 | -0.00216 |    0.03203 |
-# | 36 | module.layer2.8.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06416 | -0.00867 |    0.04732 |
-# | 37 | module.layer2.8.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04904 | -0.00276 |    0.03586 |
-# | 38 | module.layer3.0.conv1.weight        | (64, 32, 3, 3) |         18432 |          18432 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.08855 | -0.00176 |    0.06946 |
-# | 39 | module.layer3.0.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.07219 |  0.00106 |    0.05211 |
-# | 40 | module.layer3.0.downsample.0.weight | (64, 32, 1, 1) |          2048 |           2048 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.13456 |  0.00539 |    0.09422 |
-# | 41 | module.layer3.1.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.05082 | -0.00166 |    0.03574 |
-# | 42 | module.layer3.1.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04510 | -0.00510 |    0.03232 |
-# | 43 | module.layer3.2.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.05256 | -0.00417 |    0.03748 |
-# | 44 | module.layer3.2.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04448 | -0.00243 |    0.03171 |
-# | 45 | module.layer3.3.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04190 | -0.00189 |    0.03038 |
-# | 46 | module.layer3.3.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03494 | -0.00418 |    0.02498 |
-# | 47 | module.layer3.4.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04426 | -0.00368 |    0.03268 |
-# | 48 | module.layer3.4.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03460 | -0.00293 |    0.02468 |
-# | 49 | module.layer3.5.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04880 | -0.00321 |    0.03613 || 50 | module.layer3.5.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03709 |  0.00014 |    0.02571 |
-# | 51 | module.layer3.6.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.02699 | -0.00166 |    0.01931 |
-# | 52 | module.layer3.6.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.02024 | -0.00064 |    0.01354 |
-# | 53 | module.layer3.7.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.02466 | -0.00162 |    0.01766 |
-# | 54 | module.layer3.7.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.01816 | -0.00159 |    0.01202 |
-# | 55 | module.layer3.8.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03662 | -0.00271 |    0.02692 |
-# | 56 | module.layer3.8.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.02626 |  0.00011 |    0.01813 |
-# | 57 | module.fc.weight                    | (10, 64)       |           640 |            640 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.52207 | -0.00001 |    0.39151 |
+# |  0 | module.conv1.weight                 | (16, 3, 3, 3)  |           432 |            432 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.39191 |  0.00826 |    0.18757 |
+# |  1 | module.layer1.0.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.08334 | -0.00180 |    0.03892 |
+# |  2 | module.layer1.0.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.08565 | -0.00033 |    0.05106 |
+# |  3 | module.layer1.1.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.08190 |  0.00082 |    0.04765 |
+# |  4 | module.layer1.1.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.08365 | -0.00600 |    0.05459 |
+# |  5 | module.layer1.2.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.09640 | -0.00182 |    0.06337 |
+# |  6 | module.layer1.2.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.09881 | -0.00400 |    0.07056 |
+# |  7 | module.layer1.3.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.13412 | -0.00416 |    0.08827 |
+# |  8 | module.layer1.3.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.12693 | -0.00271 |    0.09395 |
+# |  9 | module.layer1.4.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.12149 | -0.01105 |    0.09064 |
+# | 10 | module.layer1.4.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.11322 |  0.00333 |    0.08556 |
+# | 11 | module.layer1.5.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.12076 | -0.01164 |    0.09311 |
+# | 12 | module.layer1.5.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.11627 | -0.00355 |    0.08882 |
+# | 13 | module.layer1.6.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.12492 | -0.00637 |    0.09493 |
+# | 14 | module.layer1.6.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.11240 | -0.00837 |    0.08710 |
+# | 15 | module.layer1.7.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.13819 | -0.00735 |    0.10096 |
+# | 16 | module.layer1.7.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.11107 | -0.00293 |    0.08613 |
+# | 17 | module.layer1.8.conv1.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.12269 | -0.01133 |    0.09511 |
+# | 18 | module.layer1.8.conv2.weight        | (16, 16, 3, 3) |          2304 |           2304 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.09276 |  0.00240 |    0.07117 |
+# | 19 | module.layer2.0.conv1.weight        | (32, 16, 3, 3) |          4608 |           4608 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.13876 | -0.01190 |    0.11061 |
+# | 20 | module.layer2.0.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.12728 | -0.00499 |    0.10012 |
+# | 21 | module.layer2.0.downsample.0.weight | (32, 16, 1, 1) |           512 |            512 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.24306 | -0.01255 |    0.19073 |
+# | 22 | module.layer2.1.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.11474 | -0.00995 |    0.09044 |
+# | 23 | module.layer2.1.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.10452 | -0.00440 |    0.08196 |
+# | 24 | module.layer2.2.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.09873 | -0.00629 |    0.07833 |
+# | 25 | module.layer2.2.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.08747 | -0.00393 |    0.06891 |
+# | 26 | module.layer2.3.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.09434 | -0.00762 |    0.07469 |
+# | 27 | module.layer2.3.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.07984 | -0.00449 |    0.06271 |
+# | 28 | module.layer2.4.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.08767 | -0.00733 |    0.06852 |
+# | 29 | module.layer2.4.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06642 | -0.00396 |    0.05196 |
+# | 30 | module.layer2.5.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.07521 | -0.00699 |    0.05799 |
+# | 31 | module.layer2.5.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.05739 | -0.00351 |    0.04334 |
+# | 32 | module.layer2.6.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06130 | -0.00595 |    0.04791 |
+# | 33 | module.layer2.6.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04703 | -0.00519 |    0.03527 |
+# | 34 | module.layer2.7.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06366 | -0.00734 |    0.04806 |
+# | 35 | module.layer2.7.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04591 | -0.00131 |    0.03282 |
+# | 36 | module.layer2.8.conv1.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.05903 | -0.00606 |    0.04555 |
+# | 37 | module.layer2.8.conv2.weight        | (32, 32, 3, 3) |          9216 |           9216 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04344 | -0.00566 |    0.03290 |
+# | 38 | module.layer3.0.conv1.weight        | (64, 32, 3, 3) |         18432 |          18432 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.08262 |  0.00251 |    0.06520 |
+# | 39 | module.layer3.0.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06248 |  0.00073 |    0.04578 |
+# | 40 | module.layer3.0.downsample.0.weight | (64, 32, 1, 1) |          2048 |           2048 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.12275 |  0.01139 |    0.08651 |
+# | 41 | module.layer3.1.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03438 | -0.00186 |    0.02419 |
+# | 42 | module.layer3.1.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03091 | -0.00368 |    0.02203 |
+# | 43 | module.layer3.2.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03477 | -0.00226 |    0.02499 |
+# | 44 | module.layer3.2.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03012 | -0.00350 |    0.02159 |
+# | 45 | module.layer3.3.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03577 | -0.00166 |    0.02608 |
+# | 46 | module.layer3.3.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.02962 | -0.00124 |    0.02115 |
+# | 47 | module.layer3.4.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03694 | -0.00285 |    0.02677 |
+# | 48 | module.layer3.4.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.02916 | -0.00165 |    0.02024 |
+# | 49 | module.layer3.5.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03158 | -0.00180 |    0.02342 |
+# | 50 | module.layer3.5.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.02527 | -0.00177 |    0.01787 |
+# | 51 | module.layer3.6.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03074 | -0.00169 |    0.02256 |
+# | 52 | module.layer3.6.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.02406 | -0.00006 |    0.01658 |
+# | 53 | module.layer3.7.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03160 | -0.00249 |    0.02294 |
+# | 54 | module.layer3.7.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.02298 | -0.00083 |    0.01553 |
+# | 55 | module.layer3.8.conv1.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.02594 | -0.00219 |    0.01890 |
+# | 56 | module.layer3.8.conv2.weight        | (64, 64, 3, 3) |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.01986 | -0.00061 |    0.01318 |
+# | 57 | module.fc.weight                    | (10, 64)       |           640 |            640 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.52562 | -0.00003 |    0.39168 |
 # | 58 | Total sparsity:                     | -              |        851504 |         851504 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.00000 |  0.00000 |    0.00000 |
 # +----+-------------------------------------+----------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------+
-# Total sparsity: 0.00
+# 2018-07-02 16:36:31,555 - Total sparsity: 0.00
 #
-# --- validate (epoch=179)-----------
-# 5000 samples (256 per mini-batch)
-# ==> Top1: 93.000    Top5: 99.820    Loss: 0.314
+# 2018-07-02 16:36:31,555 - --- validate (epoch=179)-----------
+# 2018-07-02 16:36:31,555 - 5000 samples (256 per mini-batch)
+# 2018-07-02 16:36:33,121 - ==> Top1: 91.520    Top5: 99.680    Loss: 0.387
 #
-# Saving checkpoint
-# --- test ---------------------
-# 10000 samples (256 per mini-batch)
-# ==> Top1: 92.970    Top5: 99.740    Loss: 0.349
-#
-#
-# Log file for this run: /home/cvds_lab/nzmora/pytorch_workspace/private-distiller/examples/classifier_compression/logs/2018.04.09-222954/2018.04.09-222954.log
-#
-# real    91m56.310s
-# user    176m50.080s
-# sys     27m5.873s
+# 2018-07-02 16:36:33,123 - Saving checkpoint to: logs/2018.07.02-152746/checkpoint.pth.tar
+# 2018-07-02 16:36:33,159 - --- test ---------------------
+# 2018-07-02 16:36:33,159 - 10000 samples (256 per mini-batch)
+# 2018-07-02 16:36:36,194 - ==> Top1: 92.850    Top5: 99.780    Loss: 0.364
 
 lr_schedulers:
   training_lr:
diff --git a/examples/word_language_model/main.py b/examples/word_language_model/main.py
index 767b80c57d532fdaaf78c997dfbf463a1260be57..a8ff493d7a76733cb52e47df3ab42482f3904d9e 100755
--- a/examples/word_language_model/main.py
+++ b/examples/word_language_model/main.py
@@ -306,7 +306,7 @@ if args.summary:
             threshold = bottomk.data[-1]
             msglogger.info("parameter %s: q = %.2f" %(name, threshold))
     else:
-        distiller.model_summary(model, None, which_summary, 'wikitext2')
+        distiller.model_summary(model, which_summary, 'wikitext2')
     exit(0)
 
 compression_scheduler = None
@@ -317,8 +317,8 @@ if args.compress:
     compression_scheduler = distiller.config.file_config(model, None, args.compress)
 
 optimizer = torch.optim.SGD(model.parameters(), args.lr,
-                                 momentum=args.momentum,
-                                 weight_decay=args.weight_decay)
+                            momentum=args.momentum,
+                            weight_decay=args.weight_decay)
 lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                           patience=0, verbose=True, factor=0.5)
 
diff --git a/models/__init__.py b/models/__init__.py
index d2a5c4fbb277a21ed16f291ede774b48e4d442ca..b34d7a29399cc6f08a649ead808f8a4624e79d24 100755
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -72,6 +72,5 @@ def create_model(pretrained, dataset, arch, parallel=True, device_ids=None):
         model.features = torch.nn.DataParallel(model.features, device_ids=device_ids)
     elif parallel:
         model = torch.nn.DataParallel(model, device_ids=device_ids)
-
     model.cuda()
     return model
diff --git a/models/cifar10/__init__.py b/models/cifar10/__init__.py
index 3b72572744f7dc61529c05213624c9208f466678..e4f636fe642f4c9f7179454a1ce26d1f4ce454c6 100755
--- a/models/cifar10/__init__.py
+++ b/models/cifar10/__init__.py
@@ -19,3 +19,4 @@
 from .simplenet_cifar import *
 from .resnet_cifar import *
 from .preresnet_cifar import *
+from .vgg_cifar import *
diff --git a/models/cifar10/vgg_cifar.py b/models/cifar10/vgg_cifar.py
new file mode 100755
index 0000000000000000000000000000000000000000..0b5a5bb0d3047843879834778cc8cd8d6ae8f881
--- /dev/null
+++ b/models/cifar10/vgg_cifar.py
@@ -0,0 +1,133 @@
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""VGG for CIFAR10
+
+VGG for CIFAR10, based on "Very Deep Convolutional Networks for Large-Scale
+Image Recognition".
+This is based on TorchVision's implementation of VGG for ImageNet, with
+appropriate changes for the 10-class CIFAR10 dataset.
+We replaced the three fully-connected classifier layers with a single one.
+"""
+
+import torch.nn as nn
+
+__all__ = [
+    'VGGCifar', 'vgg11_cifar', 'vgg11_bn_cifar', 'vgg13_cifar', 'vgg13_bn_cifar', 'vgg16_cifar', 'vgg16_bn_cifar',
+    'vgg19_bn_cifar', 'vgg19_cifar',
+]
+
+
+class VGGCifar(nn.Module):
+    """VGG for CIFAR10: a convolutional `features` stack feeding one linear classifier."""
+    def __init__(self, features, num_classes=10, init_weights=True):
+        super(VGGCifar, self).__init__()
+        self.features = features
+        self.classifier = nn.Linear(512, num_classes)  # expects 512 flattened features
+        if init_weights:
+            self._initialize_weights()
+
+    def forward(self, x):
+        feature_maps = self.features(x)
+        flattened = feature_maps.view(feature_maps.size(0), -1)  # keep only the batch axis
+        return self.classifier(flattened)
+
+    def _initialize_weights(self):  # Kaiming conv weights, unit batch-norm scale, zero biases
+        for module in self.modules():
+            if isinstance(module, nn.Linear):
+                nn.init.normal_(module.weight, 0, 0.01)
+                nn.init.constant_(module.bias, 0)
+            elif isinstance(module, nn.BatchNorm2d):
+                nn.init.constant_(module.weight, 1)
+                nn.init.constant_(module.bias, 0)
+            elif isinstance(module, nn.Conv2d):
+                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
+                if module.bias is not None:
+                    nn.init.constant_(module.bias, 0)
+
+
+def make_layers(cfg, batch_norm=False):
+    """Build the conv stack: 'M' adds a 2x2 max-pool, an int adds a 3x3 conv + ReLU."""
+    stack, prev_width = [], 3  # the first convolution consumes 3 input channels
+    for item in cfg:
+        if item == 'M':
+            stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
+            continue
+        stack.append(nn.Conv2d(prev_width, item, kernel_size=3, padding=1))
+        if batch_norm:
+            # Batch normalization sits between the convolution and its activation.
+            stack.append(nn.BatchNorm2d(item))
+        stack.append(nn.ReLU(inplace=True))
+        prev_width = item
+    return nn.Sequential(*stack)
+
+
+cfg = {  # 'M' entries become max-pool layers; integers are convolution output widths
+    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],  # VGG-11
+    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],  # VGG-13
+    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],  # VGG-16
+    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],  # VGG-19
+}
+
+
+def vgg11_cifar(**kwargs):
+    """Build the 11-layer VGG (configuration "A") without batch normalization."""
+    # Keyword arguments are forwarded unchanged to the VGGCifar constructor.
+    return VGGCifar(make_layers(cfg['A']), **kwargs)
+
+
+def vgg11_bn_cifar(**kwargs):
+    """Build the 11-layer VGG (configuration "A") with batch normalization."""
+    feature_stack = make_layers(cfg['A'], batch_norm=True)
+    return VGGCifar(feature_stack, **kwargs)
+
+
+def vgg13_cifar(**kwargs):
+    """Build the 13-layer VGG (configuration "B") without batch normalization."""
+    # Keyword arguments are forwarded unchanged to the VGGCifar constructor.
+    return VGGCifar(make_layers(cfg['B']), **kwargs)
+
+
+def vgg13_bn_cifar(**kwargs):
+    """Build the 13-layer VGG (configuration "B") with batch normalization."""
+    feature_stack = make_layers(cfg['B'], batch_norm=True)
+    return VGGCifar(feature_stack, **kwargs)
+
+
+def vgg16_cifar(**kwargs):
+    """Build the 16-layer VGG (configuration "D") without batch normalization."""
+    # Keyword arguments are forwarded unchanged to the VGGCifar constructor.
+    feature_stack = make_layers(cfg['D'])
+    return VGGCifar(feature_stack, **kwargs)
+
+
+def vgg16_bn_cifar(**kwargs):
+    """Build the 16-layer VGG (configuration "D") with batch normalization."""
+    feature_stack = make_layers(cfg['D'], batch_norm=True)
+    return VGGCifar(feature_stack, **kwargs)
+
+
+def vgg19_cifar(**kwargs):
+    """Build the 19-layer VGG (configuration "E") without batch normalization."""
+    # Keyword arguments are forwarded unchanged to the VGGCifar constructor.
+    feature_stack = make_layers(cfg['E'])
+    return VGGCifar(feature_stack, **kwargs)
+
+
+def vgg19_bn_cifar(**kwargs):
+    """Build the 19-layer VGG (configuration "E") with batch normalization."""
+    feature_stack = make_layers(cfg['E'], batch_norm=True)
+    return VGGCifar(feature_stack, **kwargs)
diff --git a/tests/common.py b/tests/common.py
index bcee3bf1f403b834784b192b646b51e705919c36..324f2b824681a2dc9f6e11dab894f62d2a81e494 100755
--- a/tests/common.py
+++ b/tests/common.py
@@ -23,7 +23,7 @@ import distiller
 from models import create_model
 
 
-def setup_test(arch, dataset, parallel=True):
+def setup_test(arch, dataset, parallel):
     model = create_model(False, dataset, arch, parallel=parallel)
     assert model is not None
 
diff --git a/tests/test_model_summary.py b/tests/test_model_summary.py
new file mode 100755
index 0000000000000000000000000000000000000000..f63e290e1bab9a63ec27b8e5174aff5f5705e370
--- /dev/null
+++ b/tests/test_model_summary.py
@@ -0,0 +1,63 @@
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import logging
+import torch
+import os
+import sys
+module_path = os.path.abspath(os.path.join('..'))
+if module_path not in sys.path:
+    sys.path.append(module_path)
+import distiller
+import pytest
+import common  # common test code
+import apputils
+
+# Logging configuration
+logging.basicConfig(level=logging.INFO)
+fh = logging.FileHandler('test.log')
+logger = logging.getLogger()
+logger.addHandler(fh)
+
+
+def test_png_generation():
+    """Draw a ResNet20 CIFAR10 classifier to a PNG file, once per drawing mode."""
+    dataset, arch = "cifar10", "resnet20_cifar"
+    model, _ = common.setup_test(arch, dataset, parallel=True)
+    # The last argument selects between the 2 different ways to create a PNG
+    for flag in (True, False):
+        apputils.draw_img_classifier_to_file(model, 'model.png', dataset, flag)
+
+
+def test_negative():
+    """Requesting an unsupported summary type must raise ValueError."""
+    dataset, arch = "cifar10", "resnet20_cifar"
+    model, _ = common.setup_test(arch, dataset, parallel=True)
+
+    # 'png' is not a supported summary type, so model_summary is expected to reject it.
+    with pytest.raises(ValueError):
+        distiller.model_summary(model, what='png', dataset=dataset)
+
+
+def test_summary():
+    """Smoke-test each supported summary type on ResNet20 for CIFAR10."""
+    dataset, arch = "cifar10", "resnet20_cifar"
+    model, _ = common.setup_test(arch, dataset, parallel=True)
+
+    # Nothing is asserted about the output: each summary only has to
+    # complete without raising.
+    for summary_type in ('sparsity', 'compute', 'model', 'modules'):
+        distiller.model_summary(model, what=summary_type, dataset=dataset)
diff --git a/tests/test_pruning.py b/tests/test_pruning.py
index 7b26709ec2b549392e5ee5f00d2299215420efcf..57d297510ef2774ba13647a421c2a840498870fd 100755
--- a/tests/test_pruning.py
+++ b/tests/test_pruning.py
@@ -108,7 +108,6 @@ def test_prune_all_filters(parallel):
 
 def ranked_filter_pruning(config, ratio_to_prune, is_parallel):
     """Test L1 ranking and pruning of filters.
-
     First we rank and prune the filters of a Convolutional layer using
     a L1RankedStructureParameterPruner.  Then we physically remove the
     filters from the model (via "thining" process).
@@ -218,7 +217,6 @@ def run_forward_backward(model, optimizer, dummy_input):
 
 def arbitrary_channel_pruning(config, channels_to_remove, is_parallel):
     """Test removal of arbitrary channels.
-
     The test receives a specification of channels to remove.
     Based on this specification, the channels are pruned and then physically
     removed from the model (via a "thinning" process).
diff --git a/tests/test_ranking.py b/tests/test_ranking.py
new file mode 100755
index 0000000000000000000000000000000000000000..a0fa14a222d2e7f53a0e418c569577b3801d22f6
--- /dev/null
+++ b/tests/test_ranking.py
@@ -0,0 +1,101 @@
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import logging
+import torch
+import os
+import sys
+try:
+    import distiller
+except ImportError:
+    module_path = os.path.abspath(os.path.join('..'))
+    if module_path not in sys.path:
+        sys.path.append(module_path)
+    import distiller
+import common  # common test code
+
+# Logging configuration
+logging.basicConfig(level=logging.INFO)
+fh = logging.FileHandler('test.log')
+logger = logging.getLogger()
+logger.addHandler(fh)
+
+
+def test_ch_ranking():
+    """Rank the channels of a [3, 2, 2, 2] tensor (3 filters, 2 channels); check the bottom half."""
+    param = torch.tensor([[[[11., 12],  # Filter #1
+                            [13,  14]],
+
+                           [[15., 16],
+                            [17,  18]]],
+                          # Filter #2
+                          [[[21., 22],
+                            [23,  24]],
+
+                           [[25., 26],
+                            [27,  28]]],
+                          # Filter #3
+                          [[[31., 32],
+                            [33,  34]],
+
+                           [[35., 36],
+                            [37,  38]]]])
+
+    fraction_to_prune = 0.5
+    bottomk_channels, channel_mags = distiller.pruning.L1RankedStructureParameterPruner.rank_channels(fraction_to_prune, param)
+    logger.info("bottom {}% channels: {}".format(fraction_to_prune*100, bottomk_channels))
+    assert bottomk_channels == torch.tensor([90.])  # NOTE(review): 90 == (50 + 90 + 130) / 3, presumably channel 0's mean per-filter L1 norm -- confirm
+
+
+def test_ranked_channel_pruning():
+    """Mask 10% of layer1.0.conv1's channels with the ranked pruner, then thin the model."""
+    model, zeros_mask_dict = common.setup_test("resnet20_cifar", "cifar10", parallel=False)
+    # Test that we can access the weights tensor of the first convolution in layer 1
+    conv1_p = distiller.model_find_param(model, "layer1.0.conv1.weight")
+    assert conv1_p is not None
+
+    # Test that there are no zero-channels
+    assert distiller.sparsity_ch(conv1_p) == 0.0
+
+    # Create a channel-ranking pruner and let it fill in the mask for conv1's weights
+    reg_regims = {"layer1.0.conv1.weight": [0.1, "Channels"]}
+    pruner = distiller.pruning.L1RankedStructureParameterPruner("channel_pruner", reg_regims)
+    pruner.set_param_mask(conv1_p, "layer1.0.conv1.weight", zeros_mask_dict, meta=None)
+
+    conv1 = common.find_module_by_name(model, "layer1.0.conv1")
+    assert conv1 is not None
+
+    # Test that the mask has the correct fraction of channels pruned.
+    # We asked for 10%, but there are only 16 channels, so int(0.1 * 16) = 1 channel (1/16) is expected
+    logger.info("layer1.0.conv1 = {}".format(conv1))
+    expected_pruning = int(0.1 * conv1.in_channels) / conv1.in_channels
+    assert distiller.sparsity_ch(zeros_mask_dict["layer1.0.conv1.weight"].mask) == expected_pruning
+
+    # Use the mask to prune: the weights only become sparse once the mask is applied
+    assert distiller.sparsity_ch(conv1_p) == 0
+    zeros_mask_dict["layer1.0.conv1.weight"].apply_mask(conv1_p)
+    assert distiller.sparsity_ch(conv1_p) == expected_pruning
+
+    # Before thinning: the model's "conv1" module and layer1.0.conv1 both still report 16 channels
+    conv0 = common.find_module_by_name(model, "conv1")
+    assert conv0 is not None
+    assert conv0.out_channels == 16
+    assert conv1.in_channels == 16
+
+    # Test thinning: after remove_channels both modules are expected to shrink to 15 channels
+    distiller.remove_channels(model, zeros_mask_dict, "resnet20_cifar", "cifar10", optimizer=None)
+    assert conv0.out_channels == 15
+    assert conv1.in_channels == 15
diff --git a/tests/test_summarygraph.py b/tests/test_summarygraph.py
index f36f56579ae70e88b9d171cc6bd8424ae104613c..b5bf6a82de51798db3fef9c377638c6a03a6a273 100755
--- a/tests/test_summarygraph.py
+++ b/tests/test_summarygraph.py
@@ -18,6 +18,7 @@ import logging
 import torch
 import os
 import sys
+import pytest
 module_path = os.path.abspath(os.path.join('..'))
 if module_path not in sys.path:
     sys.path.append(module_path)