diff --git a/distiller/pruning/__init__.py b/distiller/pruning/__init__.py index 6fea7d9f606f54c8201f5d9f447df60256bb793c..960177c5c2438365ccae757ce30f6c077150d3cf 100755 --- a/distiller/pruning/__init__.py +++ b/distiller/pruning/__init__.py @@ -20,16 +20,22 @@ from .magnitude_pruner import MagnitudeParameterPruner from .automated_gradual_pruner import AutomatedGradualPruner, \ - L1RankedStructureParameterPruner_AGP, L2RankedStructureParameterPruner_AGP, \ - ActivationAPoZRankedFilterPruner_AGP, GradientRankedFilterPruner_AGP, \ + L1RankedStructureParameterPruner_AGP, \ + L2RankedStructureParameterPruner_AGP, \ + ActivationAPoZRankedFilterPruner_AGP, \ + ActivationMeanRankedFilterPruner_AGP, \ + GradientRankedFilterPruner_AGP, \ RandomRankedFilterPruner_AGP from .level_pruner import SparsityLevelParameterPruner from .sensitivity_pruner import SensitivityPruner from .splicing_pruner import SplicingPruner from .structure_pruner import StructureParameterPruner -from .ranked_structures_pruner import L1RankedStructureParameterPruner, L2RankedStructureParameterPruner, \ +from .ranked_structures_pruner import L1RankedStructureParameterPruner, \ + L2RankedStructureParameterPruner, \ ActivationAPoZRankedFilterPruner, \ - RandomRankedFilterPruner, GradientRankedFilterPruner + ActivationMeanRankedFilterPruner, \ + GradientRankedFilterPruner, \ + RandomRankedFilterPruner from .baidu_rnn_pruner import BaiduRNNPruner from .greedy_filter_pruning import greedy_pruner diff --git a/distiller/pruning/automated_gradual_pruner.py b/distiller/pruning/automated_gradual_pruner.py index 9afaaef93cd83f6703d9c8609a525524957ad6b3..b2c275d023ada2ded66db4f2836e004937e686ed 100755 --- a/distiller/pruning/automated_gradual_pruner.py +++ b/distiller/pruning/automated_gradual_pruner.py @@ -120,6 +120,13 @@ class ActivationAPoZRankedFilterPruner_AGP(StructuredAGP): weights=weights, group_dependency=group_dependency) +class ActivationMeanRankedFilterPruner_AGP(StructuredAGP): + def __init__(self, 
name, initial_sparsity, final_sparsity, group_type, weights, group_dependency=None): + assert group_type in ['3D', 'Filters'] + super().__init__(name, initial_sparsity, final_sparsity) + self.pruner = ActivationMeanRankedFilterPruner(name, group_type, desired_sparsity=0, + weights=weights, group_dependency=group_dependency) + class GradientRankedFilterPruner_AGP(StructuredAGP): def __init__(self, name, initial_sparsity, final_sparsity, group_type, weights, group_dependency=None): assert group_type in ['3D', 'Filters'] diff --git a/distiller/pruning/ranked_structures_pruner.py b/distiller/pruning/ranked_structures_pruner.py index 1a23c620dc9ea432fe0ef4658e877cb4bb670456..d6d3fe16c8d09564138d2622facb0c77969c840c 100755 --- a/distiller/pruning/ranked_structures_pruner.py +++ b/distiller/pruning/ranked_structures_pruner.py @@ -341,17 +341,17 @@ def mask_from_filter_order(filters_ordered_by_criterion, param, num_filters, bin return expanded.view(param.shape), binary_map -class ActivationAPoZRankedFilterPruner(RankedStructureParameterPruner): - """Uses mean APoZ (average percentage of zeros) activation channels to rank structures - and prune a specified percentage of structures. - - "Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures", - Hengyuan Hu, Rui Peng, Yu-Wing Tai, Chi-Keung Tang, ICLR 2016 - https://arxiv.org/abs/1607.03250 +class ActivationRankedFilterPruner(RankedStructureParameterPruner): + """Base class for pruners ranking convolution filters by some quality criterion of the + corresponding feature-map channels (e.g. mean channel activation L1 value). 
"""     def __init__(self, name, group_type, desired_sparsity, weights, group_dependency=None):         super().__init__(name, group_type, desired_sparsity, weights, group_dependency)  +    @property +    def activation_rank_criterion(self): +        raise NotImplementedError +     def prune_group(self, fraction_to_prune, param, param_name, zeros_mask_dict, model=None, binary_map=None):         if fraction_to_prune == 0:             return @@ -368,11 +368,12 @@ class ActivationAPoZRankedFilterPruner(RankedStructureParameterPruner):         if module is None:             raise ValueError("Could not find a layer named %s in the model."                              "\nMake sure to use assign_layer_fq_names()" % fq_name) -        if not hasattr(module, 'apoz_channels'): -            raise ValueError("Could not find attribute \'apoz_channels\' in module %s" -                             "\nMake sure to use SummaryActivationStatsCollector(\"apoz_channels\")" % fq_name) +        if not hasattr(module, self.activation_rank_criterion): +            raise ValueError("Could not find attribute \"{}\" in module {}" +                             "\nMake sure to use SummaryActivationStatsCollector(\"{}\")". 
+ format(self.activation_rank_criterion, fq_name, self.activation_rank_criterion)) - apoz, std = module.apoz_channels.value() + quality_criterion, std = getattr(module, self.activation_rank_criterion).value() num_filters = param.size(0) num_filters_to_prune = int(fraction_to_prune * num_filters) if num_filters_to_prune == 0: @@ -380,8 +381,8 @@ class ActivationAPoZRankedFilterPruner(RankedStructureParameterPruner): return # Sort from low to high, and remove the bottom 'num_filters_to_prune' filters - filters_ordered_by_apoz = np.argsort(apoz)[:-num_filters_to_prune] - mask, binary_map = mask_from_filter_order(filters_ordered_by_apoz, param, num_filters, binary_map) + filters_ordered_by_criterion = np.argsort(quality_criterion)[:-num_filters_to_prune] + mask, binary_map = mask_from_filter_order(filters_ordered_by_criterion, param, num_filters, binary_map) zeros_mask_dict[param_name].mask = mask msglogger.info("ActivationL1RankedStructureParameterPruner - param: %s pruned=%.3f goal=%.3f (%d/%d)", @@ -391,8 +392,33 @@ class ActivationAPoZRankedFilterPruner(RankedStructureParameterPruner): return binary_map +class ActivationAPoZRankedFilterPruner(ActivationRankedFilterPruner): + """Uses mean APoZ (average percentage of zeros) activation channels to rank filters + and prune a specified percentage of filters. + + "Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures," + Hengyuan Hu, Rui Peng, Yu-Wing Tai, Chi-Keung Tang. ICLR 2016. + https://arxiv.org/abs/1607.03250 + """ + @property + def activation_rank_criterion(self): + return 'apoz_channels' + + +class ActivationMeanRankedFilterPruner(ActivationRankedFilterPruner): + """Uses mean value of activation channels to rank filters and prune a specified percentage of filters. + + "Pruning Convolutional Neural Networks for Resource Efficient Inference," + Pavlo Molchanov, Stephen Tyree, Tero Karras, Timo Aila, Jan Kautz. ICLR 2017. 
+ https://arxiv.org/abs/1611.06440 + """ + @property + def activation_rank_criterion(self): + return 'mean_channels' + + class RandomRankedFilterPruner(RankedStructureParameterPruner): - """A Random raanking of filters. + """A Random ranking of filters. This is used for sanity testing of other algorithms. """ diff --git a/examples/classifier_compression/compress_classifier.py b/examples/classifier_compression/compress_classifier.py index 29412f1ebc7f28b79925999e74b603f4ec9c0398..37a9df170dc81beb779f61154a415a49a49b25b5 100755 --- a/examples/classifier_compression/compress_classifier.py +++ b/examples/classifier_compression/compress_classifier.py @@ -704,6 +704,8 @@ def create_activation_stats_collectors(model, *phases): distiller.utils.activation_channels_l1), "apoz_channels": SummaryActivationStatsCollector(model, "apoz_channels", distiller.utils.activation_channels_apoz), + "mean_channels": SummaryActivationStatsCollector(model, "mean_channels", + distiller.utils.activation_channels_means), "records": RecordsActivationStatsCollector(model, classes=[torch.nn.Conv2d]) })