From be97de233961d7b2ccb7dc1af7c0534f5fda6027 Mon Sep 17 00:00:00 2001
From: Neta Zmora <neta.zmora@intel.com>
Date: Thu, 14 Jun 2018 11:23:53 +0300
Subject: [PATCH] Revert "ModelSummary: adapt sparsity accounting to correctly
 account for "weight tying""

This reverts commit ecade1b2573fdf0bed3ebe38ddd1cb03beb6cb48.

This simply does not work, so it is reverted until we find a correct solution.
For example, in the language model the encoder and decoder weights are tied and
use the same memory, yet I can't see how to determine that they are the same
parameter.
---
 distiller/model_summaries.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/distiller/model_summaries.py b/distiller/model_summaries.py
index bc9ee91..b9edddc 100755
--- a/distiller/model_summaries.py
+++ b/distiller/model_summaries.py
@@ -96,17 +96,11 @@ def weights_sparsity_summary(model, return_total_sparsity=False, param_dims=[2,4
     pd.set_option('precision', 2)
     params_size = 0
     sparse_params_size = 0
-    # In language models, we might use use "weight tying", which means that the same
-    # weights tensor is used in several different places. If tying is used, we'd like
-    # to log the tensor information, but exclude it from the total sparsity calculation.
-    seen_params = []
     for name, param in model.state_dict().items():
         if (param.dim() in param_dims) and any(type in name for type in ['weight', 'bias']):
             _density = distiller.density(param)
-            if name not in seen_params:
-                params_size += torch.numel(param)
-                sparse_params_size += param.numel() * _density
-                seen_params.append(name)
+            params_size += torch.numel(param)
+            sparse_params_size += param.numel() * _density
             df.loc[len(df.index)] = ([
                 name,
                 distiller.size_to_str(param.size()),
-- 
GitLab
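
A possible direction for the "correct solution" mentioned above, sketched below.
This is illustrative code only, not part of the patch and not Distiller API:
tied parameters share the same underlying storage, so comparing the data_ptr()
of the tensors returned by state_dict() can reveal that two differently named
entries refer to the same memory. The helper name unique_param_storages and the
toy tied encoder/decoder model are hypothetical.

import torch.nn as nn

def unique_param_storages(model):
    """Yield (name, param, is_duplicate); is_duplicate is True when the
    tensor's storage was already seen under another name (weight tying)."""
    seen_ptrs = set()
    for name, param in model.state_dict().items():
        ptr = param.data_ptr()
        yield name, param, ptr in seen_ptrs
        seen_ptrs.add(ptr)

# Toy example of tied encoder/decoder weights, as in the language model:
encoder = nn.Embedding(10, 4)
decoder = nn.Linear(4, 10, bias=False)
decoder.weight = encoder.weight   # weight tying: both names share one Parameter
model = nn.Sequential(encoder, decoder)

for name, param, dup in unique_param_storages(model):
    print(name, tuple(param.size()), "tied (already counted)" if dup else "unique")

The reverted code tried to deduplicate by parameter name, which cannot detect
tying because the tied tensors appear under different names in state_dict();
comparing storage pointers sidesteps that limitation.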