From be97de233961d7b2ccb7dc1af7c0534f5fda6027 Mon Sep 17 00:00:00 2001
From: Neta Zmora <neta.zmora@intel.com>
Date: Thu, 14 Jun 2018 11:23:53 +0300
Subject: [PATCH] Revert "ModelSummary: adapt sparsity accounting to correctly
 account for "weight tying""

This reverts commit ecade1b2573fdf0bed3ebe38ddd1cb03beb6cb48.

This simply does not work, so it is reverted until we find a correct solution.
For example, in the language model the encoder and decoder weights are tied and
use the same memory, yet I can't see how to determine that they are the same
parameter.
---
 distiller/model_summaries.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/distiller/model_summaries.py b/distiller/model_summaries.py
index bc9ee91..b9edddc 100755
--- a/distiller/model_summaries.py
+++ b/distiller/model_summaries.py
@@ -96,17 +96,11 @@ def weights_sparsity_summary(model, return_total_sparsity=False, param_dims=[2,4
     pd.set_option('precision', 2)
     params_size = 0
     sparse_params_size = 0
-    # In language models, we might use use "weight tying", which means that the same
-    # weights tensor is used in several different places. If tying is used, we'd like
-    # to log the tensor information, but exclude it from the total sparsity calculation.
-    seen_params = []
     for name, param in model.state_dict().items():
         if (param.dim() in param_dims) and any(type in name for type in ['weight', 'bias']):
             _density = distiller.density(param)
-            if name not in seen_params:
-                params_size += torch.numel(param)
-                sparse_params_size += param.numel() * _density
-                seen_params.append(name)
+            params_size += torch.numel(param)
+            sparse_params_size += param.numel() * _density
             df.loc[len(df.index)] = ([
                 name,
                 distiller.size_to_str(param.size()),
-- 
GitLab
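
A possible direction for the "correct solution" mentioned above, sketched below.
This is illustrative code only, not part of the patch and not Distiller API:
tied parameters share the same underlying storage, so comparing the data_ptr()
of the tensors returned by state_dict() can reveal that two differently named
entries refer to the same memory. The helper name unique_param_storages and the
toy tied encoder/decoder model are hypothetical.

import torch.nn as nn

def unique_param_storages(model):
    """Yield (name, param, is_duplicate); is_duplicate is True when the
    tensor's storage was already seen under another name (weight tying)."""
    seen_ptrs = set()
    for name, param in model.state_dict().items():
        ptr = param.data_ptr()
        yield name, param, ptr in seen_ptrs
        seen_ptrs.add(ptr)

# Toy example of tied encoder/decoder weights, as in the language model:
encoder = nn.Embedding(10, 4)
decoder = nn.Linear(4, 10, bias=False)
decoder.weight = encoder.weight   # weight tying: both names share one Parameter
model = nn.Sequential(encoder, decoder)

for name, param, dup in unique_param_storages(model):
    print(name, tuple(param.size()), "tied (already counted)" if dup else "unique")

The reverted code tried to deduplicate by parameter name, which cannot detect
tying because the tied tensors appear under different names in state_dict();
comparing storage pointers sidesteps that limitation.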