Unverified commit e82d9380, authored by Guy Jacob, committed by GitHub

Post-train quant: Refactor inputs quantization (#454)

* Fake quant wrapper now also works on (fake) quantized inputs
* Remove 'requires_quantized_inputs' flag
* Unrelated: Moved LinearQuantMode enum to q_utils
parent 47175961
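The first bullet above means a wrapped module can now be fed activations that have already been through a quantize-dequantize ("fake quant") step, instead of requiring raw float inputs and tracking that via the removed requires_quantized_inputs flag. Below is a minimal, self-contained sketch of what fake quantization of an input tensor does; it is plain PyTorch written for illustration, not Distiller's actual wrapper code, and the helper name and range handling are assumptions.

import torch

def fake_quantize(x, num_bits=8):
    # Asymmetric linear quantize-dequantize: round to the integer grid defined
    # by the tensor's min/max, then map straight back to float. Downstream
    # modules keep working in float but see the quantization error.
    qmin, qmax = 0, 2 ** num_bits - 1
    scale = (x.max() - x.min()).clamp(min=1e-8) / (qmax - qmin)
    zero_point = qmin - torch.round(x.min() / scale)
    q = torch.clamp(torch.round(x / scale) + zero_point, qmin, qmax)
    return (q - zero_point) * scale

x = torch.randn(4, 8)
x_fq = fake_quantize(x)      # float tensor carrying quantization error
x_fq2 = fake_quantize(x_fq)  # feeding an already fake-quantized tensor is the
                             # case the refactored wrapper now handles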
@@ -16,9 +16,11 @@
 from .quantizer import Quantizer
 from .range_linear import RangeLinearQuantWrapper, RangeLinearQuantParamLayerWrapper, PostTrainLinearQuantizer, \
-    LinearQuantMode, QuantAwareTrainRangeLinearQuantizer, add_post_train_quant_args, NCFQuantAwareTrainQuantizer, \
-    RangeLinearQuantConcatWrapper, RangeLinearQuantEltwiseAddWrapper, RangeLinearQuantEltwiseMultWrapper, ClipMode
+    QuantAwareTrainRangeLinearQuantizer, add_post_train_quant_args, NCFQuantAwareTrainQuantizer, \
+    RangeLinearQuantConcatWrapper, RangeLinearQuantEltwiseAddWrapper, RangeLinearQuantEltwiseMultWrapper, ClipMode, \
+    RangeLinearEmbeddingWrapper, RangeLinearFakeQuantWrapper, RangeLinearQuantMatmulWrapper
 from .clipped_linear import LinearQuantizeSTE, ClippedLinearQuantization, WRPNQuantizer, DorefaQuantizer, PACTQuantizer
 from .q_utils import *
 del quantizer
 del range_linear
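With the expanded import above, the newer wrapper classes are re-exported at the package level and LinearQuantMode no longer needs to come from range_linear. A short usage sketch; the names are taken from the diff, the surrounding comment is illustrative only.

from distiller.quantization import (LinearQuantMode,            # now re-exported via q_utils
                                    RangeLinearFakeQuantWrapper,
                                    RangeLinearQuantMatmulWrapper,
                                    RangeLinearEmbeddingWrapper)

# Before this commit, LinearQuantMode was defined in and imported from
# distiller.quantization.range_linear; its home is now q_utils, and the
# package-level import above is the path used elsewhere in this commit.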
@@ -18,7 +18,8 @@ Here we implement the greedy search algorithm for automatic quantization.
 """
 import torch
 import torch.nn as nn
-from distiller.quantization.range_linear import PostTrainLinearQuantizer, ClipMode, LinearQuantMode
+from distiller.quantization import LinearQuantMode
+from distiller.quantization.range_linear import PostTrainLinearQuantizer, ClipMode
 from distiller.summary_graph import SummaryGraph
 from distiller.model_transforms import fold_batch_norms
 import distiller.modules
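The import reshuffle in the greedy-search module just reflects the enum's move. For context, a hedged sketch of the kind of call these imports support downstream; the keyword names (bits_activations, bits_parameters, mode, clip_acts) and the prepare_model call are assumed from Distiller's post-training quantization API, not taken from this diff.

import torch
import torch.nn as nn
from distiller.quantization import LinearQuantMode
from distiller.quantization.range_linear import PostTrainLinearQuantizer, ClipMode

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU())   # stand-in for a trained model
quantizer = PostTrainLinearQuantizer(model,
                                     bits_activations=8,
                                     bits_parameters=8,
                                     mode=LinearQuantMode.ASYMMETRIC_UNSIGNED,
                                     clip_acts=ClipMode.AVG)
quantizer.prepare_model(torch.randn(1, 3, 224, 224))    # dummy input for graph tracing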
@@ -18,6 +18,12 @@ from enum import Enum
 import torch

+class LinearQuantMode(Enum):
+    SYMMETRIC = 1
+    ASYMMETRIC_UNSIGNED = 2
+    ASYMMETRIC_SIGNED = 3
+
 def _prep_saturation_val_tensor(sat_val):
     is_scalar = not isinstance(sat_val, torch.Tensor)
     out = torch.tensor(sat_val) if is_scalar else sat_val.clone().detach()
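The enum that selects symmetric vs. asymmetric range-linear quantization now lives in q_utils, next to helpers such as _prep_saturation_val_tensor. For context, here is a sketch of the arithmetic those saturation values feed into, i.e. how a (min, max) range and a LinearQuantMode choice become a scale and zero point; this mirrors standard range-linear quantization math and is not necessarily q_utils' exact implementation.

import torch
from distiller.quantization import LinearQuantMode  # the enum added above

def linear_quant_params(sat_min, sat_max, num_bits, mode):
    # Illustrative only: quantization is assumed to be q = round(x * scale - zero_point).
    if mode == LinearQuantMode.SYMMETRIC:
        sat = torch.max(sat_min.abs(), sat_max.abs())
        n = 2 ** (num_bits - 1) - 1                  # e.g. 127 for 8 bits
        scale = n / sat
        zero_point = torch.zeros_like(scale)
    else:                                            # asymmetric, unsigned or signed
        n = 2 ** num_bits - 1                        # e.g. 255 for 8 bits
        scale = n / (sat_max - sat_min)
        zero_point = scale * sat_min
        if mode == LinearQuantMode.ASYMMETRIC_SIGNED:
            zero_point += 2 ** (num_bits - 1)
    return scale, zero_point

scale, zp = linear_quant_params(torch.tensor([-1.5]), torch.tensor([2.0]),
                                num_bits=8, mode=LinearQuantMode.ASYMMETRIC_UNSIGNED)
# scale ~= 72.9, zp ~= -109.3: q = round(x * scale - zp) maps [-1.5, 2.0] onto [0, 255]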
This diff is collapsed.