From 0ae07549e0746fbda8816d59e0921a3e3b0ec089 Mon Sep 17 00:00:00 2001
From: Guy Jacob <guy.jacob@intel.com>
Date: Tue, 12 Feb 2019 17:53:41 +0200
Subject: [PATCH] Post-train quant bugfix: squeeze scale factor shape when
 quantizing bias

---
 distiller/quantization/range_linear.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/distiller/quantization/range_linear.py b/distiller/quantization/range_linear.py
index c4e797f..234bfe8 100644
--- a/distiller/quantization/range_linear.py
+++ b/distiller/quantization/range_linear.py
@@ -359,7 +359,7 @@ class RangeLinearQuantParamLayerWrapper(RangeLinearQuantWrapper):
         self.has_bias = hasattr(wrapped_module, 'bias') and wrapped_module.bias is not None
         if self.has_bias:
             if self.preset_act_stats:
-                linear_quantize_clamp(wrapped_module.bias.data, self.accum_scale, 0,
+                linear_quantize_clamp(wrapped_module.bias.data, self.accum_scale.squeeze(), 0,
                                       self.accum_min_q_val, self.accum_max_q_val, inplace=True)
             else:
                 b_scale, b_zero_point = _get_quant_params_from_tensor(wrapped_module.bias, num_bits_params, self.mode)
@@ -387,7 +387,7 @@ class RangeLinearQuantParamLayerWrapper(RangeLinearQuantWrapper):
         if self.has_bias:
             # Re-quantize bias to match x * w scale: b_q' = (in_scale * w_scale / b_scale) * (b_q + b_zero_point)
             self.wrapped_module.bias.data = linear_quantize_clamp(self.base_b_q + self.b_zero_point,
-                                                                  self.accum_scale / self.b_scale, 0,
+                                                                  self.accum_scale.squeeze() / self.b_scale, 0,
                                                                   self.accum_min_q_val, self.accum_max_q_val)
 
         # Note the main terms within the summation is:
-- 
GitLab
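
Note (not part of the patch): a minimal sketch of the broadcasting issue the fix guards against, assuming the per-channel accumulator scale is kept with a trailing singleton dimension such as (C, 1). The names C, bias and accum_scale below are illustrative only, with accum_scale standing in for the wrapper's self.accum_scale.

    import torch

    C = 4
    bias = torch.randn(C)                 # 1-D bias tensor, shape (C,)
    accum_scale = torch.rand(C, 1) + 0.5  # hypothetical per-channel scale, shape (C, 1)

    # Without squeezing, broadcasting turns the per-element scaling into a (C, C) matrix:
    print((bias * accum_scale).shape)            # torch.Size([4, 4]) -- wrong shape
    # Squeezing the scale down to shape (C,) keeps the product elementwise:
    print((bias * accum_scale.squeeze()).shape)  # torch.Size([4]) -- matches the bias

Squeezing the scale before handing it to linear_quantize_clamp therefore keeps the quantized bias the same shape as the original bias, which is what both changed call sites rely on.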