diff --git a/distiller/apputils/checkpoint.py b/distiller/apputils/checkpoint.py
index 70e9de0deccc088539d4fd8f820fdb47220b2d7c..fc222e3ce74ecb794cb66830915517a5e02293cb 100755
--- a/distiller/apputils/checkpoint.py
+++ b/distiller/apputils/checkpoint.py
@@ -29,7 +29,6 @@ from tabulate import tabulate
 import torch
 import distiller
 from distiller.utils import normalize_module_name
-import distiller.quantization as quantization
 msglogger = logging.getLogger()
 
 
@@ -227,7 +226,7 @@ def load_checkpoint(model, chkpt_file, optimizer=None,
 
         if qmd.get('pytorch_convert', False):
             msglogger.info('Converting Distiller PTQ model to PyTorch quantization API')
-            model = quantization.convert_distiller_ptq_model_to_pytorch(model, dummy_input=qmd['dummy_input'])
+            model = quantizer.convert_to_pytorch(qmd['dummy_input'], backend=qmd.get('pytorch_convert_backend', None))
 
     if normalize_dataparallel_keys:
         checkpoint['state_dict'] = {normalize_module_name(k): v for k, v in checkpoint['state_dict'].items()}
diff --git a/distiller/apputils/image_classifier.py b/distiller/apputils/image_classifier.py
index 845a96fb8f245c36d5b470785e099e1accac0258..c2e956f3a2069acb3f4dd4339bcf884bdf02508f 100755
--- a/distiller/apputils/image_classifier.py
+++ b/distiller/apputils/image_classifier.py
@@ -204,13 +204,13 @@ class ClassifierCompressor(object):
                     self.pylogger, self.activations_collectors, args=self.args)
 
 
-def init_classifier_compression_arg_parser():
+def init_classifier_compression_arg_parser(include_ptq_lapq_args=False):
     '''Common classifier-compression application command-line arguments.
     '''
     SUMMARY_CHOICES = ['sparsity', 'compute', 'model', 'modules', 'png', 'png_w_params']
 
     parser = argparse.ArgumentParser(description='Distiller image classification model compression')
-    parser.add_argument('data', metavar='DIR', help='path to dataset')
+    parser.add_argument('data', metavar='DATASET_DIR', help='path to dataset')
     parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18', type=lambda s: s.lower(),
                         choices=models.ALL_MODEL_NAMES,
                         help='model architecture: ' +
@@ -312,7 +312,7 @@ def init_classifier_compression_arg_parser():
                         help='Load a model without DataParallel wrapping it')
     parser.add_argument('--thinnify', dest='thinnify', action='store_true', default=False,
                         help='physically remove zero-filters and create a smaller model')
-    distiller.quantization.add_post_train_quant_args(parser)
+    distiller.quantization.add_post_train_quant_args(parser, add_lapq_args=include_ptq_lapq_args)
     return parser
 
 
diff --git a/distiller/data_loggers/collector.py b/distiller/data_loggers/collector.py
index b7430723de65634a6c1d17d451867a461c7fda5b..8cc41aab6921d8026720dd509759a96d12aff832 100755
--- a/distiller/data_loggers/collector.py
+++ b/distiller/data_loggers/collector.py
@@ -33,16 +33,18 @@ matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import distiller
 from distiller.quantization.range_linear import is_post_train_quant_wrapper
+from distiller.quantization.pytorch_quant_conversion import QFunctionalWrapper
 import numpy as np
+import concurrent.futures
 
 msglogger = logging.getLogger()
 
-__all__ = ['SummaryActivationStatsCollector', 'RecordsActivationStatsCollector',
-           'QuantCalibrationStatsCollector', 'ActivationHistogramsCollector',
-           'CollectorDirection',
-           'collect_quant_stats', 'collect_histograms',
+__all__ = ['SummaryActivationStatsCollector', 'RecordsActivationStatsCollector', 'QuantCalibrationStatsCollector',
+           'ActivationHistogramsCollector', 'RawActivationsCollector', 'CollectorDirection',
+           'collect_quant_stats', 'collect_histograms', 'collect_raw_outputs',
            'collector_context', 'collectors_context']
 
+
 class CollectorDirection(enum.Enum):
     OUT = 0
     OFM = 0
@@ -169,7 +171,8 @@ class ActivationStatsCollector(object):
         # We make an exception for models that were quantized with 'PostTrainLinearQuantizer'. In these
         # models, the quantized modules are actually wrappers of the original FP32 modules, so they are
         # NOT leaf modules - but we still want to track them.
-        if distiller.has_children(module) and not is_post_train_quant_wrapper(module):
+        if distiller.has_children(module) and not (is_post_train_quant_wrapper(module) or
+                                                   isinstance(module, QFunctionalWrapper)):
             return False
         if isinstance(module, torch.nn.Identity):
             return False
@@ -216,7 +219,7 @@ class SummaryActivationStatsCollector(ActivationStatsCollector):
     inputs_consolidate_func is called on tuple of tensors, and returns a tensor.
     """
     def __init__(self, model, stat_name, summary_fn,
-                 classes=[torch.nn.ReLU, torch.nn.ReLU6, torch.nn.LeakyReLU],
+                 classes=(torch.nn.ReLU, torch.nn.ReLU6, torch.nn.LeakyReLU),
                  collector_direction=CollectorDirection.OUT,
                  inputs_consolidate_func=torch.cat):
         super(SummaryActivationStatsCollector, self).__init__(model, stat_name, classes)
@@ -302,9 +305,9 @@ class RecordsActivationStatsCollector(ActivationStatsCollector):
 
     For obvious reasons, this is slower than SummaryActivationStatsCollector.
     """
-    def __init__(self, model, classes=[torch.nn.ReLU,
+    def __init__(self, model, classes=(torch.nn.ReLU,
                                        torch.nn.ReLU6,
-                                       torch.nn.LeakyReLU]):
+                                       torch.nn.LeakyReLU)):
         super(RecordsActivationStatsCollector, self).__init__(model, "statistics_records", classes)
 
     def _activation_stats_cb(self, module, inputs, output):
@@ -798,6 +801,47 @@ class ActivationHistogramsCollector(ActivationStatsCollector):
         return fname
 
 
+class RawActivationsCollector(ActivationStatsCollector):
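+    """Collects the raw, full output tensors of the observed modules, without any summarization.
+
+    Quantized outputs are de-quantized before being stored, so all recorded tensors are FP32.
+    """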
+    def __init__(self, model, classes=None):
+        super(RawActivationsCollector, self).__init__(model, "raw_acts", classes)
+
+        _verify_no_dataparallel(model)
+
+    def _activation_stats_cb(self, module, inputs, output):
+        if isinstance(output, torch.Tensor):
+            if output.is_quantized:
+                module.raw_outputs.append(output.dequantize())
+            else:
+                module.raw_outputs.append(output.cpu())
+
+    def _start_counter(self, module):
+        module.raw_outputs = []
+
+    def _reset_counter(self, module):
+        if hasattr(module, 'raw_outputs'):
+            module.raw_outputs = []
+
+    def _collect_activations_stats(self, module, activation_stats, name=''):
+        if not hasattr(module, 'raw_outputs'):
+            return
+
+        if isinstance(module.raw_outputs, list) and len(module.raw_outputs) > 0:
+            module.raw_outputs = torch.stack(module.raw_outputs)
+        activation_stats[module.distiller_name] = module.raw_outputs
+
+    def save(self, dir_name):
+        if not os.path.isdir(dir_name):
+            os.mkdir(dir_name)
+
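+        # Serialize each layer's stacked outputs in a separate worker process; files are named
+        # '<running_index>-<layer_name>.pt' under dir_name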
+        with concurrent.futures.ProcessPoolExecutor() as executor:
+            for idx, (layer_name, raw_outputs) in enumerate(self.value().items()):
+                idx_str = '{:03d}'.format(idx + 1)
+                executor.submit(torch.save, raw_outputs, os.path.join(dir_name,
+                                                                      '-'.join((idx_str, layer_name)) + '.pt'))
+
+        return dir_name
+
+
 def collect_quant_stats(model, test_fn, save_dir=None, classes=None, inplace_runtime_check=False,
                         disable_inplace_attrs=False, inplace_attr_names=('inplace',),
                         modules_to_collect=None):
@@ -893,6 +937,20 @@ def collect_histograms(model, test_fn, save_dir=None, activation_stats=None,
     return histogram_collector.value()
 
 
+def collect_raw_outputs(model, test_fn, save_dir=None, classes=None):
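+    """Utility function for collecting raw layer outputs using RawActivationsCollector.
+
+    test_fn is expected to run the model over a representative dataset. If save_dir is given,
+    the collected outputs are also written to disk under '<save_dir>/raw_outputs'.
+    """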
+    msglogger.info('Collecting raw layer outputs for model')
+    collector = RawActivationsCollector(model, classes=classes)
+    with collector_context(collector):
+        test_fn(model=model)
+    msglogger.info('Outputs collection complete')
+    if save_dir is not None:
+        msglogger.info('Saving outputs to disk...')
+        save_path = os.path.join(save_dir, 'raw_outputs')
+        collector.save(save_path)
+        msglogger.info('Outputs saved to ' + save_path)
+    return collector.value()
+
+
 @contextmanager
 def collector_context(collector, modules_list=None):
     """A context manager for an activation collector"""
diff --git a/distiller/quantization/ptq_coordinate_search.py b/distiller/quantization/ptq_coordinate_search.py
index cd2f4dfcaf1513a7fbfa250ce6b3aa473d142478..ea8ecf815bb56d32b3d53b6763a426546c75a676 100644
--- a/distiller/quantization/ptq_coordinate_search.py
+++ b/distiller/quantization/ptq_coordinate_search.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019 Intel Corporation
+# Copyright (c) 2020 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -36,11 +36,12 @@ from collections import OrderedDict
 from itertools import count
 import logging
 from copy import deepcopy
-import distiller.apputils.image_classifier as classifier
-import os
-import distiller.apputils as apputils
 import scipy.optimize as opt
 import numpy as np
+import argparse
+
+
+msglogger = logging.getLogger()
 
 
 def quant_params_dict2vec(p_dict, search_clipping=False):
@@ -172,8 +173,8 @@ def get_input_for_layer(model, layer_name, eval_fn):
     return layer_inputs[0]
 
 
-def init_layer_linear_quant_params(quantizer, original_model, layer_name, init_mode,
-                                   init_mode_method='Powell', eval_fn=None, search_clipping=False):
+def init_layer_linear_quant_params(quantizer, original_model, layer_name, init_mode=ClipMode.NONE,
+                                   init_method='Powell', eval_fn=None, search_clipping=False):
     """
     Initializes a layer's linear quant parameters.
     This is done to set the scipy.optimize.minimize initial guess.
@@ -190,7 +191,7 @@ def init_layer_linear_quant_params(quantizer, original_model, layer_name, init_m
           If str - the mode will be chosen from a list of options. The options are:
             [NONE, AVG, LAPLACE, GAUSS, L1, L2 ,L3].
           Defaults to ClipMode.NONE
-        init_mode_method (str or callable): applicable only in the case of init_mode = 'L1/2/3' or callable.
+        init_method (str or callable): applicable only in the case of init_mode = 'L1/2/3' or callable.
           chooses the minimization method for finding the local argmin_{s, zp}.
           Defaults to 'Powell'
         eval_fn: evaluation function for the model. Assumed it has a signature of the form
@@ -214,7 +215,7 @@ def init_layer_linear_quant_params(quantizer, original_model, layer_name, init_m
 
     if callable(init_mode):
         input_for_layer = get_input_for_layer(original_model, layer_name, eval_fn)
-        quantized_layer = optimize_for_layer(layer, quantized_layer, init_mode, input_for_layer, init_mode_method,
+        quantized_layer = optimize_for_layer(layer, quantized_layer, init_mode, input_for_layer, init_method,
                                              search_clipping=search_clipping)
 
     distiller.model_setattr(quantizer.model, denorm_layer_name, quantized_layer)
@@ -222,7 +223,7 @@ def init_layer_linear_quant_params(quantizer, original_model, layer_name, init_m
 
 
 def init_linear_quant_params(quantizer, original_model, eval_fn, dummy_input, init_mode,
-                             init_mode_method=None, search_clipping=False):
+                             init_method='Powell', search_clipping=False):
     """
     Initializes all linear quantization parameters of the model.
     Args:
@@ -235,7 +236,7 @@ def init_linear_quant_params(quantizer, original_model, eval_fn, dummy_input, in
           `eval_fn(model)->float`. this is the function to be minimized by the optimization algorithm.
           Note - unlike in `init_layer_linear_quant_params`, this argument is required here.
         dummy_input: dummy sample input to the model
-        init_mode_method: See `init_layer_linear_qaunt_params`.
+        init_method: See `init_layer_linear_quant_params`.
         search_clipping (bool): if set, optimize clipping values, otherwise optimize scale factor
     """
     original_model = distiller.make_non_parallel_copy(original_model)
@@ -249,7 +250,7 @@ def init_linear_quant_params(quantizer, original_model, eval_fn, dummy_input, in
         module_init_mode = init_mode[module_name] if isinstance(init_mode, dict) else init_mode
         msglogger.debug('Initializing layer \'%s\' using %s mode' % (module_name, module_init_mode))
         init_layer_linear_quant_params(quantizer, original_model, module_name, module_init_mode,
-                                       init_mode_method=init_mode_method,
+                                       init_method=init_method,
                                        eval_fn=eval_fn,
                                        search_clipping=search_clipping)
     del original_model
@@ -258,37 +259,55 @@ def init_linear_quant_params(quantizer, original_model, eval_fn, dummy_input, in
     quantizer.model.eval()
 
 
-def get_default_args():
-    parser = classifier.init_classifier_compression_arg_parser()
-    parser.add_argument('--opt-maxiter', dest='maxiter', default=None, type=int,
-                        help='Max iteration for minimization method.')
-    parser.add_argument('--opt-maxfev', dest='maxfev', default=None, type=int,
-                        help='Max iteration for minimization method.')
-    parser.add_argument('--opt-method', dest='method', default='Powell',
-                        help='Minimization method used by scip.optimize.minimize.')
-    parser.add_argument('--opt-bh', dest='basinhopping', action='store_true', default=False,
-                        help='Use scipy.optimize.basinhopping stochastic global minimum search.')
-    parser.add_argument('--opt-bh-niter', dest='niter', default=100,
-                        help='Number of iterations for the basinhopping algorithm.')
-    parser.add_argument('--opt-init-mode', dest='init_mode', default='NONE',
-                        choices=list(_INIT_MODES),
-                        help='The mode of quant initalization. Choices: ' + '|'.join(list(_INIT_MODES)))
-    parser.add_argument('--opt-init-method', dest='init_mode_method',
-                        help='If --opt-init-mode was specified as L1/L2/L3, this specifies the method of '
-                             'minimization.')
-    parser.add_argument('--opt-val-size', type=float, default=1,
-                        help='Use portion of the test size.')
-    parser.add_argument('--opt-eval-memoize-dataloader', dest='memoize_dataloader', action='store_true', default=False,
-                        help='Stores the input batch in memory to optimize performance.')
-    parser.add_argument('--base-score', type=float, default=None)
-    parser.add_argument('--opt-search-clipping', dest='search_clipping', action='store_true',
-                        help='Search on clipping values instead of scale/zero_point.')
-    args = parser.parse_args()
-    return args
-
-
-def validate_quantization_settings(args, quantized_model):
-    if args.search_clipping:
+def add_coordinate_search_args(parser: argparse.ArgumentParser):
+    group = parser.add_argument_group('Post-Training Quantization Auto-Optimization (LAPQ) Arguments')
+    group.add_argument('--lapq-maxiter', default=None, type=int,
+                       help='Max number of iterations for minimization method.')
+    group.add_argument('--lapq-maxfev', default=None, type=int,
+                       help='Max number of total function evaluations for minimization method.')
+    group.add_argument('--lapq-method', default='Powell',
+                       help='Minimization method used by scipy.optimize.minimize.')
+    group.add_argument('--lapq-basinhopping', '--lapq-bh', action='store_true', default=False,
+                       help='Use scipy.optimize.basinhopping stochastic global minimum search.')
+    group.add_argument('--lapq-basinhopping-niter', '--lapq-bh-niter', default=100,
+                       help='Number of iterations for the basinhopping algorithm.')
+    group.add_argument('--lapq-init-mode', default='NONE', choices=list(_INIT_MODES),
+                       help='The mode of quant initialization. Choices: ' + '|'.join(list(_INIT_MODES)))
+    group.add_argument('--lapq-init-method', default='Powell',
+                       help='If --lapq-init-mode was specified as L1/L2/L3, this specifies the method of '
+                            'minimization.')
+    group.add_argument('--lapq-eval-size', type=float, default=1,
+                       help='Portion of test dataset to use for evaluation function.')
+    group.add_argument('--lapq-eval-memoize-dataloader', action='store_true', default=False,
+                       help='Stores the input batch in memory to optimize performance.')
+    group.add_argument('--lapq-search-clipping', action='store_true',
+                       help='Search on clipping values instead of scale/zero_point.')
+
+
+def cmdline_args_to_dict(args):
+    """
+    Convenience function converting command line arguments obtained from add_coordinate_search_args
+    to a dictionary that can be passed as-is to ptq_coordinate_search.
+
+    Example:
+        # Assume pre-existing parser
+        add_coordinate_search_args(parser)
+        args = parser.parse_args()
+
+        # Assume quantizer, dummy_input, eval_fn, and test_fn have been set up
+        lapq_args_dict = cmdline_args_to_dict(args)
+        ptq_coordinate_search(quantizer, dummy_input, eval_fn, test_fn=test_fn, **lapq_args_dict)
+    """
+    prefix = 'lapq_'
+    len_prefix = len(prefix)
+    lapq_args = {k[len_prefix:]: v for k, v in vars(args).items() if k.startswith(prefix)}
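+    # 'eval_size' and 'eval_memoize_dataloader' are consumed by the calling application (to set up
+    # the evaluation data loader), not by ptq_coordinate_search, so they are dropped here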
+    lapq_args.pop('eval_size')
+    lapq_args.pop('eval_memoize_dataloader')
+    return lapq_args
+
+
+def validate_quantization_settings(quantized_model, search_clipping):
+    if search_clipping:
         return
     for n, m in quantized_model.named_modules():
         if not is_post_train_quant_wrapper(m, False):
@@ -306,55 +325,54 @@ def validate_quantization_settings(args, quantized_model):
                 raise ValueError(err_msg.format('weights'))
 
 
-def ptq_coordinate_search(model, dummy_input, eval_fn, method='Powell', options=None,
-                          act_stats=None, args=None, fold_sequences=True, basinhopping=False,
-                          init_args=None, minimizer_kwargs=None,
-                          test_fn=None):
+def ptq_coordinate_search(quantizer, dummy_input, eval_fn, test_fn=None, method='Powell',
+                          maxiter=None, maxfev=None, basinhopping=False, basinhopping_niter=100,
+                          init_mode=ClipMode.NONE, init_method=None, search_clipping=False,
+                          minimizer_kwargs=None):
     """
     Searches for the optimal post-train quantization configuration (scale/zero_points)
     for a model using numerical methods, as described by scipy.optimize.minimize.
     Args:
-        model (nn.Module): model to quantize
+        quantizer (distiller.quantization.PostTrainLinearQuantizer): A configured PostTrainLinearQuantizer object
+          containing the model being quantized
         dummy_input: an sample expected input to the model
         eval_fn (callable): evaluation function for the model. Assumed it has a signature of the form
           `eval_fn(model)->float`. this is the function to be minimized by the optimization algorithm.
-        method (str or callable): minimization method as accepted by scipy.optimize.minimize.
-        options (dict or None): options for the scipy optimizer
-        act_stats (OrderedDict): dictionary of statistics per layer, including inputs and outputs.
-          for more context refer to collect_quant_stats.
-        args: arguments from command-line.
-        fold_sequences (bool): flag, indicates to fold sequences before performing the search.
+        test_fn (callable): a function to test the current performance of the model. Assumed it has a signature of
+          the form `test_fn(model)->dict`, where the returned dict contains relevant results to be logged.
+          For example: {'top-1': VAL, 'top-5': VAL, 'loss': VAL}
+        method (str or callable): Minimization method as accepted by scipy.optimize.minimize.
+        maxiter (int): Maximum number of iterations to perform during minimization
+        maxfev (int): Maximum number of total function evaluations to perform during minimization
         basinhopping (bool): flag, indicates to use basinhopping as a global-minimization method,
           will pass the `method` argument to `scipy.optimize.basinhopping`.
-        init_args (tuple): arguments for initializing the linear quantization parameters.
-          Refer to `init_linear_quant_params` for more details.
-        minimizer_kwargs (dict): the kwargs for scipy.optimize.minimize procedure.
-        test_fn (callable): a function to test the current performance of the model.
+        basinhopping_niter (int): Number of iterations to perform if basinhopping is set
+        init_mode (ClipMode or callable or str or dict): See 'init_linear_quant_params'
+        init_method (str or callable): See 'init_layer_linear_quant_params'
+        search_clipping (bool): Search on clipping values instead of directly on scale/zero-point (scale and zero-
+          point are inferred from the clipping values)
+        minimizer_kwargs (dict): Optional additional arguments for scipy.optimize.minimize
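+
+    Example (a minimal sketch - assumes 'quantizer' is a configured, not-yet-prepared PostTrainLinearQuantizer,
+    and 'dummy_input' / 'eval_fn' / 'test_fn' are set up as described above):
+        model, qp_dict = ptq_coordinate_search(quantizer, dummy_input, eval_fn, test_fn=test_fn)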
     """
-    if fold_sequences:
-        model = fold_batch_norms(model, dummy_input)
-    if args is None:
-        args = get_default_args()
-    elif isinstance(args, dict):
-        updated_args = get_default_args()
-        updated_args.__dict__.update(args)
-        args = updated_args
-    original_model = deepcopy(model)
-
-    if not act_stats and not args.qe_config_file:
+    if not isinstance(quantizer, PostTrainLinearQuantizer):
+        raise ValueError('Only PostTrainLinearQuantizer supported, but got a {}'.format(quantizer.__class__.__name__))
+    if quantizer.prepared:
+        raise ValueError('Expecting a quantizer for which prepare_model has not been called')
+
+    original_model = deepcopy(quantizer.model)
+    original_model = fold_batch_norms(original_model, dummy_input)
+
+    if not quantizer.model_activation_stats:
         msglogger.info('Collecting stats for model...')
-        model_temp = distiller.utils.make_non_parallel_copy(model)
-        act_stats = collect_quant_stats(model_temp, eval_fn)
+        model_temp = distiller.utils.make_non_parallel_copy(original_model)
+        act_stats = collect_quant_stats(model_temp, eval_fn,
+                                        inplace_runtime_check=True, disable_inplace_attrs=True,
+                                        save_dir=getattr(msglogger, 'logdir', '.'))
         del model_temp
-        if args:
-            act_stats_path = '%s_act_stats.yaml' % args.arch
-            msglogger.info('Done. Saving act stats into %s' % act_stats_path)
-            distiller.yaml_ordered_save(act_stats_path, act_stats)
-            args.qe_stats_file = act_stats_path
+        quantizer.model_activation_stats = act_stats
+        quantizer.model.quantizer_metadata['params']['model_activation_stats'] = act_stats
 
     # Preparing model and init conditions:
     msglogger.info("Initializing quantizer...")
-    quantizer = PostTrainLinearQuantizer.from_args(model, args)
 
     # Make sure weights are re-quantizable and clip-able
     quantizer.save_fp_weights = True
@@ -368,26 +386,26 @@ def ptq_coordinate_search(model, dummy_input, eval_fn, method='Powell', options=
     quantizer.prepare_model(dummy_input)
     quantizer.model.eval()
 
-    validate_quantization_settings(args, quantizer.model)
+    validate_quantization_settings(quantizer.model, search_clipping)
 
     msglogger.info("Initializing quantization parameters...")
-    init_args = init_args or (args.init_mode, args.init_mode_method)
-    init_linear_quant_params(quantizer, original_model, eval_fn, dummy_input, *init_args,
-                             search_clipping=args.search_clipping)
+    init_linear_quant_params(quantizer, original_model, eval_fn, dummy_input, init_mode, init_method,
+                             search_clipping=search_clipping)
 
     msglogger.info("Evaluating initial quantization score...")
     best_data = {
-        'score': eval_fn(model),
+        'score': eval_fn(quantizer.model),
         'qp_dict': deepcopy(quantizer.linear_quant_params)
     }
     msglogger.info("Evaluation set loss after initialization %.3f" % best_data['score'])
     if test_fn:
         msglogger.info('Evaluating on full test set...')
-        l_top1, l_top5, l_loss = test_fn(quantizer.model)
-        msglogger.info('Test: \tloss=%.3f, top1=%.3f, top5=%.3f ' % (l_loss, l_top1, l_top5))
+        results = test_fn(quantizer.model)
+        s = ', '.join(['{} = {:.3f}'.format(k, v) for k, v in results.items()])
+        msglogger.info('Test: ' + s)
 
-    init_qp_dict = OrderedDict(quantizer.named_linear_quant_params(args.search_clipping, filter=True))
-    keys, init_qp_vec = quant_params_dict2vec(init_qp_dict, args.search_clipping)
+    init_qp_dict = OrderedDict(quantizer.named_linear_quant_params(search_clipping, filter=True))
+    keys, init_qp_vec = quant_params_dict2vec(init_qp_dict, search_clipping)
 
     iter_counter = count(1)
     eval_counter = count(1)
@@ -395,7 +413,7 @@ def ptq_coordinate_search(model, dummy_input, eval_fn, method='Powell', options=
     def feed_forward_fn(qp_vec):
         # if not _check_qp_vec(keys, qp_vec, quant_mode, args.search_clipping):
         #     return 1e6
-        qp_dict = quant_params_vec2dict(keys, qp_vec, args.search_clipping)
+        qp_dict = quant_params_vec2dict(keys, qp_vec, search_clipping)
         quantizer.update_linear_quant_params(qp_dict)
         loss = eval_fn(quantizer.model)
 
@@ -411,105 +429,31 @@ def ptq_coordinate_search(model, dummy_input, eval_fn, method='Powell', options=
         msglogger.info("Iteration %d: \t Score=%.3f" % (i, score))
         if score < best_data['score']:
             best_data['score'] = score
-            best_data['qp_dict'] = quant_params_vec2dict(keys, qp_vec, args.search_clipping)
+            best_data['qp_dict'] = quant_params_vec2dict(keys, qp_vec, search_clipping)
             msglogger.info("Saving current best quantization parameters.")
         if test_fn:
             msglogger.info('Evaluating on full test set...')
-            l_top1, l_top5, l_loss = test_fn(quantizer.model)
-            msglogger.info('Test: \tloss=%.3f, top1=%.3f, top5=%.3f ' % (l_loss, l_top1, l_top5))
-
-    options = options or OrderedDict()
-    if args.maxiter is not None:
-        options['maxiter'] = args.maxiter
-    if args.maxfev is not None:
-        options['maxfev'] = args.maxfev
+            results = test_fn(quantizer.model)
+            s = ', '.join(['{} = {:.3f}'.format(k, v) for k, v in results.items()])
+            msglogger.info('Test: ' + s)
+
+    options = OrderedDict()
+    options['maxiter'] = maxiter
+    options['maxfev'] = maxfev
+
     minimizer_kwargs = minimizer_kwargs or OrderedDict()
     minimizer_kwargs.update({
         'method': method, 'options': options
     })
-    basinhopping = basinhopping or args.basinhopping
     if basinhopping:
-        msglogger.info('Using basinhopping global minimum search with "%s" local minimization method'%
-                       method)
-        res = opt.basinhopping(feed_forward_fn, init_qp_vec, args.niter, callback=callback,
+        msglogger.info('Using basinhopping global minimum search with "%s" local minimization method' % method)
+        res = opt.basinhopping(feed_forward_fn, init_qp_vec, basinhopping_niter, callback=callback,
                                minimizer_kwargs=minimizer_kwargs)
     else:
         msglogger.info('Using "%s" minimization algorithm.' % method)
         res = opt.minimize(feed_forward_fn, init_qp_vec, callback=callback, **minimizer_kwargs)
 
-    msglogger.info("Optimization done. Best configuration: %s" % best_data['qp_dict'])
-    return model, best_data['qp_dict']
-
-
-if __name__ == "__main__":
-    args = get_default_args()
-    args.epochs = float('inf')  # hack for args parsing so there's no error in epochs
-    cc = classifier.ClassifierCompressor(args, script_dir=os.path.dirname(__file__))
-
-    args = deepcopy(cc.args)
-
-    effective_test_size_bak = args.effective_test_size
-    args.effective_test_size = args.opt_val_size
-    eval_data_loader = classifier.load_data(args, load_train=False, load_val=False, load_test=True, fixed_subset=True)
-
-    args.effective_test_size = effective_test_size_bak
-    test_data_loader = classifier.load_data(args, load_train=False, load_val=False, load_test=True)
-
-    # logging
-    logging.getLogger().setLevel(logging.WARNING)
-    msglogger = logging.getLogger(__name__)
-    msglogger.setLevel(logging.INFO)
-
-    model = cc.model.eval()
-    device = next(model.parameters()).device
-
-    if args.memoize_dataloader:
-        memoized_data_loader = []
-        for images, targets in eval_data_loader:
-            batch = images.to(device), targets.to(device)
-            memoized_data_loader.append(batch)
-    else:
-        memoized_data_loader = None
-
-    def eval_fn(model):
-        if args.memoize_dataloader:
-            loss = 0
-            for images, targets in memoized_data_loader:
-                outputs = model(images)
-                loss += cc.criterion(outputs, targets).item()
-            loss = loss / len(memoized_data_loader)
-        else:
-            _, _, loss = classifier.test(eval_data_loader, model, cc.criterion, [cc.tflogger, cc.pylogger],
-                                           None, args)
-        return loss
-
-    def test_fn(model):
-        return classifier.test(test_data_loader, model, cc.criterion, [cc.tflogger, cc.pylogger], None, args)
-
-    args.device = device
-    if args.resumed_checkpoint_path:
-        args.load_model_path = args.resumed_checkpoint_path
-    if args.load_model_path:
-        msglogger.info("Loading checkpoint from %s" % args.load_model_path)
-        model = apputils.load_lean_checkpoint(model, args.load_model_path,
-                                              model_device=args.device)
-
-    if args.qe_stats_file:
-        msglogger.info("Loading stats from %s" % args.qe_stats_file)
-        with open(args.qe_stats_file, 'r') as f:
-            act_stats = distiller.yaml_ordered_load(f)
-    else:
-        act_stats = None
-
-    dummy_input = torch.rand(*model.input_shape, device=args.device)
-    model, qp_dict = ptq_coordinate_search(model, dummy_input, eval_fn, args.method,
-                                           args=args, act_stats=act_stats, test_fn=test_fn)
-
-    top1, top5, loss = test_fn(model)
-
-    msglogger.info("Arch: %s \tTest: \t top1 = %.3f \t top5 = %.3f \t loss = %.3f" %
-                   (args.arch, top1, top5, loss))
-    distiller.yaml_ordered_save('%s.quant_params_dict.yaml' % args.arch, qp_dict)
-
-    distiller.apputils.save_checkpoint(0, args.arch, model, extras={'top1': top1, 'qp_dict': qp_dict}, name=args.name,
-                                       dir=cc.logdir)
+    msglogger.info('Optimization done')
+    msglogger.info('Best score: {}'.format(best_data['score']))
+    msglogger.info('Best Configuration: {}'.format(best_data['qp_dict']))
+    return quantizer.model, best_data['qp_dict']
diff --git a/distiller/quantization/pytorch_quant_conversion.py b/distiller/quantization/pytorch_quant_conversion.py
index 98501b4f956f853db6d6237ca01897ec12b9eaac..0b8e5e54ad46cfdf0a7d4c7fcee64c546a182708 100644
--- a/distiller/quantization/pytorch_quant_conversion.py
+++ b/distiller/quantization/pytorch_quant_conversion.py
@@ -140,7 +140,7 @@ def distiller_quantized_tensor_to_pytorch(tensor: torch.Tensor, scale, zp, num_b
     else:  # dest_dtype == torch.qint32:
         temp_dtype = torch.int32
     tensor = (tensor - zp_diff).to(temp_dtype)
-    if per_channel:
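+    # When the scale contains only a single value, fall back to PyTorch's per-tensor representation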
+    if per_channel and scale.shape[channel_dim] > 1:
         return torch._make_per_channel_quantized_tensor(tensor, converted_scale, converted_zp, channel_dim)
     return torch._make_per_tensor_quantized_tensor(tensor, converted_scale, converted_zp)
 
@@ -168,6 +168,8 @@ def _ptq_convert_pass_replace_range_linear_wrappers(module):
                                                              need_reduce_range(qset.quant_mode, torch.quint8))
                     d[idx] = (scale, zp, torch.quint8)
                 new_m = ConditionalQuantizeWrapper(new_m, d)
+        elif isinstance(m, distiller.quantization.RangeLinearEmbeddingWrapper):
+            new_m = m.to_pytorch_quant(need_reduce_range(m.wts_quant_settings.quant_mode, torch.quint8))
         elif distiller.has_children(m):
             new_m = _ptq_convert_pass_replace_range_linear_wrappers(m)
         elif not isinstance(m, nn.Identity):
@@ -246,7 +248,10 @@ def _ptq_convert_pass_remove_redundant_quant_dequant(model, dummy_input):
             handles.append(m.register_forward_pre_hook(quantize_wrapper_check_hook))
         elif isinstance(m, ConditionalDeQuantize):
             handles.append(m.register_forward_pre_hook(dequant_wrapper_check_hook))
-    out = model(dummy_input)
+    if isinstance(dummy_input, torch.Tensor):
+        out = model(dummy_input)
+    else:
+        out = model(*dummy_input)
     for h in handles:
         h.remove()
 
@@ -293,8 +298,8 @@ def convert_distiller_ptq_model_to_pytorch(model, dummy_input, backend='fbgemm')
         raise ValueError('Conversion to PyTorch native quantization supported only for models quantized '
                          'using distiller.quantization.PostTrainLinearQuantizer')
 
-    if dummy_input is None or not isinstance(dummy_input, torch.Tensor):
-        raise ValueError('Valid dummy input tensor required for converting PTQ model to PyTorch')
+    if dummy_input is None:
+        raise ValueError('Valid dummy input required for converting PTQ model to PyTorch')
 
     backends = ('fbgemm', 'qnnpack')
     if backend not in backends:
@@ -317,6 +322,7 @@ def convert_distiller_ptq_model_to_pytorch(model, dummy_input, backend='fbgemm')
 
     # This is used when loading the model from a checkpoint, to indicate that conversion needs to be applied
     quantizer_metadata['pytorch_convert'] = True
+    quantizer_metadata['pytorch_convert_backend'] = backend
     model.quantizer_metadata = quantizer_metadata
 
     return model
diff --git a/distiller/quantization/quantizer.py b/distiller/quantization/quantizer.py
index f4bc4488c571a1c322696e7294c8ad3d4e1f0c56..c3bd293e17bfc6757638a50e1f54314b9aedc51d 100644
--- a/distiller/quantization/quantizer.py
+++ b/distiller/quantization/quantizer.py
@@ -223,6 +223,9 @@ class Quantizer(object):
             with a reference to 'new_relu1'. Any override configuration made specifically for 'self.relu2'
             will be ignored. A warning message will be shown.
         """
+        if self.prepared:
+            raise RuntimeError('prepare_model can be called only once')
+
         msglogger.info('Preparing model for quantization using {0}'.format(self.__class__.__name__))
 
         self.model.quantizer_metadata["dummy_input"] = dummy_input
diff --git a/distiller/quantization/range_linear.py b/distiller/quantization/range_linear.py
index ef92314966c83b22f610788d35c90f6bb6b7d2fd..3bb9e407c456ad0c496e075bfa748f0fa6bc2844 100644
--- a/distiller/quantization/range_linear.py
+++ b/distiller/quantization/range_linear.py
@@ -268,7 +268,7 @@ def linear_dequantize_with_metadata(t, inplace=False):
     return t
 
 
-def add_post_train_quant_args(argparser):
+def add_post_train_quant_args(argparser, add_lapq_args=False):
     str_to_quant_mode_map = OrderedDict([
         ('sym', LinearQuantMode.SYMMETRIC),
         ('sym_restr', LinearQuantMode.SYMMETRIC_RESTRICTED),
@@ -293,8 +293,7 @@ def add_post_train_quant_args(argparser):
     linear_quant_mode_str_optional = partial(from_dict, d=str_to_quant_mode_map, optional=True)
     clip_mode_str = partial(from_dict, d=str_to_clip_mode_map, optional=False)
 
-    group = argparser.add_argument_group('Arguments controlling quantization at evaluation time '
-                                         '("post-training quantization")')
+    group = argparser.add_argument_group('Post-Training Quantization Arguments')
     group.add_argument('--quantize-eval', '--qe', action='store_true',
                        help='Apply linear quantization to model before evaluation. Applicable only if '
                             '--evaluate is also set')
@@ -338,15 +337,21 @@ def add_post_train_quant_args(argparser):
 
     stats_group = group.add_mutually_exclusive_group()
     stats_group.add_argument('--qe-stats-file', type=str, metavar='PATH',
-                       help='Path to YAML file with pre-made calibration stats')
+                             help='Path to YAML file with pre-made calibration stats')
     stats_group.add_argument('--qe-dynamic', action='store_true', help='Apply dynamic quantization')
     stats_group.add_argument('--qe-calibration', type=distiller.utils.float_range_argparse_checker(exc_min=True),
-                       metavar='PORTION_OF_TEST_SET', default=None,
-                       help='Run the model in evaluation mode on the specified portion of the test dataset and '
-                            'collect statistics')
+                             metavar='PORTION_OF_TEST_SET', default=None,
+                             help='Run the model in evaluation mode on the specified portion of the test dataset and '
+                                  'collect statistics')
     stats_group.add_argument('--qe-config-file', type=str, metavar='PATH',
-                       help='Path to YAML file containing configuration for PostTrainLinearQuantizer (if present, '
-                            'all other --qe* arguments are ignored)')
+                             help='Path to YAML file containing configuration for PostTrainLinearQuantizer '
+                                  '(if present, all other --qe* arguments are ignored)')
+
+    if add_lapq_args:
+        from .ptq_coordinate_search import add_coordinate_search_args
+        group.add_argument('--qe-lapq', '--qe-coordinate-search', action='store_true',
+                           help='Optimize post-training quantization parameters using LAPQ method')
+        add_coordinate_search_args(argparser)
 
 
 class UnsatisfiedRequirements(Exception):
@@ -1161,7 +1166,7 @@ class RangeLinearQuantConcatWrapper(RangeLinearQuantWrapper):
                                                        reduce_range)
         m = pytqc.QFunctionalCat(self.wrapped_module.dim)
         m.qfunc.scale = float(scale)
-        m.qfunc.zp = int(zp)
+        m.qfunc.zero_point = int(zp)
         if self.clip_half_range:
             # The scale factor calculated in Distiller already considers the ReLU, so it's OK to apply the
             # ReLU after quantization
@@ -1212,7 +1217,7 @@ class RangeLinearQuantEltwiseAddWrapper(RangeLinearQuantWrapper):
                                                        reduce_range)
         m = pytqc.QFunctionalAddRelu() if self.clip_half_range else pytqc.QFunctionalAdd()
         m.qfunc.scale = float(scale)
-        m.qfunc.zp = int(zp)
+        m.qfunc.zero_point = int(zp)
         return m
 
 
@@ -1264,7 +1269,7 @@ class RangeLinearQuantEltwiseMultWrapper(RangeLinearQuantWrapper):
                                                        reduce_range)
         m = pytqc.QFunctionalMul()
         m.qfunc.scale = float(scale)
-        m.qfunc.zp = int(zp)
+        m.qfunc.zero_point = int(zp)
         if self.clip_half_range:
             # The scale factor calculated in Distiller already considers the ReLU, so it's OK to apply the
             # ReLU after quantization
@@ -1416,6 +1421,24 @@ class RangeLinearEmbeddingWrapper(nn.Module):
         out_f.quant_metadata = self.quant_metadata
         return out_f
 
+    def to_pytorch_quant(self, reduce_range):
+        # No quantized embedding in PyTorch, so use FP32 embedding followed by quantize
+        emb = deepcopy(self.wrapped_module)
+        with torch.no_grad():
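+            # Use the retained FP32 weights if available; otherwise reconstruct them by de-quantizing
+            # the wrapped module's quantized weights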
+            if self.save_fp_weights:
+                w_dq = nn.Parameter(self.float_weight, requires_grad=False)
+            else:
+                w_dq = nn.Parameter(linear_dequantize(emb.weight, self.w_scale, self.w_zero_point),
+                                    requires_grad=False)
+        emb.weight = w_dq
+
+        scale, zp = pytqc.distiller_qparams_to_pytorch(self.w_scale, self.w_zero_point,
+                                                       self.wts_quant_settings.num_bits,
+                                                       self.wts_quant_settings.quant_mode, torch.quint8,
+                                                       reduce_range)
+
+        return nn.Sequential(emb, nnq.Quantize(scale, zp, torch.quint8))
+
 
 class RangeLinearFakeQuantWrapper(RangeLinearQuantWrapper):
     def __init__(self, wrapped_module, num_bits_acts, mode=LinearQuantMode.SYMMETRIC, clip_acts=ClipMode.NONE,
@@ -1597,15 +1620,6 @@ class PostTrainLinearQuantizer(Quantizer):
                     model_activation_stats = distiller.utils.yaml_ordered_load(stream)
             elif not isinstance(model_activation_stats, (dict, OrderedDict)):
                 raise TypeError('model_activation_stats must either be a string, a dict / OrderedDict or None')
-        else:
-            msglogger.warning("\nWARNING:\nNo stats file passed - Dynamic quantization will be used\n"
-                              "At the moment, this mode isn't as fully featured as stats-based quantization, and "
-                              "the accuracy results obtained are likely not as representative of real-world results."
-                              "\nSpecifically:\n"
-                              "  * Not all modules types are supported in this mode. Unsupported modules will remain "
-                              "in FP32.\n"
-                              "  * Optimizations for quantization of layers followed by Relu/Tanh/Sigmoid are only "
-                              "supported when statistics are used.\nEND WARNING\n")
 
         mode_dict = {'activations': _enum_to_str(mode.activations), 'weights': _enum_to_str(mode.weights)}
         self.model.quantizer_metadata = {'type': type(self),
@@ -1921,13 +1935,24 @@ class PostTrainLinearQuantizer(Quantizer):
         if self.linear_quant_params:
             out['linear_quant_params'] = lqp_dict = OrderedDict()
             for k, v in self.linear_quant_params.items():  # type: str, torch.Tensor
-                lqp_dict[k] = v.item()
+                if v.numel() == 1:
+                    lqp_dict[k] = v.item()
 
         save_path = os.path.join(save_dir, 'layer_quant_params.yaml')
         distiller.yaml_ordered_save(save_path, out)
         msglogger.info('Per-layer quantization parameters saved to ' + save_path)
 
     def prepare_model(self, dummy_input=None):
+        if not self.model_activation_stats:
+            msglogger.warning("\nWARNING:\nNo stats file passed - Dynamic quantization will be used\n"
+                              "At the moment, this mode isn't as fully featured as stats-based quantization, and "
+                              "the accuracy results obtained are likely not as representative of real-world results."
+                              "\nSpecifically:\n"
+                              "  * Not all modules types are supported in this mode. Unsupported modules will remain "
+                              "in FP32.\n"
+                              "  * Optimizations for quantization of layers followed by Relu/Tanh/Sigmoid are only "
+                              "supported when statistics are used.\nEND WARNING\n")
+
         self.has_bidi_distiller_lstm = any(isinstance(m, distiller.modules.DistillerLSTM) and m.bidirectional for
                                            _, m in self.model.named_modules())
         if self.has_bidi_distiller_lstm:
diff --git a/examples/classifier_compression/README.md b/examples/classifier_compression/README.md
index 2c72e0215321c9121bc4f05620080eb83b856737..f893b2d6efe515e75194a27719cf418635781a1b 100644
--- a/examples/classifier_compression/README.md
+++ b/examples/classifier_compression/README.md
@@ -34,6 +34,7 @@ A non-exhaustive list of the methods implemented:
 ### Quantization
 
 - [Post-training quantization](https://github.com/NervanaSystems/distiller/tree/master/examples/quantization/post_train_quant/command_line.md) based on the TensorFlow quantization scheme (originally GEMMLOWP) with additional capabilities.
+  - Optimizing post-training quantization parameters with the [LAPQ](https://arxiv.org/abs/1911.07190) method - see [example YAML](https://github.com/NervanaSystems/distiller/blob/master/examples/quantization/post_train_quant/resnet18_imagenet_post_train_lapq.yaml) file for details.
 - [Quantization-aware training](https://github.com/NervanaSystems/distiller/tree/master/examples/quantization/quant_aware_train): TensorFlow scheme, DoReFa, PACT
 
 ### Knowledge Distillation
diff --git a/examples/classifier_compression/compress_classifier.py b/examples/classifier_compression/compress_classifier.py
index a6904f7258205d28db4a67a96eac75cd43149c9f..88911c44daab29ecc6b42643a6284db7280ccf10 100755
--- a/examples/classifier_compression/compress_classifier.py
+++ b/examples/classifier_compression/compress_classifier.py
@@ -61,6 +61,7 @@ import distiller.apputils as apputils
 import parser
 import os
 import numpy as np
+from ptq_lapq import image_classifier_ptq_lapq
 
 
 # Logger handle
@@ -69,7 +70,7 @@ msglogger = logging.getLogger()
 
 def main():
     # Parse arguments
-    args = parser.add_cmdline_args(classifier.init_classifier_compression_arg_parser()).parse_args()
+    args = parser.add_cmdline_args(classifier.init_classifier_compression_arg_parser(True)).parse_args()
     app = ClassifierCompressorSampleApp(args, script_dir=os.path.dirname(__file__))
     if app.handle_subapps():
         return
@@ -110,10 +111,13 @@ def handle_subapps(model, criterion, optimizer, compression_scheduler, pylogger,
         sensitivity_analysis(model, criterion, test_loader, pylogger, args, sensitivities)
         do_exit = True
     elif args.evaluate:
-        test_loader = load_test_data(args)
-        classifier.evaluate_model(test_loader, model, criterion, pylogger,
-            classifier.create_activation_stats_collectors(model, *args.activation_stats),
-            args, scheduler=compression_scheduler)
+        if args.quantize_eval and args.qe_lapq:
+            image_classifier_ptq_lapq(model, criterion, pylogger, args)
+        else:
+            test_loader = load_test_data(args)
+            classifier.evaluate_model(test_loader, model, criterion, pylogger,
+                classifier.create_activation_stats_collectors(model, *args.activation_stats),
+                args, scheduler=compression_scheduler)
         do_exit = True
     elif args.thinnify:
         assert args.resumed_checkpoint_path is not None, \
diff --git a/examples/classifier_compression/parser.py b/examples/classifier_compression/parser.py
index 0385d0eee9e451ee699de01ac019abb26253d662..5697787ba7634992d4aabdc0cbfa72b875be7ad1 100755
--- a/examples/classifier_compression/parser.py
+++ b/examples/classifier_compression/parser.py
@@ -16,7 +16,7 @@
 
 import argparse
 import distiller
-import distiller.quantization
+import distiller.pruning
 import distiller.models as models
 
 
diff --git a/examples/classifier_compression/ptq_lapq.py b/examples/classifier_compression/ptq_lapq.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa793054412a9b803ad794e2ae667c76a6401ce7
--- /dev/null
+++ b/examples/classifier_compression/ptq_lapq.py
@@ -0,0 +1,99 @@
+#
+# Copyright (c) 2020 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
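+"""Post-training quantization with LAPQ (Loss Aware Post-training Quantization) coordinate-search
+parameter optimization, integrated with the image classification sample application.
+"""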
+import os
+import torch
+from copy import deepcopy
+import logging
+from collections import OrderedDict
+
+import distiller
+import distiller.apputils as apputils
+import distiller.apputils.image_classifier as classifier
+import distiller.quantization.ptq_coordinate_search as lapq
+
+
+msglogger = logging.getLogger()
+
+
+def image_classifier_ptq_lapq(model, criterion, loggers, args):
+    args = deepcopy(args)
+
+    effective_test_size_bak = args.effective_test_size
+    args.effective_test_size = args.lapq_eval_size
+    eval_data_loader = classifier.load_data(args, load_train=False, load_val=False, load_test=True, fixed_subset=True)
+
+    args.effective_test_size = effective_test_size_bak
+    test_data_loader = classifier.load_data(args, load_train=False, load_val=False, load_test=True)
+
+    model = model.eval()
+    device = next(model.parameters()).device
+
+    if args.lapq_eval_memoize_dataloader:
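+        # Pre-load the entire evaluation subset onto the target device as a single batch, so repeated
+        # calls to eval_fn during the search don't pay data-loading costs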
+        images_batches = []
+        targets_batches = []
+        for images, targets in eval_data_loader:
+            images_batches.append(images.to(device))
+            targets_batches.append(targets.to(device))
+        memoized_data_loader = [(torch.cat(images_batches), torch.cat(targets_batches))]
+    else:
+        memoized_data_loader = None
+
+    def eval_fn(model):
+        if memoized_data_loader:
+            loss = 0
+            for images, targets in memoized_data_loader:
+                outputs = model(images)
+                loss += criterion(outputs, targets).item()
+            loss = loss / len(memoized_data_loader)
+        else:
+            _, _, loss = classifier.test(eval_data_loader, model, criterion, loggers, None, args)
+        return loss
+
+    def test_fn(model):
+        top1, top5, loss = classifier.test(test_data_loader, model, criterion, loggers, None, args)
+        return OrderedDict([('top-1', top1), ('top-5', top5), ('loss', loss)])
+
+    args.device = device
+    if args.resumed_checkpoint_path:
+        args.load_model_path = args.resumed_checkpoint_path
+    if args.load_model_path:
+        msglogger.info("Loading checkpoint from %s" % args.load_model_path)
+        model = apputils.load_lean_checkpoint(model, args.load_model_path,
+                                              model_device=args.device)
+
+    quantizer = distiller.quantization.PostTrainLinearQuantizer.from_args(model, args)
+
+    dummy_input = torch.rand(*model.input_shape, device=args.device)
+    model, qp_dict = lapq.ptq_coordinate_search(quantizer, dummy_input, eval_fn, test_fn=test_fn,
+                                                **lapq.cmdline_args_to_dict(args))
+
+    results = test_fn(quantizer.model)
+    msglogger.info("Arch: %s \tTest: \t top1 = %.3f \t top5 = %.3f \t loss = %.3f" %
+                   (args.arch, results['top-1'], results['top-5'], results['loss']))
+    distiller.yaml_ordered_save('%s.quant_params_dict.yaml' % args.arch, qp_dict)
+
+    distiller.apputils.save_checkpoint(0, args.arch, model,
+                                       extras={'top1': results['top-1'], 'qp_dict': qp_dict}, name=args.name,
+                                       dir=msglogger.logdir)
+
+
+if __name__ == "__main__":
+    parser = classifier.init_classifier_compression_arg_parser(include_ptq_lapq_args=True)
+    args = parser.parse_args()
+    args.epochs = float('inf')  # hack for args parsing so there's no error in epochs
+    cc = classifier.ClassifierCompressor(args, script_dir=os.path.dirname(__file__))
+    image_classifier_ptq_lapq(cc.model, cc.criterion, [cc.pylogger, cc.tflogger], cc.args)
diff --git a/examples/quantization/post_train_quant/command_line.md b/examples/quantization/post_train_quant/command_line.md
index ce2c94862720c955ce1914fead16992d74467b3a..2c112ebc33d2ed14463f4485e0e60a9f170a8bc2 100644
--- a/examples/quantization/post_train_quant/command_line.md
+++ b/examples/quantization/post_train_quant/command_line.md
@@ -30,8 +30,9 @@ Post-training quantization can either be configured straight from the command-li
 | `--qe-stats-file`        | N/A       | Use stats file for static quantization of activations. See details below              | None    |
 | `--qe-dynamic`           | N/A       | Perform dynamic quantization. See details below                                       | None    |
 | `--qe-config-file`       | N/A       | Path to YAML config file. See section above. (ignores all other --qe* arguments)      | None    |
-| `--qe-convert-pytorch`   | `--qept`  | Convert the model to PyTorch native post-train quantization modules                   | Off     |
+| `--qe-convert-pytorch`   | `--qept`  | Convert the model to PyTorch native post-train quantization modules. See [tutorial](https://github.com/NervanaSystems/distiller/blob/master/jupyter/post_train_quant_convert_pytorch.ipynb) for more details | Off     |
 | `--qe-pytorch-backend`   | N/A       | When --qe-convert-pytorch is set, specifies the PyTorch quantization backend to use. Choices: "fbgemm", "qnnpack"   | Off     |
+| `--qe-lapq`              | N/A       | Optimize post-training quantization parameters using [LAPQ](https://arxiv.org/abs/1911.07190) method. Beyond the scope of this document. See [example YAML](https://github.com/NervanaSystems/distiller/blob/master/examples/quantization/post_train_quant/resnet18_imagenet_post_train_lapq.yaml) file for details   | Off     |
 
 ### Notes
 
@@ -40,10 +41,6 @@ Post-training quantization can either be configured straight from the command-li
     * `--quantize-eval` is also set, in which case an FP32 model is first quantized using Distiller's post-training quantization flow, and then converted to a PyTorch native quantization model.
     * `--quantize-eval` is not set, but a previously post-train quantized model is loaded via `--resume`. In this case, the loaded model is converted to PyTorch native quantization.
 
-### Conversion to PyTorch Built-in Quantization Model
-
-PyTorch released built-in support for quantization in version 1.3. Currently Distiller's quantization functionality is still completely separate from PyTorch's. We provide the ability to take a model which was post-train quantized with Distiller, and is comprised of `RangeLinearQuantWrapper`
-
 ## "Net-Aware" Quantization
 
 The term "net-aware" quantization, coined in [this](https://arxiv.org/abs/1811.09886) paper from Facebook (section 3.2.2), means we can achieve better quantization by considering sequences of operations instead of just quantizing each operation independently. This isn't exactly layer fusion - in Distiller we modify activation stats prior to setting quantization parameters, in to make sure that when a module is followed by certain activation functions, only the relevant ranges are quantized. We do this for:
diff --git a/examples/quantization/post_train_quant/resnet18_imagenet_post_train_lapq.yaml b/examples/quantization/post_train_quant/resnet18_imagenet_post_train_lapq.yaml
index 87b42f28b99e196af6655680f609af4bc37d6022..c0b73481fdde04a15bbc539eb09835ad265a6815 100644
--- a/examples/quantization/post_train_quant/resnet18_imagenet_post_train_lapq.yaml
+++ b/examples/quantization/post_train_quant/resnet18_imagenet_post_train_lapq.yaml
@@ -21,7 +21,7 @@ quantizers:
     mode:
       activations: ASYMMETRIC_UNSIGNED
       weights: SYMMETRIC
-    model_activation_stats: ../../examples/quantization/post_train_quant/stats/resnet18_quant_stats.yaml
+    model_activation_stats: ../quantization/post_train_quant/stats/resnet18_quant_stats.yaml
     per_channel_wts: False
     inputs_quant_auto_fallback: True
 
@@ -47,12 +47,13 @@ quantizers:
 
 # Example invocations:
 #   * Preliminaries:
-#       cd <distiller_root>/distiller/quantization
-#       CONFIG_FILE="../../examples/quantization/post_train_quant/resnet18_imagenet_post_train_lapq.yaml"
+#       cd <distiller_root>/examples/classifier_compression
+#       CONFIG_FILE="../quantization/post_train_quant/resnet18_imagenet_post_train_lapq.yaml"
+#       IMAGENET_PATH=<path_to_imagenet>
 #
 #   * Using L3 initialization:
 #     Command:
-#       python ptq_coordinate_search.py -a resnet18 --pretrained <path_to_imagenet> --opt-val-size 0.01 --opt-maxiter 2 --qe-config-file $CONFIG_FILE -b 500 --opt-init-mode L3 --opt-init-method powell --opt-eval-memoize-dataloader --det --opt-search-clipping
+#       python compress_classifier.py --eval --qe --qe-lapq -a resnet18 --pretrained $IMAGENET_PATH --lapq-eval-size 0.01 --lapq-maxiter 2 --qe-config-file $CONFIG_FILE -b 500 --lapq-init-mode L3 --lapq-init-method powell --lapq-eval-memoize-dataloader --det --lapq-search-clipping
 #
 #     Excerpts from output:
 #       ...
@@ -77,7 +78,7 @@ quantizers:
 #
 #   * Using LAPLACE initialization:
 #     Command:
-#       python ptq_coordinate_search.py -a resnet18 --pretrained <path_to_imagenet> --opt-val-size 0.01 --opt-maxiter 2 --qe-config-file $CONFIG_FILE -b 500 --opt-init-mode LAPLACE --opt-init-method powell --opt-eval-memoize-dataloader --det --opt-search-clipping
+#       python compress_classifier.py --eval --qe --qe-lapq -a resnet18 --pretrained $IMAGENET_PATH --lapq-eval-size 0.01 --lapq-maxiter 2 --qe-config-file $CONFIG_FILE -b 500 --lapq-init-mode LAPLACE --lapq-init-method powell --lapq-eval-memoize-dataloader --det --lapq-search-clipping
 #
 #     Excerpts from output:
 #       ...