diff --git a/examples/agp-pruning/mobilenet_imagenet_baseline_training.yaml b/examples/agp-pruning/mobilenet_imagenet_baseline_training.yaml
index f126de609eae7e6942ede61521e5a71159d8b519..bd9402e5adfaff2b4b9df6fa6d900305b6422db0 100755
--- a/examples/agp-pruning/mobilenet_imagenet_baseline_training.yaml
+++ b/examples/agp-pruning/mobilenet_imagenet_baseline_training.yaml
@@ -1,26 +1,24 @@
-# https://github.com/shicai/MobileNet-Caffe/issues/9
 #
-# net: "train_val.prototxt"
-# #test_initialization: false
-# #test_iter: 100
-# #test_interval: 5000
-# display: 20
-# average_loss: 20
-# base_lr: 0.1
-# lr_policy: "poly"
-# power: 1.0
-# max_iter: 500000
-# momentum: 0.9
-# weight_decay: 0.0001
-# snapshot: 5000
-# snapshot_prefix: "mobilenet"
+# This YAML file contains the configuration and command-line arguments for training MobileNet v1 from scratch.
+# Best result (reached on epoch 199): Top1: 71.156    Top5: 89.972
+#
+# compress_classifier.py --arch=mobilenet ../../../data.imagenet --lr=0.045 --batch=256 -j=32 --vs=0 --name=mobilenet_v1_training -p=50 --wd=1e-4 --epochs=200 --compress=../agp-pruning/mobilenet_imagenet_baseline_training.yaml
+#
+#
+# 2019-07-01 19:22:09,917 - ==> Best [Top1: 71.156   Top5: 89.972   Sparsity:0.00   Params: 4209088 on epoch: 199]
+# 2019-07-01 19:22:09,917 - Saving checkpoint to: logs/mobilenet_v1_training___2019.06.29-122534/mobilenet_v1_training_checkpoint.pth.tar
+# 2019-07-01 19:22:10,145 - --- test ---------------------
+# 2019-07-01 19:22:10,145 - 50000 samples (256 per mini-batch)
+# 2019-07-01 19:22:28,635 - Test: [   50/  195]    Loss 1.189988    Top1 70.539062    Top5 89.781250
+# 2019-07-01 19:22:35,567 - Test: [  100/  195]    Loss 1.182166    Top1 70.851562    Top5 89.792969
+# 2019-07-01 19:22:43,253 - Test: [  150/  195]    Loss 1.177892    Top1 70.927083    Top5 89.903646
+# 2019-07-01 19:22:50,377 - ==> Top1: 71.156    Top5: 89.972    Loss: 1.175
+#
 
-#  python compress_classifier.py -a mobilenet_050 --compress ../mobilenet/mobilenet_imagenet_baseline_training.yaml -j 22 ../../../data.imagenet -p 50 -b 256 --epochs 120 --lr 0.1 --wd 0.0001 --momentum 0.9
 lr_schedulers:
   training_lr:
-    class: PolynomialLR
-    T_max: 120
-    power: 1.0
+    class: ExponentialLR
+    gamma: 0.98
 
 policies:
     - lr_scheduler:
@@ -29,30 +27,4 @@ policies:
       ending_epoch: 200
       frequency: 1
 
-# MobileNet V1
-# https://arxiv.org/pdf/1704.04861.pdf
-# MobileNet models were trained in TensorFlow [1] using RMSprop [33] with asynchronous gradient descent similar
-# to Inception V3 [31]. However, contrary to training large models we use less regularization and data augmentation
-# techniques because small models have less trouble with overfitting. When training MobileNets we do not use
-# side heads or label smoothing and additionally reduce the amount image of distortions by limiting the size of small
-# crops that are used in large Inception training [31]. Additionally, we found that it was important to put very little
-# or no weight decay (l2 regularization) on the depthwise filters since their are so few parameters in them.
 
-# Inception V3
-# https://arxiv.org/pdf/1512.00567.pdf
-# ... Our best models were achieved using RMSProp [21] with decay of 0.9 and eps = 1.0.
-# We used a learning rate of 0.045, decayed every two epoch using an exponential rate of 0.94.
-
-# python compress_classifier.py -a mobilenet_050 --compress ../mobilenet/mobilenet_imagenet_baseline_training.yaml -j 11 ~/datasets/imagenet -p 400 -b 32 --epochs 100 --lr 0.045 --wd 0.0001 --momentum 0 --gpu 0
-
-#lr_schedulers:
-#  training_lr:
-#    class: ExponentialLR
-#    gamma: 0.94
-#
-#policies:
-#    - lr_scheduler:
-#        instance_name: training_lr
-#      starting_epoch: 0
-#      ending_epoch: 200
-#      frequency: 2