diff --git a/examples/agp-pruning/resnet50.schedule_agp.yaml b/examples/agp-pruning/resnet50.schedule_agp.yaml
index b3ebb96c9bb128d337ad7d36cfe5509158729278..2c054e0b754543b36d691b4f42347b7d208c25eb 100755
--- a/examples/agp-pruning/resnet50.schedule_agp.yaml
+++ b/examples/agp-pruning/resnet50.schedule_agp.yaml
@@ -1,96 +1,86 @@
-# This schedule demonstrates high-rate element-wise pruning (70.66% sparsity) of Resnet 50.
-# Top1 is 76.09 vs the published Top1: 76.15 (https://pytorch.org/docs/stable/torchvision/models.html)
-# Top5 actually slightly improves the baseline: 92.95 vs. 92.87 in the baseline.
+# This schedule demonstrates high-rate element-wise pruning (80% sparsity) of Resnet 50.
+# Top1 is 76.0 vs the published Top1: 76.15 (https://pytorch.org/docs/stable/torchvision/models.html)
+# Top5 is on par with the baseline.
 #
-# The first layers are left unpruned, because the weights tensors are very small. The arithmetic-intensity is
-# especially low, and the weight tensors are large, in module.layer4.*, so it's important to prune those.
-# The Linear (fully-connected) layer is pruned to 87% because we have empirical evidence that the classifier layers
-# are prune-friendly.
+# The pruning level is uniform across all layers (80%), except for the first convolution.
 #
-# time python3 compress_classifier.py -a=resnet50 --pretrained -p=50 ../../../data.imagenet/ -j=22 --epochs=80 --lr=0.001 --compress=resnet50.schedule_agp.yaml
+# time python3 compress_classifier.py -a=resnet50 --pretrained -p=50 ../../../data.imagenet/ -j=12 --epochs=100 --lr=0.005 --compress=../agp-pruning/resnet50.schedule_agp.yaml --vs=0
 #
 # Parameters:
 # +----+-------------------------------------+--------------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------+
 # |    | Name | Shape | NNZ (dense) | NNZ (sparse) | Cols (%) | Rows (%) | Ch (%) | 2D (%) | 3D (%) | Fine (%) | Std | Mean | Abs-Mean |
 # |----+-------------------------------------+--------------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------|
-# | 0 | module.conv1.weight | (64, 3, 7, 7) | 9408 | 9408 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.10415 | -0.00043 | 0.06379 |
-# | 1 | module.layer1.0.conv1.weight | (64, 64, 1, 1) | 4096 | 4096 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.06023 | -0.00354 | 0.03393 |
-# | 2 | module.layer1.0.conv2.weight | (64, 64, 3, 3) | 36864 | 36864 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02438 | 0.00069 | 0.01446 |
-# | 3 | module.layer1.0.conv3.weight | (256, 64, 1, 1) | 16384 | 16384 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02961 | 0.00029 | 0.01786 |
-# | 4 | module.layer1.0.downsample.0.weight | (256, 64, 1, 1) | 16384 | 16384 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.04820 | -0.00283 | 0.02690 |
-# | 5 | module.layer1.1.conv1.weight | (64, 256, 1, 1) | 16384 | 16384 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02557 | 0.00102 | 0.01698 |
-# | 6 | module.layer1.1.conv2.weight | (64, 64, 3, 3) | 36864 | 36864 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02391 | 0.00005 | 0.01633 |
-# | 7 | module.layer1.1.conv3.weight | (256, 64, 1, 1) | 16384 | 16384 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02724 | 0.00000 | 0.01716 |
-# | 8 | module.layer1.2.conv1.weight | (64, 256, 1, 1) | 16384 | 16384 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02513 | 0.00008 | 0.01828 |
-# | 9 | module.layer1.2.conv2.weight | (64, 64, 3, 3) | 36864 | 36864 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02638 | -0.00052 | 0.01979 |
-# | 10 | module.layer1.2.conv3.weight | (256, 64, 1, 1) | 16384 | 16384 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02573 | -0.00185 | 0.01547 |
-# | 11 | module.layer2.0.conv1.weight | (128, 256, 1, 1) | 32768 | 32768 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02960 | -0.00121 | 0.02091 |
-# | 12 | module.layer2.0.conv2.weight | (128, 128, 3, 3) | 147456 | 44237 | 0.00000 | 0.00000 | 0.00000 | 16.91895 | 0.00000 | 69.99986 | 0.01642 | -0.00020 | 0.00819 |
-# | 13 | module.layer2.0.conv3.weight | (512, 128, 1, 1) | 65536 | 19661 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 14.25781 | 69.99969 | 0.02184 | 0.00012 | 0.01003 |
-# | 14 | module.layer2.0.downsample.0.weight | (512, 256, 1, 1) | 131072 | 39322 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 12.30469 | 69.99969 | 0.01788 | -0.00027 | 0.00766 |
-# | 15 | module.layer2.1.conv1.weight | (128, 512, 1, 1) | 65536 | 19661 | 0.00000 | 0.00000 | 12.69531 | 69.99969 | 0.00000 | 69.99969 | 0.01306 | 0.00001 | 0.00590 |
-# | 16 | module.layer2.1.conv2.weight | (128, 128, 3, 3) | 147456 | 44237 | 0.00000 | 0.00000 | 0.00000 | 22.08862 | 0.00000 | 69.99986 | 0.01518 | 0.00013 | 0.00688 |
-# | 17 | module.layer2.1.conv3.weight | (512, 128, 1, 1) | 65536 | 19661 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 1.36719 | 69.99969 | 0.01769 | -0.00086 | 0.00766 |
-# | 18 | module.layer2.2.conv1.weight | (128, 512, 1, 1) | 65536 | 19661 | 0.00000 | 0.00000 | 1.56250 | 69.99969 | 0.00000 | 69.99969 | 0.01770 | -0.00046 | 0.00840 |
-# | 19 | module.layer2.2.conv2.weight | (128, 128, 3, 3) | 147456 | 44237 | 0.00000 | 0.00000 | 0.00000 | 13.09814 | 0.00000 | 69.99986 | 0.01625 | -0.00011 | 0.00781 |
-# | 20 | module.layer2.2.conv3.weight | (512, 128, 1, 1) | 65536 | 19661 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 0.58594 | 69.99969 | 0.01985 | -0.00020 | 0.00946 |
-# | 21 | module.layer2.3.conv1.weight | (128, 512, 1, 1) | 65536 | 19661 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 0.00000 | 69.99969 | 0.01808 | -0.00053 | 0.00894 |
-# | 22 | module.layer2.3.conv2.weight | (128, 128, 3, 3) | 147456 | 44237 | 0.00000 | 0.00000 | 0.00000 | 10.50415 | 0.00000 | 69.99986 | 0.01656 | -0.00033 | 0.00830 |
-# | 23 | module.layer2.3.conv3.weight | (512, 128, 1, 1) | 65536 | 19661 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 0.97656 | 69.99969 | 0.01864 | -0.00055 | 0.00887 |
-# | 24 | module.layer3.0.conv1.weight | (256, 512, 1, 1) | 131072 | 39322 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 0.00000 | 69.99969 | 0.02308 | -0.00061 | 0.01119 |
-# | 25 | module.layer3.0.conv2.weight | (256, 256, 3, 3) | 589824 | 176948 | 0.00000 | 0.00000 | 0.00000 | 20.91217 | 0.00000 | 69.99986 | 0.01282 | -0.00018 | 0.00629 |
-# | 26 | module.layer3.0.conv3.weight | (1024, 256, 1, 1) | 262144 | 78644 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 4.29688 | 69.99969 | 0.01763 | -0.00012 | 0.00857 |
-# | 27 | module.layer3.0.downsample.0.weight | (1024, 512, 1, 1) | 524288 | 157287 | 0.00000 | 0.00000 | 0.00000 | 69.99989 | 3.90625 | 69.99989 | 0.01221 | 0.00008 | 0.00570 |
-# | 28 | module.layer3.1.conv1.weight | (256, 1024, 1, 1) | 262144 | 78644 | 0.00000 | 0.00000 | 4.78516 | 69.99969 | 0.00000 | 69.99969 | 0.01180 | -0.00026 | 0.00566 |
-# | 29 | module.layer3.1.conv2.weight | (256, 256, 3, 3) | 589824 | 176948 | 0.00000 | 0.00000 | 0.00000 | 15.36255 | 0.00000 | 69.99986 | 0.01139 | -0.00010 | 0.00554 |
-# | 30 | module.layer3.1.conv3.weight | (1024, 256, 1, 1) | 262144 | 78644 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 0.58594 | 69.99969 | 0.01557 | -0.00074 | 0.00745 |
-# | 31 | module.layer3.2.conv1.weight | (256, 1024, 1, 1) | 262144 | 78644 | 0.00000 | 0.00000 | 0.68359 | 69.99969 | 0.00000 | 69.99969 | 0.01202 | -0.00026 | 0.00573 |
-# | 32 | module.layer3.2.conv2.weight | (256, 256, 3, 3) | 589824 | 176948 | 0.00000 | 0.00000 | 0.00000 | 10.70709 | 0.00000 | 69.99986 | 0.01117 | -0.00038 | 0.00554 |
-# | 33 | module.layer3.2.conv3.weight | (1024, 256, 1, 1) | 262144 | 78644 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 0.09766 | 69.99969 | 0.01439 | -0.00038 | 0.00699 |
-# | 34 | module.layer3.3.conv1.weight | (256, 1024, 1, 1) | 262144 | 78644 | 0.00000 | 0.00000 | 0.19531 | 69.99969 | 0.00000 | 69.99969 | 0.01311 | -0.00034 | 0.00638 |
-# | 35 | module.layer3.3.conv2.weight | (256, 256, 3, 3) | 589824 | 176948 | 0.00000 | 0.00000 | 0.00000 | 10.32867 | 0.00000 | 69.99986 | 0.01108 | -0.00036 | 0.00556 |
-# | 36 | module.layer3.3.conv3.weight | (1024, 256, 1, 1) | 262144 | 78644 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 0.09766 | 69.99969 | 0.01383 | -0.00064 | 0.00677 |
-# | 37 | module.layer3.4.conv1.weight | (256, 1024, 1, 1) | 262144 | 78644 | 0.00000 | 0.00000 | 0.09766 | 69.99969 | 0.00000 | 69.99969 | 0.01362 | -0.00046 | 0.00669 |
-# | 38 | module.layer3.4.conv2.weight | (256, 256, 3, 3) | 589824 | 176948 | 0.00000 | 0.00000 | 0.00000 | 11.27167 | 0.00000 | 69.99986 | 0.01105 | -0.00047 | 0.00555 |
-# | 39 | module.layer3.4.conv3.weight | (1024, 256, 1, 1) | 262144 | 78644 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 0.00000 | 69.99969 | 0.01387 | -0.00094 | 0.00679 |
-# | 40 | module.layer3.5.conv1.weight | (256, 1024, 1, 1) | 262144 | 78644 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 0.00000 | 69.99969 | 0.01472 | -0.00040 | 0.00731 |
-# | 41 | module.layer3.5.conv2.weight | (256, 256, 3, 3) | 589824 | 176948 | 0.00000 | 0.00000 | 0.00000 | 12.88605 | 0.00000 | 69.99986 | 0.01132 | -0.00048 | 0.00570 |
-# | 42 | module.layer3.5.conv3.weight | (1024, 256, 1, 1) | 262144 | 78644 | 0.00000 | 0.00000 | 0.00000 | 69.99969 | 0.09766 | 69.99969 | 0.01475 | -0.00139 | 0.00732 |
-# | 43 | module.layer4.0.conv1.weight | (512, 1024, 1, 1) | 524288 | 157287 | 0.00000 | 0.00000 | 0.00000 | 69.99989 | 0.00000 | 69.99989 | 0.01754 | -0.00053 | 0.00888 |
-# | 44 | module.layer4.0.conv2.weight | (512, 512, 3, 3) | 2359296 | 707789 | 0.00000 | 0.00000 | 0.00000 | 23.35434 | 0.00000 | 69.99999 | 0.00915 | -0.00021 | 0.00467 |
-# | 45 | module.layer4.0.conv3.weight | (2048, 512, 1, 1) | 1048576 | 314573 | 0.00000 | 0.00000 | 0.00000 | 69.99998 | 0.00000 | 69.99998 | 0.01159 | -0.00026 | 0.00580 |
-# | 46 | module.layer4.0.downsample.0.weight | (2048, 1024, 1, 1) | 2097152 | 629146 | 0.00000 | 0.00000 | 0.00000 | 69.99998 | 0.00000 | 69.99998 | 0.00760 | -0.00007 | 0.00368 |
-# | 47 | module.layer4.1.conv1.weight | (512, 2048, 1, 1) | 1048576 | 314573 | 0.00000 | 0.00000 | 0.00000 | 69.99998 | 0.00000 | 69.99998 | 0.01140 | -0.00033 | 0.00571 |
-# | 48 | module.layer4.1.conv2.weight | (512, 512, 3, 3) | 2359296 | 707789 | 0.00000 | 0.00000 | 0.00000 | 19.46831 | 0.00000 | 69.99999 | 0.00904 | -0.00044 | 0.00462 |
-# | 49 | module.layer4.1.conv3.weight | (2048, 512, 1, 1) | 1048576 | 314573 | 0.00000 | 0.00000 | 0.00000 | 69.99998 | 0.00000 | 69.99998 | 0.01152 | 0.00007 | 0.00575 |
-# | 50 | module.layer4.2.conv1.weight | (512, 2048, 1, 1) | 1048576 | 314573 | 0.00000 | 0.00000 | 0.00000 | 69.99998 | 0.00000 | 69.99998 | 0.01368 | -0.00014 | 0.00694 |
-# | 51 | module.layer4.2.conv2.weight | (512, 512, 3, 3) | 2359296 | 707789 | 0.00000 | 0.00000 | 0.00000 | 38.29308 | 0.00000 | 69.99999 | 0.00789 | -0.00035 | 0.00409 |
-# | 52 | module.layer4.2.conv3.weight | (2048, 512, 1, 1) | 1048576 | 314573 | 0.00000 | 0.00000 | 0.00000 | 69.99998 | 0.00000 | 69.99998 | 0.01075 | 0.00016 | 0.00524 |
-# | 53 | module.fc.weight | (1000, 2048) | 2048000 | 266240 | 0.19531 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 87.00000 | 0.02998 | 0.00513 | 0.00979 |
-# | 54 | Total sparsity: | - | 25502912 | 7481351 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 70.66472 | 0.00000 | 0.00000 | 0.00000 |
+# | 0 | module.conv1.weight | (64, 3, 7, 7) | 9408 | 9408 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.10902 | -0.00039 | 0.06756 |
+# | 1 | module.layer1.0.conv1.weight | (64, 64, 1, 1) | 4096 | 820 | 0.00000 | 0.00000 | 1.56250 | 79.98047 | 7.81250 | 79.98047 | 0.04406 | -0.00270 | 0.01620 |
+# | 2 | module.layer1.0.conv2.weight | (64, 64, 3, 3) | 36864 | 7373 | 0.00000 | 0.00000 | 7.81250 | 36.27930 | 6.25000 | 79.99946 | 0.02160 | 0.00050 | 0.00779 |
+# | 3 | module.layer1.0.conv3.weight | (256, 64, 1, 1) | 16384 | 3277 | 0.00000 | 0.00000 | 6.25000 | 79.99878 | 13.28125 | 79.99878 | 0.02543 | 0.00032 | 0.00974 |
+# | 4 | module.layer1.0.downsample.0.weight | (256, 64, 1, 1) | 16384 | 3277 | 0.00000 | 0.00000 | 1.56250 | 79.99878 | 13.67188 | 79.99878 | 0.03585 | -0.00183 | 0.01348 |
+# | 5 | module.layer1.1.conv1.weight | (64, 256, 1, 1) | 16384 | 3277 | 0.00000 | 0.00000 | 11.71875 | 79.99878 | 6.25000 | 79.99878 | 0.02139 | 0.00075 | 0.00844 |
+# | 6 | module.layer1.1.conv2.weight | (64, 64, 3, 3) | 36864 | 7373 | 0.00000 | 0.00000 | 6.25000 | 30.76172 | 0.00000 | 79.99946 | 0.02009 | 0.00011 | 0.00763 |
+# | 7 | module.layer1.1.conv3.weight | (256, 64, 1, 1) | 16384 | 3277 | 0.00000 | 0.00000 | 0.00000 | 79.99878 | 7.03125 | 79.99878 | 0.02291 | 0.00013 | 0.00891 |
+# | 8 | module.layer1.2.conv1.weight | (64, 256, 1, 1) | 16384 | 3277 | 0.00000 | 0.00000 | 8.20312 | 79.99878 | 0.00000 | 79.99878 | 0.02034 | -0.00007 | 0.00816 |
+# | 9 | module.layer1.2.conv2.weight | (64, 64, 3, 3) | 36864 | 7373 | 0.00000 | 0.00000 | 0.00000 | 26.29395 | 0.00000 | 79.99946 | 0.02126 | -0.00038 | 0.00860 |
+# | 10 | module.layer1.2.conv3.weight | (256, 64, 1, 1) | 16384 | 3277 | 0.00000 | 0.00000 | 0.00000 | 79.99878 | 7.03125 | 79.99878 | 0.02220 | -0.00112 | 0.00856 |
+# | 11 | module.layer2.0.conv1.weight | (128, 256, 1, 1) | 32768 | 6554 | 0.00000 | 0.00000 | 3.51562 | 79.99878 | 0.00000 | 79.99878 | 0.02269 | -0.00074 | 0.00903 |
+# | 12 | module.layer2.0.conv2.weight | (128, 128, 3, 3) | 147456 | 29492 | 0.00000 | 0.00000 | 0.00000 | 32.34253 | 0.00000 | 79.99946 | 0.01436 | -0.00008 | 0.00567 |
+# | 13 | module.layer2.0.conv3.weight | (512, 128, 1, 1) | 65536 | 13108 | 0.00000 | 0.00000 | 0.00000 | 79.99878 | 18.75000 | 79.99878 | 0.01925 | 0.00021 | 0.00717 |
+# | 14 | module.layer2.0.downsample.0.weight | (512, 256, 1, 1) | 131072 | 26215 | 0.00000 | 0.00000 | 0.00000 | 79.99954 | 12.30469 | 79.99954 | 0.01469 | -0.00023 | 0.00518 |
+# | 15 | module.layer2.1.conv1.weight | (128, 512, 1, 1) | 65536 | 13108 | 0.00000 | 0.00000 | 12.89062 | 79.99878 | 0.00000 | 79.99878 | 0.01206 | 0.00011 | 0.00439 |
+# | 16 | module.layer2.1.conv2.weight | (128, 128, 3, 3) | 147456 | 29492 | 0.00000 | 0.00000 | 0.00000 | 36.49902 | 0.00000 | 79.99946 | 0.01451 | 0.00018 | 0.00548 |
+# | 17 | module.layer2.1.conv3.weight | (512, 128, 1, 1) | 65536 | 13108 | 0.00000 | 0.00000 | 0.00000 | 79.99878 | 3.71094 | 79.99878 | 0.01631 | -0.00087 | 0.00588 |
+# | 18 | module.layer2.2.conv1.weight | (128, 512, 1, 1) | 65536 | 13108 | 0.00000 | 0.00000 | 1.56250 | 79.99878 | 0.00000 | 79.99878 | 0.01590 | -0.00040 | 0.00605 |
+# | 19 | module.layer2.2.conv2.weight | (128, 128, 3, 3) | 147456 | 29492 | 0.00000 | 0.00000 | 0.00000 | 28.51562 | 0.00000 | 79.99946 | 0.01464 | -0.00008 | 0.00558 |
+# | 20 | module.layer2.2.conv3.weight | (512, 128, 1, 1) | 65536 | 13108 | 0.00000 | 0.00000 | 0.00000 | 79.99878 | 2.14844 | 79.99878 | 0.01771 | -0.00020 | 0.00682 |
+# | 21 | module.layer2.3.conv1.weight | (128, 512, 1, 1) | 65536 | 13108 | 0.00000 | 0.00000 | 0.19531 | 79.99878 | 0.00000 | 79.99878 | 0.01613 | -0.00042 | 0.00634 |
+# | 22 | module.layer2.3.conv2.weight | (128, 128, 3, 3) | 147456 | 29492 | 0.00000 | 0.00000 | 0.00000 | 24.03564 | 0.00000 | 79.99946 | 0.01476 | -0.00026 | 0.00586 |
+# | 23 | module.layer2.3.conv3.weight | (512, 128, 1, 1) | 65536 | 13108 | 0.00000 | 0.00000 | 0.00000 | 79.99878 | 4.10156 | 79.99878 | 0.01678 | -0.00034 | 0.00641 |
+# | 24 | module.layer3.0.conv1.weight | (256, 512, 1, 1) | 131072 | 26215 | 0.00000 | 0.00000 | 0.00000 | 79.99954 | 0.00000 | 79.99954 | 0.01981 | -0.00048 | 0.00781 |
+# | 25 | module.layer3.0.conv2.weight | (256, 256, 3, 3) | 589824 | 117965 | 0.00000 | 0.00000 | 0.00000 | 38.29956 | 0.00000 | 79.99997 | 0.01108 | -0.00012 | 0.00427 |
+# | 26 | module.layer3.0.conv3.weight | (1024, 256, 1, 1) | 262144 | 52429 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 4.39453 | 79.99992 | 0.01559 | -0.00001 | 0.00608 |
+# | 27 | module.layer3.0.downsample.0.weight | (1024, 512, 1, 1) | 524288 | 104858 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 4.00391 | 79.99992 | 0.01054 | -0.00000 | 0.00388 |
+# | 28 | module.layer3.1.conv1.weight | (256, 1024, 1, 1) | 262144 | 52429 | 0.00000 | 0.00000 | 4.58984 | 79.99992 | 0.00000 | 79.99992 | 0.01161 | -0.00015 | 0.00440 |
+# | 29 | module.layer3.1.conv2.weight | (256, 256, 3, 3) | 589824 | 117965 | 0.00000 | 0.00000 | 0.00000 | 30.37567 | 0.00000 | 79.99997 | 0.01065 | -0.00009 | 0.00409 |
+# | 30 | module.layer3.1.conv3.weight | (1024, 256, 1, 1) | 262144 | 52429 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.68359 | 79.99992 | 0.01423 | -0.00072 | 0.00548 |
+# | 31 | module.layer3.2.conv1.weight | (256, 1024, 1, 1) | 262144 | 52429 | 0.00000 | 0.00000 | 0.68359 | 79.99992 | 0.00000 | 79.99992 | 0.01134 | -0.00020 | 0.00424 |
+# | 32 | module.layer3.2.conv2.weight | (256, 256, 3, 3) | 589824 | 117965 | 0.00000 | 0.00000 | 0.00000 | 23.76862 | 0.00000 | 79.99997 | 0.01032 | -0.00033 | 0.00400 |
+# | 33 | module.layer3.2.conv3.weight | (1024, 256, 1, 1) | 262144 | 52429 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.19531 | 79.99992 | 0.01298 | -0.00031 | 0.00501 |
+# | 34 | module.layer3.3.conv1.weight | (256, 1024, 1, 1) | 262144 | 52429 | 0.00000 | 0.00000 | 0.19531 | 79.99992 | 0.00000 | 79.99992 | 0.01234 | -0.00023 | 0.00471 |
+# | 35 | module.layer3.3.conv2.weight | (256, 256, 3, 3) | 589824 | 117965 | 0.00000 | 0.00000 | 0.00000 | 23.16437 | 0.00000 | 79.99997 | 0.01036 | -0.00030 | 0.00404 |
+# | 36 | module.layer3.3.conv3.weight | (1024, 256, 1, 1) | 262144 | 52429 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.39062 | 79.99992 | 0.01273 | -0.00055 | 0.00495 |
+# | 37 | module.layer3.4.conv1.weight | (256, 1024, 1, 1) | 262144 | 52429 | 0.00000 | 0.00000 | 0.09766 | 79.99992 | 0.00000 | 79.99992 | 0.01271 | -0.00035 | 0.00492 |
+# | 38 | module.layer3.4.conv2.weight | (256, 256, 3, 3) | 589824 | 117965 | 0.00000 | 0.00000 | 0.00000 | 24.42474 | 0.00000 | 79.99997 | 0.01033 | -0.00038 | 0.00405 |
+# | 39 | module.layer3.4.conv3.weight | (1024, 256, 1, 1) | 262144 | 52429 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.29297 | 79.99992 | 0.01291 | -0.00077 | 0.00505 |
+# | 40 | module.layer3.5.conv1.weight | (256, 1024, 1, 1) | 262144 | 52429 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.00000 | 79.99992 | 0.01351 | -0.00029 | 0.00532 |
+# | 41 | module.layer3.5.conv2.weight | (256, 256, 3, 3) | 589824 | 117965 | 0.00000 | 0.00000 | 0.00000 | 26.96075 | 0.00000 | 79.99997 | 0.01055 | -0.00040 | 0.00417 |
+# | 42 | module.layer3.5.conv3.weight | (1024, 256, 1, 1) | 262144 | 52429 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.68359 | 79.99992 | 0.01390 | -0.00120 | 0.00555 |
+# | 43 | module.layer4.0.conv1.weight | (512, 1024, 1, 1) | 524288 | 104858 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.00000 | 79.99992 | 0.01559 | -0.00040 | 0.00635 |
+# | 44 | module.layer4.0.conv2.weight | (512, 512, 3, 3) | 2359296 | 471860 | 0.00000 | 0.00000 | 0.00000 | 38.93700 | 0.00000 | 79.99997 | 0.00838 | -0.00015 | 0.00335 |
+# | 45 | module.layer4.0.conv3.weight | (2048, 512, 1, 1) | 1048576 | 209716 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.00000 | 79.99992 | 0.01160 | -0.00020 | 0.00466 |
+# | 46 | module.layer4.0.downsample.0.weight | (2048, 1024, 1, 1) | 2097152 | 419431 | 0.00000 | 0.00000 | 0.00000 | 79.99997 | 0.00000 | 79.99997 | 0.00780 | -0.00013 | 0.00296 |
+# | 47 | module.layer4.1.conv1.weight | (512, 2048, 1, 1) | 1048576 | 209716 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.00000 | 79.99992 | 0.01202 | -0.00025 | 0.00479 |
+# | 48 | module.layer4.1.conv2.weight | (512, 512, 3, 3) | 2359296 | 471860 | 0.00000 | 0.00000 | 0.00000 | 33.88023 | 0.00000 | 79.99997 | 0.00884 | -0.00036 | 0.00357 |
+# | 49 | module.layer4.1.conv3.weight | (2048, 512, 1, 1) | 1048576 | 209716 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.00000 | 79.99992 | 0.01205 | 0.00008 | 0.00487 |
+# | 50 | module.layer4.2.conv1.weight | (512, 2048, 1, 1) | 1048576 | 209716 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.00000 | 79.99992 | 0.01396 | -0.00011 | 0.00568 |
+# | 51 | module.layer4.2.conv2.weight | (512, 512, 3, 3) | 2359296 | 471860 | 0.00000 | 0.00000 | 0.00000 | 50.91476 | 0.00000 | 79.99997 | 0.00723 | -0.00022 | 0.00303 |
+# | 52 | module.layer4.2.conv3.weight | (2048, 512, 1, 1) | 1048576 | 209716 | 0.00000 | 0.00000 | 0.00000 | 79.99992 | 0.00000 | 79.99992 | 0.00957 | 0.00020 | 0.00386 |
+# | 53 | module.fc.weight | (1000, 2048) | 2048000 | 409600 | 0.00000 | 0.04883 | 0.00000 | 0.00000 | 0.00000 | 80.00000 | 0.03149 | 0.00414 | 0.01235 |
+# | 54 | Total sparsity: | - | 25502912 | 5108133 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 79.97039 | 0.00000 | 0.00000 | 0.00000 |
 # +----+-------------------------------------+--------------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------+
-# Total sparsity: 70.66
+# 2019-03-20 18:14:17,059 - Total sparsity: 79.97
 #
-# 2018-10-01 20:57:09,476 - --- validate (epoch=95)-----------
-# 2018-10-01 20:57:09,476 - 128116 samples (256 per mini-batch)
-# 2018-10-01 20:57:28,241 - Epoch: [95][ 50/ 500] Loss 1.044524 Top1 75.039062 Top5 90.968750
-# 2018-10-01 20:57:36,132 - Epoch: [95][ 100/ 500] Loss 1.057046 Top1 74.875000 Top5 90.699219
-# 2018-10-01 20:57:44,244 - Epoch: [95][ 150/ 500] Loss 1.066284 Top1 74.627604 Top5 90.575521
-# 2018-10-01 20:57:52,479 - Epoch: [95][ 200/ 500] Loss 1.058866 Top1 74.718750 Top5 90.589844
-# 2018-10-01 20:58:00,566 - Epoch: [95][ 250/ 500] Loss 1.062525 Top1 74.531250 Top5 90.540625
-# 2018-10-01 20:58:08,773 - Epoch: [95][ 300/ 500] Loss 1.060124 Top1 74.542969 Top5 90.552083
-# 2018-10-01 20:58:17,233 - Epoch: [95][ 350/ 500] Loss 1.063018 Top1 74.493304 Top5 90.493304
-# 2018-10-01 20:58:24,937 - Epoch: [95][ 400/ 500] Loss 1.062629 Top1 74.418945 Top5 90.518555
-# 2018-10-01 20:58:33,467 - Epoch: [95][ 450/ 500] Loss 1.064152 Top1 74.388889 Top5 90.502604
-# 2018-10-01 20:58:41,221 - Epoch: [95][ 500/ 500] Loss 1.064142 Top1 74.372656 Top5 90.492969
-# 2018-10-01 20:58:41,290 - ==> Top1: 74.374 Top5: 90.496 Loss: 1.064
+# 2019-03-20 18:14:17,059 - --- validate (epoch=98)-----------
+# 2019-03-20 18:14:17,059 - 50000 samples (256 per mini-batch)
+# 2019-03-20 18:14:47,289 - Epoch: [98][ 50/ 195] Loss 0.958758 Top1 75.703125 Top5 92.843750
+# 2019-03-20 18:15:09,204 - Epoch: [98][ 100/ 195] Loss 0.961983 Top1 75.789062 Top5 92.804688
+# 2019-03-20 18:15:35,028 - Epoch: [98][ 150/ 195] Loss 0.956074 Top1 75.776042 Top5 92.848958
+# 2019-03-20 18:15:50,982 - ==> Top1: 75.838 Top5: 92.868 Loss: 0.959
 #
-# --- test ---------------------
-# 50000 samples (256 per mini-batch)
-# Test: [ 50/ 195] Loss 0.678497 Top1 82.101562 Top5 96.054688
-# Test: [ 100/ 195] Loss 0.801957 Top1 79.386719 Top5 94.843750
-# Test: [ 150/ 195] Loss 0.916142 Top1 77.119792 Top5 93.453125
-# ==> Top1: 76.086 Top5: 92.950 Loss: 0.960
+# 2019-03-20 18:15:50,998 - ==> Best [Top1: 75.990 Top5: 92.872 Sparsity:79.97 Params: 5108133 on epoch: 94]
+# 2019-03-20 18:15:50,998 - Saving checkpoint to: logs/2019.03.18-090917/checkpoint.pth.tar
+#
+# real 3463m11.943s
+# user 31959m34.272s
+# sys 2745m57.392s
 
 version: 1
 
@@ -98,26 +88,26 @@ pruners:
   fc_pruner:
     class: AutomatedGradualPruner
     initial_sparsity : 0.05
-    final_sparsity: 0.87
+    final_sparsity: 0.80
    weights: module.fc.weight
 
-  mid_pruner:
+  conv_pruner:
     class: AutomatedGradualPruner
     initial_sparsity : 0.05
-    final_sparsity: 0.70
+    final_sparsity: 0.80
     weights: [
       #module.conv1.weight,
-      #module.layer1.0.conv1.weight,
-      #module.layer1.0.conv2.weight,
-      #module.layer1.0.conv3.weight,
-      #module.layer1.0.downsample.0.weight,
-      #module.layer1.1.conv1.weight,
-      #module.layer1.1.conv2.weight,
-      #module.layer1.1.conv3.weight,
-      #module.layer1.2.conv1.weight,
-      #module.layer1.2.conv2.weight,
-      #module.layer1.2.conv3.weight,
-      #module.layer2.0.conv1.weight,
+      module.layer1.0.conv1.weight,
+      module.layer1.0.conv2.weight,
+      module.layer1.0.conv3.weight,
+      module.layer1.0.downsample.0.weight,
+      module.layer1.1.conv1.weight,
+      module.layer1.1.conv2.weight,
+      module.layer1.1.conv3.weight,
+      module.layer1.2.conv1.weight,
+      module.layer1.2.conv2.weight,
+      module.layer1.2.conv3.weight,
+      module.layer2.0.conv1.weight,
       module.layer2.0.conv2.weight,
       module.layer2.0.conv3.weight,
       module.layer2.0.downsample.0.weight,
@@ -153,22 +143,13 @@ pruners:
       module.layer4.0.conv2.weight,
       module.layer4.0.conv3.weight,
       module.layer4.0.downsample.0.weight,
-      #module.layer4.1.conv1.weight,
-      #module.layer4.1.conv2.weight,
+      module.layer4.1.conv1.weight,
+      module.layer4.1.conv2.weight,
       module.layer4.1.conv3.weight,
       module.layer4.2.conv1.weight,
       module.layer4.2.conv2.weight,
       module.layer4.2.conv3.weight]
 
-  low_pruner:
-    class: AutomatedGradualPruner
-    initial_sparsity : 0.05
-    final_sparsity: 0.70
-    weights: [
-      module.layer4.1.conv1.weight,
-      module.layer4.1.conv2.weight]
-
-
 lr_schedulers:
   pruning_lr:
     class: ExponentialLR
@@ -177,22 +158,16 @@ lr_schedulers:
 
 policies:
   - pruner:
-      instance_name : low_pruner
-    starting_epoch: 0
-    ending_epoch: 30
-    frequency: 2
-
-  - pruner:
-      instance_name : mid_pruner
+      instance_name : conv_pruner
     starting_epoch: 0
-    ending_epoch: 30
-    frequency: 2
+    ending_epoch: 35
+    frequency: 1
 
   - pruner:
       instance_name : fc_pruner
     starting_epoch: 1
-    ending_epoch: 29
-    frequency: 2
+    ending_epoch: 35
+    frequency: 1
 
   - lr_scheduler:
       instance_name: pruning_lr
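A note on the pruner and policy settings above: AutomatedGradualPruner follows the automated gradual pruning (AGP) recipe of Zhu & Gupta, which raises each tensor's sparsity from initial_sparsity to final_sparsity along a cubic ramp over the epochs covered by the pruning policy. The sketch below is illustrative only and is not Distiller source code; the function name is made up, and it simply plugs this schedule's values (0.05 -> 0.80 over epochs 0-35) into the published formula.

    # Illustrative sketch (assumed AGP formula, not Distiller code):
    #   s_t = s_f + (s_i - s_f) * (1 - (t - t0) / (t_end - t0)) ** 3
    def agp_target_sparsity(epoch, s_i=0.05, s_f=0.80, start=0, end=35):
        """Target sparsity for one tensor at a given epoch under this schedule."""
        if epoch <= start:
            return s_i
        if epoch >= end:
            return s_f
        progress = (epoch - start) / (end - start)
        return s_f + (s_i - s_f) * (1.0 - progress) ** 3

    for epoch in (0, 5, 15, 25, 35):
        print("epoch %2d -> target sparsity %.3f" % (epoch, agp_target_sparsity(epoch)))

With frequency: 1 the target is re-evaluated every epoch, so most of the sparsity is introduced in the first half of the pruning window and the final epochs make only small adjustments; the fc_pruner follows the same ramp but starts at epoch 1.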
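The headline numbers in the two tables can be cross-checked directly from the totals row: overall sparsity is 1 - NNZ(sparse)/NNZ(dense). A small arithmetic check using only the values printed above (the variable names are ours, not Distiller's):

    # Totals row (row 54) of the post-pruning table above.
    dense_nnz = 25502912    # NNZ (dense)
    sparse_nnz = 5108133    # NNZ (sparse)
    print("total sparsity: %.2f%%" % (100.0 * (1.0 - sparse_nnz / dense_nnz)))  # -> 79.97%

The same arithmetic explains why the old schedule reported 70.66% overall even though most convolutions were pruned to 70%: the unpruned early layers pull the weighted average down while the 87% fully-connected layer pulls it up.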