diff --git a/distiller/models/cifar10/plain_cifar.py b/distiller/models/cifar10/plain_cifar.py
index 7f668a5e346d019beb171ff6b2fdbb3e4158053f..0ef88514a3e9b6ce99025a646790ef8b7126fb29 100755
--- a/distiller/models/cifar10/plain_cifar.py
+++ b/distiller/models/cifar10/plain_cifar.py
@@ -83,14 +83,6 @@ class PlainCifar(nn.Module):
         self.avgpool = nn.AvgPool2d(8, stride=1)
         self.fc = nn.Linear(64 * block.expansion, num_classes)
 
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
-                m.weight.data.normal_(0, math.sqrt(2. / n))
-            elif isinstance(m, nn.BatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-
     def _make_layer(self, block, planes, num_blocks, stride, batch_norm=True):
         # Each layer is composed on 2*num_blocks blocks, and the first block usually
         # performs downsampling of the input, and doubling of the number of filters/feature-maps.
diff --git a/distiller/models/cifar10/preresnet_cifar.py b/distiller/models/cifar10/preresnet_cifar.py
index 9a8b2e9e884e70ca462e1a24e25f3d85a23ccf44..4210647ffacb8dd91bab3a57e5f5cf7cda6a61f5 100644
--- a/distiller/models/cifar10/preresnet_cifar.py
+++ b/distiller/models/cifar10/preresnet_cifar.py
@@ -116,14 +116,6 @@ class PreactResNetCifar(nn.Module):
         self.avgpool = nn.AvgPool2d(8, stride=1)
         self.fc = nn.Linear(64 * block.expansion, num_classes)
 
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
-                m.weight.data.normal_(0, math.sqrt(2. / n))
-            elif isinstance(m, nn.BatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-
     def _make_layer(self, layer_gates, block, planes, blocks, stride=1, conv_downsample=False):
         downsample = None
         outplanes = planes * block.expansion
diff --git a/distiller/models/cifar10/resnet_cifar.py b/distiller/models/cifar10/resnet_cifar.py
index ca31731feaa115a21420b80973e6b23d1fd1f09f..dc8432f5fa6b930a95db305eabc16e2ea82fe024 100755
--- a/distiller/models/cifar10/resnet_cifar.py
+++ b/distiller/models/cifar10/resnet_cifar.py
@@ -87,7 +87,6 @@ class BasicBlock(nn.Module):
 
 
 class ResNetCifar(nn.Module):
-
     def __init__(self, block, layers, num_classes=NUM_CLASSES):
         self.nlayers = 0
         # Each layer manages its own gates
@@ -109,14 +108,6 @@ class ResNetCifar(nn.Module):
         self.avgpool = nn.AvgPool2d(8, stride=1)
         self.fc = nn.Linear(64 * block.expansion, num_classes)
 
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
-                m.weight.data.normal_(0, math.sqrt(2. / n))
-            elif isinstance(m, nn.BatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-
     def _make_layer(self, layer_gates, block, planes, blocks, stride=1):
         downsample = None
         if stride != 1 or self.inplanes != planes * block.expansion:
diff --git a/distiller/models/cifar10/vgg_cifar.py b/distiller/models/cifar10/vgg_cifar.py
index 0b5a5bb0d3047843879834778cc8cd8d6ae8f881..ec823797c8abaee01156beb957339c5551408ac2 100755
--- a/distiller/models/cifar10/vgg_cifar.py
+++ b/distiller/models/cifar10/vgg_cifar.py
@@ -32,12 +32,10 @@ __all__ = [
 
 
 class VGGCifar(nn.Module):
-    def __init__(self, features, num_classes=10, init_weights=True):
+    def __init__(self, features, num_classes=10):
        super(VGGCifar, self).__init__()
         self.features = features
         self.classifier = nn.Linear(512, num_classes)
-        if init_weights:
-            self._initialize_weights()
 
     def forward(self, x):
         x = self.features(x)
@@ -45,19 +43,6 @@ class VGGCifar(nn.Module):
         x = self.classifier(x)
         return x
 
-    def _initialize_weights(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-            elif isinstance(m, nn.BatchNorm2d):
-                nn.init.constant_(m.weight, 1)
-                nn.init.constant_(m.bias, 0)
-            elif isinstance(m, nn.Linear):
-                nn.init.normal_(m.weight, 0, 0.01)
-                nn.init.constant_(m.bias, 0)
-
 
 def make_layers(cfg, batch_norm=False):
     layers = []
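
Note on the removed code: every deleted block performed the same in-place He (Kaiming) initialization. Convolution weights are drawn from N(0, sqrt(2/n)), where n is the fan-out (kernel_h * kernel_w * out_channels); batch-norm scale and shift are reset to 1 and 0; the VGG variant additionally draws linear weights from N(0, 0.01). For convolutions, nn.init.kaiming_normal_(w, mode='fan_out', nonlinearity='relu') computes the identical standard deviation, which is why the two removed styles are equivalent. Below is a minimal sketch of a single shared helper reproducing the removed behavior; the name init_model_weights and the idea of a centralized replacement are assumptions for illustration, not part of this patch.

    import math

    import torch.nn as nn


    def init_model_weights(model):
        # Hypothetical shared helper (not part of this patch): reproduces the
        # He-style initialization the per-model constructors used to perform.
        for m in model.modules():
            if isinstance(m, nn.Conv2d):
                # n is the fan-out (kernel_h * kernel_w * out_channels), so the
                # std below equals kaiming_normal_ with mode='fan_out'.
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)   # scale (gamma)
                m.bias.data.zero_()      # shift (beta)
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                if m.bias is not None:
                    m.bias.data.zero_()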