diff --git a/distiller/utils.py b/distiller/utils.py
index e2f624ac30f2be8da0f50b7169d4d5c12faef5f9..510a481d276509548c200710dfa77fb55d40a6c9 100755
--- a/distiller/utils.py
+++ b/distiller/utils.py
@@ -431,7 +431,7 @@ def activation_channels_means(activation):
     The activation usually has the shape: (batch_size, num_channels, h, w).
 
     "We first use global average pooling to convert the output of layer i, which is a
-    c x h x w tensor, into a 1 x c vector."
+    c x h x w tensor, into a 1 x c vector."
 
     Returns - for each channel: the batch-mean of its L1 magnitudes (i.e. over all of the
     activations in the mini-batch, compute the mean of the L1 magnitude of each channel).
@@ -457,7 +457,7 @@ def activation_channels_apoz(activation):
     The activation usually has the shape: (batch_size, num_channels, h, w).
 
     "We first use global average pooling to convert the output of layer i, which is a
-    c x h x w tensor, into a 1 x c vector."
+    c x h x w tensor, into a 1 x c vector."
 
     Returns - for each channel: the batch-mean of its sparsity.
     """