diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 90a0420cafb8f4b11400fdd643bd3b4c7f3860ed..570a979b56879effacd4d93ccdccc3f52f076e78 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -39,17 +39,13 @@ trait CompressionCodec { private[spark] object CompressionCodec { def createCodec(): CompressionCodec = { - // Set the default codec to Snappy since the LZF implementation initializes a pretty large - // buffer for every stream, which results in a lot of memory overhead when the number of - // shuffle reduce buckets are large. - createCodec(classOf[SnappyCompressionCodec].getName) + createCodec(System.getProperty( + "spark.io.compression.codec", classOf[LZFCompressionCodec].getName)) } def createCodec(codecName: String): CompressionCodec = { - Class.forName( - System.getProperty("spark.io.compression.codec", codecName), - true, - Thread.currentThread.getContextClassLoader).newInstance().asInstanceOf[CompressionCodec] + Class.forName(codecName, true, Thread.currentThread.getContextClassLoader) + .newInstance().asInstanceOf[CompressionCodec] } } diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala index fd6f69041a94fc3cb0d2108869c62785bd12a397..ab81bfbe559479ce688fa024944bd982f445730e 100644 --- a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala +++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala @@ -44,7 +44,7 @@ class CompressionCodecSuite extends FunSuite { test("default compression codec") { val codec = CompressionCodec.createCodec() - assert(codec.getClass === classOf[SnappyCompressionCodec]) + assert(codec.getClass === classOf[LZFCompressionCodec]) testCodec(codec) } diff --git a/docs/configuration.md b/docs/configuration.md index aaf85ed4f4c155d855e4519010b9b99757aec2a4..310e78a9ebcfda376d35d20ad140c77ee07f060a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -147,7 +147,7 @@ Apart from these, the following properties are also available, and may be useful </tr> <tr> <td>spark.io.compression.codec</td> - <td>org.apache.spark.io.<br />SnappyCompressionCodec</td> + <td>org.apache.spark.io.<br />LZFCompressionCodec</td> <td> The compression codec class to use for various compressions. By default, Spark provides two codecs: <code>org.apache.spark.io.LZFCompressionCodec</code> and <code>org.apache.spark.io.SnappyCompressionCodec</code>.