From 9329a7d4cd10a080dad451e4173c85f2a04809d6 Mon Sep 17 00:00:00 2001 From: Matei Zaharia <matei@eecs.berkeley.edu> Date: Mon, 2 Sep 2013 10:15:22 -0700 Subject: [PATCH] Fix spark.io.compression.codec and change default codec to LZF --- .../scala/org/apache/spark/io/CompressionCodec.scala | 12 ++++-------- docs/configuration.md | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 90a0420caf..570a979b56 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -39,17 +39,13 @@ trait CompressionCodec { private[spark] object CompressionCodec { def createCodec(): CompressionCodec = { - // Set the default codec to Snappy since the LZF implementation initializes a pretty large - // buffer for every stream, which results in a lot of memory overhead when the number of - // shuffle reduce buckets are large. - createCodec(classOf[SnappyCompressionCodec].getName) + createCodec(System.getProperty( + "spark.io.compression.codec", classOf[LZFCompressionCodec].getName)) } def createCodec(codecName: String): CompressionCodec = { - Class.forName( - System.getProperty("spark.io.compression.codec", codecName), - true, - Thread.currentThread.getContextClassLoader).newInstance().asInstanceOf[CompressionCodec] + Class.forName(codecName, true, Thread.currentThread.getContextClassLoader) + .newInstance().asInstanceOf[CompressionCodec] } } diff --git a/docs/configuration.md b/docs/configuration.md index aaf85ed4f4..310e78a9eb 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -147,7 +147,7 @@ Apart from these, the following properties are also available, and may be useful </tr> <tr> <td>spark.io.compression.codec</td> - <td>org.apache.spark.io.<br />SnappyCompressionCodec</td> + <td>org.apache.spark.io.<br />LZFCompressionCodec</td> <td> The compression codec class to use for various compressions. By default, Spark provides two codecs: <code>org.apache.spark.io.LZFCompressionCodec</code> and <code>org.apache.spark.io.SnappyCompressionCodec</code>. -- GitLab