From c58d4ea3d46ec7b72f7ced17d5c4193ee42befa0 Mon Sep 17 00:00:00 2001
From: Patrick Wendell <pwendell@gmail.com>
Date: Thu, 23 Jan 2014 18:12:40 -0800
Subject: [PATCH] Response to Matei's review

---
 .../scala/org/apache/spark/SparkContext.scala | 27 ++++++++++---------
 .../spark/api/java/JavaSparkContext.scala     | 16 +++++------
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 7ff06b5774..566472e597 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -355,7 +355,7 @@ class SparkContext(
    * @param valueClass Class of the values
    * @param minSplits Minimum number of Hadoop Splits to generate.
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -374,7 +374,7 @@ class SparkContext(
 
   /** Get an RDD for a Hadoop file with an arbitrary InputFormat
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -407,7 +407,7 @@ class SparkContext(
    * val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path, minSplits)
    * }}}
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -428,8 +428,9 @@ class SparkContext(
    * can just write, for example,
    * {{{
    * val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path)
+   * }}}
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -453,7 +454,7 @@ class SparkContext(
    * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
    * and extra configuration options to pass to the input format.
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -474,7 +475,7 @@ class SparkContext(
    * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
    * and extra configuration options to pass to the input format.
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -489,12 +490,12 @@ class SparkContext(
 
   /** Get an RDD for a Hadoop SequenceFile with given key and value types.
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
-   * */
-  def sequenceFile[K: ClassTag, V: ClassTag](path: String,
+   */
+  def sequenceFile[K, V](path: String,
       keyClass: Class[K],
       valueClass: Class[V],
       minSplits: Int
@@ -505,12 +506,12 @@ class SparkContext(
 
   /** Get an RDD for a Hadoop SequenceFile with given key and value types.
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
    * */
-  def sequenceFile[K: ClassTag, V: ClassTag](path: String, keyClass: Class[K], valueClass: Class[V]
+  def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V]
       ): RDD[(K, V)] =
     sequenceFile(path, keyClass, valueClass, defaultMinSplits)
 
@@ -530,7 +531,7 @@ class SparkContext(
    * for the appropriate type. In addition, we pass the converter a ClassTag of its type to
    * allow it to figure out the Writable class to use in the subclass case.
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -1058,7 +1059,7 @@ object SparkContext {
   implicit def rddToAsyncRDDActions[T: ClassTag](rdd: RDD[T]) = new AsyncRDDActions(rdd)
 
   implicit def rddToSequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable: ClassTag](
-    rdd: RDD[(K, V)]) =
+      rdd: RDD[(K, V)]) =
     new SequenceFileRDDFunctions(rdd)
 
   implicit def rddToOrderedRDDFunctions[K <% Ordered[K]: ClassTag, V: ClassTag](
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index 75b8e76b50..5a426b9835 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -139,7 +139,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
 
   /** Get an RDD for a Hadoop SequenceFile with given key and value types.
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -156,7 +156,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
 
   /** Get an RDD for a Hadoop SequenceFile.
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -197,7 +197,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
    * other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable,
    * etc).
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -218,7 +218,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
    * Get an RDD for a Hadoop-readable dataset from a Hadooop JobConf giving its InputFormat and any
    * other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable,
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -236,7 +236,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
 
   /** Get an RDD for a Hadoop file with an arbitrary InputFormat.
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -255,7 +255,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
 
   /** Get an RDD for a Hadoop file with an arbitrary InputFormat
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -276,7 +276,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
    * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
    * and extra configuration options to pass to the input format.
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
@@ -296,7 +296,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
    * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
    * and extra configuration options to pass to the input format.
    *
-   * Note: Because Hadoop's RecordReader class re-uses the same Writable object for each
+   * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
    * a `map` function.
--
GitLab
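For readers skimming the patch: the Scaladoc note that is being bolded throughout warns that Hadoop's RecordReader reuses a single Writable instance for every record it returns, so calling cache() directly on the RDD produced by hadoopFile or sequenceFile can leave many cached entries pointing at the same mutated object. The sketch below illustrates the workaround the note recommends, copying each record with `map` before caching. It is a minimal, self-contained example written against the public SparkContext API; the application name and input path are illustrative placeholders, not part of the patch.

import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.spark.SparkContext

object WritableCachingExample {
  def main(args: Array[String]): Unit = {
    // Hypothetical local context; any SparkContext behaves the same way here.
    val sc = new SparkContext("local", "writable-caching-example")

    // hadoopFile hands back the same LongWritable/Text instances for every record in a
    // partition, so caching `raw` directly would cache many references to one object.
    val raw = sc.hadoopFile[LongWritable, Text, TextInputFormat]("hdfs:///tmp/input.txt")

    // Copy each record into plain Scala values before caching, as the Scaladoc advises.
    val copied = raw.map { case (offset, line) => (offset.get, line.toString) }
    copied.cache()

    println(copied.count())
    sc.stop()
  }
}

Converting the Writables to Long and String also leaves the cached records in a form the default Java serializer can handle, which matters if the cached RDD later spills to disk or is shipped across the network.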