diff --git a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala
index 73495a8d7d7fd3ef8c8d7865f789326f3d1afe7b..c3764ac671afb7d436a66da69a64742c27459e15 100644
--- a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala
+++ b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala
@@ -166,7 +166,7 @@ private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock)
   }
 
   /**
-   * Send ExecutorRemoved to the event loop to remove a executor. Only for test.
+   * Send ExecutorRemoved to the event loop to remove an executor. Only for test.
    *
    * @return if HeartbeatReceiver is stopped, return None. Otherwise, return a Some(Future) that
    *         indicate if this operation is successful.
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
index faed4f4dc95736c3788945a7632d96259176ae98..f8aac3008cefacde57fe4bc2fac635b86ec6f27d 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -929,7 +929,7 @@ private[deploy] class Master(
     exec.state = ExecutorState.KILLED
   }
 
-  /** Generate a new app ID given a app's submission date */
+  /** Generate a new app ID given an app's submission date */
   private def newApplicationId(submitDate: Date): String = {
     val appId = "app-%s-%04d".format(createDateFormat.format(submitDate), nextAppNumber)
     nextAppNumber += 1
diff --git a/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala b/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala
index f8167074c6dfa62c0554d310a360b9f601f2b95b..f1915857ea43af15a4b8ef38dc5d9c3dc7938a75 100644
--- a/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala
+++ b/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala
@@ -24,7 +24,7 @@ import scala.collection.mutable
 import org.apache.spark.internal.Logging
 
 /**
- * Implements policies and bookkeeping for sharing a adjustable-sized pool of memory between tasks.
+ * Implements policies and bookkeeping for sharing an adjustable-sized pool of memory between tasks.
  *
 * Tries to ensure that each task gets a reasonable share of memory, instead of some task ramping up
 * to a large amount first and then causing others to spill to disk repeatedly.
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index a7142376017d54807039ae20debef48f1828a598..104e0cb37155f4f744272726bca5d559207d36d7 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -1054,7 +1054,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
         val warningMessage =
           s"$outputCommitterClass may be an output committer that writes data directly to " +
             "the final location. Because speculation is enabled, this output committer may " +
-            "cause data loss (see the case in SPARK-10063). If possible, please use a output " +
+            "cause data loss (see the case in SPARK-10063). If possible, please use an output " +
             "committer that does not have this behavior (e.g. FileOutputCommitter)."
         logWarning(warningMessage)
       }
@@ -1142,7 +1142,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
         val warningMessage =
           s"$outputCommitterClass may be an output committer that writes data directly to " +
             "the final location. Because speculation is enabled, this output committer may " +
-            "cause data loss (see the case in SPARK-10063). If possible, please use a output " +
+            "cause data loss (see the case in SPARK-10063). If possible, please use an output " +
             "committer that does not have this behavior (e.g. FileOutputCommitter)."
         logWarning(warningMessage)
       }
diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala
index fffbd5cd44a23b40073e173ad7aa4afed4f0061a..ae4a6003517ccd5d19d74af0caa7685de4fbd524 100644
--- a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala
+++ b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala
@@ -52,7 +52,7 @@ private[netty] case class RemoteProcessConnectionError(cause: Throwable, remoteA
   extends InboxMessage
 
 /**
- * A inbox that stores messages for an [[RpcEndpoint]] and posts messages to it thread-safely.
+ * An inbox that stores messages for an [[RpcEndpoint]] and posts messages to it thread-safely.
  */
 private[netty] class Inbox(
     val endpointRef: NettyRpcEndpointRef,
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala b/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala
index 7e1197d742802c5a20a46781611f9f406710c05b..642bf81ac087e35780404c0e2c04968c5db7bbb5 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala
@@ -20,7 +20,7 @@ package org.apache.spark.scheduler
 import org.apache.spark.executor.ExecutorExitCode
 
 /**
- * Represents an explanation for a executor or whole slave failing or exiting.
+ * Represents an explanation for an executor or whole slave failing or exiting.
 */
 private[spark]
 class ExecutorLossReason(val message: String) extends Serializable {
diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala
index 745ef126913f5268e2ae0cfee1debce6e4e49813..9dc274c9fe2881a44152fb811d8c9204e82f8b5a 100644
--- a/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala
@@ -134,7 +134,7 @@ private[spark] class SerializerManager(defaultSerializer: Serializer, conf: Spar
   }
 
   /**
-   * Deserializes a InputStream into an iterator of values and disposes of it when the end of
+   * Deserializes an InputStream into an iterator of values and disposes of it when the end of
    * the iterator is reached.
    */
  def dataDeserializeStream[T: ClassTag](
diff --git a/docs/streaming-custom-receivers.md b/docs/streaming-custom-receivers.md
index a4e17fd24eac298826e646feb482facba9f583fa..479140f519103ff9501860107ca1ee6c7967a9b2 100644
--- a/docs/streaming-custom-receivers.md
+++ b/docs/streaming-custom-receivers.md
@@ -36,7 +36,7 @@ Any exception in the receiving threads should be caught and handled properly to
 failures of the receiver. `restart(<exception>)` will restart the receiver by
 asynchronously calling `onStop()` and then calling `onStart()` after a delay.
 `stop(<exception>)` will call `onStop()` and terminate the receiver. Also, `reportError(<error>)`
-reports a error message to the driver (visible in the logs and UI) without stopping / restarting
+reports an error message to the driver (visible in the logs and UI) without stopping / restarting
 the receiver.
 
 The following is a custom receiver that receives a stream of text over a socket. It treats
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 4d0a1122dcd12971173b6d84e321360a7ec47bda..d7eafff38f35ba23681d471e903dab45db839c6b 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -612,7 +612,7 @@ as well as to run the receiver(s).
 
 - When running a Spark Streaming program locally, do not use "local" or "local[1]" as the master URL.
   Either of these means that only one thread will be used for running tasks locally. If you are using
-  a input DStream based on a receiver (e.g. sockets, Kafka, Flume, etc.), then the single thread will
+  an input DStream based on a receiver (e.g. sockets, Kafka, Flume, etc.), then the single thread will
   be used to run the receiver, leaving no thread for processing the received data. Hence, when running
   locally, always use "local[*n*]" as the master URL, where *n* > number of receivers to run
   (see [Spark Properties](configuration.html#spark-properties) for information on how to set
@@ -1788,7 +1788,7 @@ This example appends the word counts of network data into a file.
 This behavior is made simple by using `JavaStreamingContext.getOrCreate`. This is used as follows.
 
 {% highlight java %}
-// Create a factory object that can create a and setup a new JavaStreamingContext
+// Create a factory object that can create and setup a new JavaStreamingContext
 JavaStreamingContextFactory contextFactory = new JavaStreamingContextFactory() {
   @Override public JavaStreamingContext create() {
     JavaStreamingContext jssc = new JavaStreamingContext(...);  // new context
diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java
index 1cba565b38c2aeb39a3fc48a39089cfec0a8bde9..e20b94d5b03f2d146a30d29df7ff4ff43aae3f82 100644
--- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java
+++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java
@@ -70,7 +70,7 @@ public class JavaCustomReceiver extends Receiver<String> {
     SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
     JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));
 
-    // Create a input stream with the custom receiver on target ip:port and count the
+    // Create an input stream with the custom receiver on target ip:port and count the
     // words in input stream of \n delimited text (eg. generated by 'nc')
     JavaReceiverInputDStream<String> lines = ssc.receiverStream(
       new JavaCustomReceiver(args[0], Integer.parseInt(args[1])));
diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
index 1d144db9864bdb7596fd973adb6d7389f88358a1..f70975eeb5c9985bf67d4fa4c6566015a676ca36 100644
--- a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
@@ -50,7 +50,7 @@ object CustomReceiver {
     val sparkConf = new SparkConf().setAppName("CustomReceiver")
     val ssc = new StreamingContext(sparkConf, Seconds(1))
 
-    // Create a input stream with the custom receiver on target ip:port and count the
+    // Create an input stream with the custom receiver on target ip:port and count the
     // words in input stream of \n delimited text (eg. generated by 'nc')
     val lines = ssc.receiverStream(new CustomReceiver(args(0), args(1).toInt))
     val words = lines.flatMap(_.split(" "))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 0ab4459bdb9df0c77af9162fa61d801edf7e53c2..61e355ab9fba1cd009fc6b2806a79df7e5dabfc3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -686,7 +686,7 @@ object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] {
 /**
 * MultiClassSummarizer computes the number of distinct labels and corresponding counts,
 * and validates the data to see if the labels used for k class multi-label classification
- * are in the range of {0, 1, ..., k - 1} in a online fashion.
+ * are in the range of {0, 1, ..., k - 1} in an online fashion.
 *
 * Two MultilabelSummarizer can be merged together to have a statistical summary of the
 * corresponding joint dataset.
@@ -923,7 +923,7 @@ class BinaryLogisticRegressionSummary private[classification] (
 
 /**
 * LogisticAggregator computes the gradient and loss for binary logistic loss function, as used
- * in binary classification for instances in sparse or dense vector in a online fashion.
+ * in binary classification for instances in sparse or dense vector in an online fashion.
 *
 * Note that multinomial logistic loss is not supported yet!
 *
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
index 82f2de7ccdfdb604a634b0171144846bc937735d..ecec61a72f823985265038c6e0e7bc4040fd8922 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
@@ -790,7 +790,7 @@ trait Params extends Identifiable with Serializable {
 * :: DeveloperApi ::
 * Java-friendly wrapper for [[Params]].
 * Java developers who need to extend [[Params]] should use this class instead.
- * If you need to extend a abstract class which already extends [[Params]], then that abstract
+ * If you need to extend an abstract class which already extends [[Params]], then that abstract
 * class should be Java-friendly as well.
 */
 @DeveloperApi
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index e63eb710806045411ee430f21dd381f8cc220359..00ef6ccc74d286590cdb8d070f00d0d39405b0a1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -396,7 +396,7 @@ object AFTSurvivalRegressionModel extends MLReadable[AFTSurvivalRegressionModel]
 
 /**
 * AFTAggregator computes the gradient and loss for a AFT loss function,
- * as used in AFT survival regression for samples in sparse or dense vector in a online fashion.
+ * as used in AFT survival regression for samples in sparse or dense vector in an online fashion.
 *
 * The loss function and likelihood function under the AFT model based on:
 * Lawless, J. F., Statistical Models and Methods for Lifetime Data,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 37552194c57d3d404c8486704311d1b75cad3c39..4d66b0eb37ab43c74ee156aaf009eaa1483fa5d0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -731,7 +731,7 @@ class LinearRegressionSummary private[regression] (
 
 /**
 * LeastSquaresAggregator computes the gradient and loss for a Least-squared loss function,
- * as used in linear regression for samples in sparse or dense vector in a online fashion.
+ * as used in linear regression for samples in sparse or dense vector in an online fashion.
 *
 * Two LeastSquaresAggregator can be merged together to have a summary of loss and gradient of
 * the corresponding joint dataset.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
index b836d2a2340e69e5cf1646e60b348141c06f3475..7d12f447f796340a1faaf337adb76dcb8b3dc136 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
@@ -74,7 +74,7 @@ class ParamGridBuilder @Since("1.2.0") {
   }
 
   /**
-   * Adds a int param with multiple values.
+   * Adds an int param with multiple values.
    */
   @Since("1.2.0")
   def addGrid(param: IntParam, values: Array[Int]): this.type = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPTree.scala
index 1d2d777c00793d9b76cfdd875a8f3e73033f8acc..b0fa287473c3cf55b4723b337a2289b1328560d9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPTree.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPTree.scala
@@ -126,7 +126,7 @@ private[fpm] object FPTree {
     def isRoot: Boolean = parent == null
   }
 
-  /** Summary of a item in an FP-Tree. */
+  /** Summary of an item in an FP-Tree. */
   private class Summary[T] extends Serializable {
     var count: Long = 0L
     val nodes: ListBuffer[Node[T]] = ListBuffer.empty
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
index 98404be2603c73bdb71f448d024bf0f79a0478e8..d4de0fd7d5f4d8a84d3e3bfddd597929d6be88a3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
@@ -24,7 +24,7 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors}
 * :: DeveloperApi ::
 * MultivariateOnlineSummarizer implements [[MultivariateStatisticalSummary]] to compute the mean,
 * variance, minimum, maximum, counts, and nonzero counts for instances in sparse or dense vector
- * format in a online fashion.
+ * format in an online fashion.
 *
 * Two MultivariateOnlineSummarizer can be merged together to have a statistical summary of
 * the corresponding joint dataset.
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 43d9072a24e4df397c4de5f3516303c369d94d91..705022934e41bf566489148f58ee15b421f5280e 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -648,7 +648,7 @@ class IsotonicRegressionModel(Saveable, Loader):
 
     @since("1.4.0")
     def save(self, sc, path):
-        """Save a IsotonicRegressionModel."""
+        """Save an IsotonicRegressionModel."""
         java_boundaries = _py2java(sc, self.boundaries.tolist())
         java_predictions = _py2java(sc, self.predictions.tolist())
         java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel(
@@ -658,7 +658,7 @@ class IsotonicRegressionModel(Saveable, Loader):
     @classmethod
     @since("1.4.0")
     def load(cls, sc, path):
-        """Load a IsotonicRegressionModel."""
+        """Load an IsotonicRegressionModel."""
         java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel.load(
             sc._jsc.sc(), path)
         py_boundaries = _java2py(sc, java_model.boundaryVector()).toArray()
@@ -694,7 +694,7 @@ class IsotonicRegression(object):
     @since("1.4.0")
     def train(cls, data, isotonic=True):
         """
-        Train a isotonic regression model on the given data.
+        Train an isotonic regression model on the given data.
 
         :param data:
           RDD of (label, feature, weight) tuples.
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 64b8bc442dad04a762c9a7e49710d85eb1968e2a..15cefc8cf1125dfb972d49e7933ef853a57170c7 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1177,7 +1177,7 @@ def sha2(col, numBits):
 
 @since(2.0)
 def hash(*cols):
-    """Calculates the hash code of given columns, and returns the result as a int column.
+    """Calculates the hash code of given columns, and returns the result as an int column.
 
     >>> spark.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect()
     [Row(hash=-757602832)]
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 73d2b81b6bd045acdb56b2da8837379c498cc23a..13d21d7143d28333f46f40aab7c86518e624866f 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -847,7 +847,7 @@ class DataFrameWriter(object):
 
     @since(1.4)
     def jdbc(self, url, table, mode=None, properties=None):
-        """Saves the content of the :class:`DataFrame` to a external database table via JDBC.
+        """Saves the content of the :class:`DataFrame` to an external database table via JDBC.
 
         .. note:: Don't create too many partitions in parallel on a large cluster; \
         otherwise Spark might crash your external database systems.
diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py
index 67a0819601efaf6ca97b001765c199c9f7fabedd..fb6c66f2df9973b68a5f14de721222986bcf8d49 100644
--- a/python/pyspark/streaming/dstream.py
+++ b/python/pyspark/streaming/dstream.py
@@ -623,7 +623,7 @@ class TransformedDStream(DStream):
         self._jdstream_val = None
 
         # Using type() to avoid folding the functions and compacting the DStreams which is not
-        # not strictly a object of TransformedDStream.
+        # not strictly an object of TransformedDStream.
         # Changed here is to avoid bug in KafkaTransformedDStream when calling offsetRanges().
         if (type(prev) is TransformedDStream and
                 not prev.is_cached and not prev.is_checkpointed):
diff --git a/python/pyspark/streaming/kafka.py b/python/pyspark/streaming/kafka.py
index 015ca77dbf1fbc4cd4219cc370681322ea6c7335..2c1a667fc80c46d533bc838b7ee230bd462183a0 100644
--- a/python/pyspark/streaming/kafka.py
+++ b/python/pyspark/streaming/kafka.py
@@ -228,7 +228,7 @@ class OffsetRange(object):
 
     def __init__(self, topic, partition, fromOffset, untilOffset):
         """
-        Create a OffsetRange to represent range of offsets
+        Create an OffsetRange to represent range of offsets
         :param topic: Kafka topic name.
         :param partition: Kafka partition id.
         :param fromOffset: Inclusive starting offset.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index cd242d78a41235c9e6cd39c883571ab3676d2e2c..c5f221d7830f391008dd281eefc126e52a742187 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -215,7 +215,7 @@ trait CheckAnalysis extends PredicateHelper {
           if (!RowOrdering.isOrderable(expr.dataType)) {
             failAnalysis(
               s"expression ${expr.sql} cannot be used as a grouping expression " +
-                s"because its data type ${expr.dataType.simpleString} is not a orderable " +
+                s"because its data type ${expr.dataType.simpleString} is not an orderable " +
                 s"data type.")
           }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala
index 79c3528a522d3865d8148df0a25d5c6c27c68c06..d4350598f478c4000ee04c559a68131a09ad8218 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala
@@ -37,7 +37,7 @@ object TypeCheckResult {
 
   /**
    * Represents the failing result of `Expression.checkInputDataTypes`,
-   * with a error message to show the reason of failure.
+   * with an error message to show the reason of failure.
    */
   case class TypeCheckFailure(message: String) extends TypeCheckResult {
     def isSuccess: Boolean = false
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 91bdcc3b09a34f814cfdb707ab92098e294f3d6d..387e5552549e6b6873465c618aa1438f2db36107 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -178,8 +178,8 @@ object TypeCoercion {
       q transformExpressions {
         case a: AttributeReference =>
           inputMap.get(a.exprId) match {
-            // This can happen when a Attribute reference is born in a non-leaf node, for example
-            // due to a call to an external script like in the Transform operator.
+            // This can happen when an Attribute reference is born in a non-leaf node, for
+            // example due to a call to an external script like in the Transform operator.
             // TODO: Perhaps those should actually be aliases?
             case None => a
             // Leave the same if the dataTypes match.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
index 7da1fe93c6c7c8b6bb371a4222a16aff6e39a3a5..c66d08d2cf8fc23fb480379b2818cce9772001df 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
@@ -26,7 +26,7 @@ object JarResource extends FunctionResourceType("jar")
 
 object FileResource extends FunctionResourceType("file")
 
-// We do not allow users to specify a archive because it is YARN specific.
+// We do not allow users to specify an archive because it is YARN specific.
 // When loading resources, we will throw an exception and ask users to
 // use --archive with spark submit.
 object ArchiveResource extends FunctionResourceType("archive")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
index b8ab0364dd8f37514a655862a41309d64922601f..946b3d446a40f7093bff4ce7c51e35f4fab7b074 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.types._
 
 /**
 * Returns the first value of `child` for a group of rows. If the first value of `child`
- * is `null`, it returns `null` (respecting nulls). Even if [[First]] is used on a already
+ * is `null`, it returns `null` (respecting nulls). Even if [[First]] is used on an already
 * sorted column, if we do partial aggregation and final aggregation (when mergeExpression
 * is used) its result will not be deterministic (unless the input table is sorted and has
 * a single partition, and we use a single reducer to do the aggregation.).
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
index b05d74b49b591f45f6d6f09889d9dfdbc76e161a..53b4b761ae514247d6b9f347ed27985cd2e7b971 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.types._
 
 /**
 * Returns the last value of `child` for a group of rows. If the last value of `child`
- * is `null`, it returns `null` (respecting nulls). Even if [[Last]] is used on a already
+ * is `null`, it returns `null` (respecting nulls). Even if [[Last]] is used on an already
 * sorted column, if we do partial aggregation and final aggregation (when mergeExpression
 * is used) its result will not be deterministic (unless the input table is sorted and has
 * a single partition, and we use a single reducer to do the aggregation.).
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
index 9ead571c5374a3872fb43dc17f38d8b28f661888..16c03c500ad081b28e3d31d7b81647c51085f026 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
@@ -51,7 +51,7 @@ object PivotFirst {
 }
 
 /**
- * PivotFirst is a aggregate function used in the second phase of a two phase pivot to do the
+ * PivotFirst is an aggregate function used in the second phase of a two phase pivot to do the
 * required rearrangement of values into pivoted form.
 *
 * For example on an input of
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 7e3683e482df10336a99ee158601f95c1a43deaf..95ed68fbb05289d48e07a30341e7952df31bc0cf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -182,7 +182,7 @@ case class Literal protected (value: Any, dataType: DataType)
 
   override protected def jsonFields: List[JField] = {
     // Turns all kinds of literal values to string in json field, as the type info is hard to
-    // retain in json format, e.g. {"a": 123} can be a int, or double, or decimal, etc.
+    // retain in json format, e.g. {"a": 123} can be an int, or double, or decimal, etc.
     val jsonValue = (value, dataType) match {
       case (null, _) => JNull
       case (i: Int, DateType) => JString(DateTimeUtils.toJavaDate(i).toString)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
index 93a8278528697a5ef04de1a306bd19437684cd11..e036982e70f9983c64555be7a1ae65837be61ce4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
@@ -214,7 +214,7 @@ class GenericRowWithSchema(values: Array[Any], override val schema: StructType)
 }
 
 /**
- * A internal row implementation that uses an array of objects as the underlying storage.
+ * An internal row implementation that uses an array of objects as the underlying storage.
 * Note that, while the array is not copied, and thus could technically be mutated after creation,
 * this is not allowed.
 */
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
index 5001f9a41e07e409b0d2092b61b02d8925655564..08cb6c0134e3a0279cde4a89fe1a2317ae35b1a7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
@@ -129,7 +129,7 @@ object PredicateSubquery {
 
 /**
 * A [[ListQuery]] expression defines the query which we want to search in an IN subquery
- * expression. It should and can only be used in conjunction with a IN expression.
+ * expression. It should and can only be used in conjunction with an IN expression.
 *
 * For example (SQL):
 * {{{
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 5e998d61880ea269a347b61951f066f62fa91bef..48d70099b6d88e5fb34d3d57aca3cd5587841f73 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -926,7 +926,7 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper {
       case e @ CaseWhen(branches, _) if branches.headOption.map(_._1) == Some(TrueLiteral) =>
         // If the first branch is a true literal, remove the entire CaseWhen and use the value
         // from that. Note that CaseWhen.branches should never be empty, and as a result the
-        // headOption (rather than head) added above is just a extra (and unnecessary) safeguard.
+        // headOption (rather than head) added above is just an extra (and unnecessary) safeguard.
         branches.head._2
     }
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index d042e191a9e867248765ae228c5c968764ae3dd3..d687a85c18b634cc4c2bf8f08cfd311e104bf932 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -59,7 +59,7 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
     }
   }
 
-  /** Get the builder (visitor) which converts a ParseTree into a AST. */
+  /** Get the builder (visitor) which converts a ParseTree into an AST. */
   protected def astBuilder: AstBuilder
 
   protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 8b7e21b67982d9a94651d3e37fc09b891c6fbaec..898784dab1d983920f18bbfb52354da769faf608 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -530,7 +530,7 @@ private[sql] object Expand {
 
   /**
    * Apply the all of the GroupExpressions to every input row, hence we will get
-   * multiple output rows for a input row.
+   * multiple output rows for an input row.
    *
    * @param bitmasks The bitmask set represents the grouping sets
    * @param groupByAliases The aliased original group by expressions
@@ -572,7 +572,7 @@ private[sql] object Expand {
 
 /**
 * Apply a number of projections to every input row, hence we will get multiple output rows for
- * a input row.
+ * an input row.
 *
 * @param projections to apply
 * @param output of all projections.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala
index 42bdab42b79ffd517d30ec690bda721a6f82d4b3..b46f7a6d5a1381b33c47321d5203ea03a7340ec7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst
 
 /**
- * A a collection of common abstractions for query plans as well as
+ * A collection of common abstractions for query plans as well as
 * a base logical plan representation.
 */
 package object plans
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala
index d449088498c8e4fa6b5a218eab915bafff010d16..51d78dd1233fe6b5b037991bf48396d5b8e69346 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala
@@ -67,7 +67,7 @@ case class ClusteredDistribution(clustering: Seq[Expression]) extends Distributi
 case class OrderedDistribution(ordering: Seq[SortOrder]) extends Distribution {
   require(
     ordering != Nil,
-    "The ordering expressions of a OrderedDistribution should not be Nil. " +
+    "The ordering expressions of an OrderedDistribution should not be Nil. " +
       "An AllTuples should be used to represent a distribution that only has " +
       "a single partition.")
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
index 31604bad0ff809c9f8e15e28927add3d7833e5d7..52e021070e940e87e22646ca3bb38f620bad9744 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
@@ -366,7 +366,7 @@ object Decimal {
   val ROUND_CEILING = BigDecimal.RoundingMode.CEILING
   val ROUND_FLOOR = BigDecimal.RoundingMode.FLOOR
 
-  /** Maximum number of decimal digits a Int can represent */
+  /** Maximum number of decimal digits an Int can represent */
   val MAX_INT_DIGITS = 9
 
   /** Maximum number of decimal digits a Long can represent */
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
index 6b7e3714e0b047ff02d171a993e559555f2c2777..6500875f95e54637decbff6dfd2761f5eaf5927c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
@@ -152,7 +152,7 @@ object DecimalType extends AbstractDataType {
   }
 
   /**
-   * Returns if dt is a DecimalType that fits inside a int
+   * Returns if dt is a DecimalType that fits inside an int
    */
   def is32BitDecimalType(dt: DataType): Boolean = {
     dt match {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index 2e85e3676747a8b779d8897535e41d7fe74f2dd5..c7b887ecd4d3e8852838887fb3f406589946c4b8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -529,7 +529,7 @@ final class DataFrameWriter private[sql](df: DataFrame) {
   }
 
   /**
-   * Saves the content of the [[DataFrame]] to a external database table via JDBC. In the case the
+   * Saves the content of the [[DataFrame]] to an external database table via JDBC. In the case the
    * table already exists in the external database, behavior of this function depends on the
    * save mode, specified by the `mode` function (default to throwing an exception).
    *
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala
index 1be41ffc072c141d30dde0584a2f586d988a5dc4..4c046f7bdca4898002446550c02935f42b4f5bcf 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala
@@ -27,7 +27,7 @@ import org.apache.spark.sql.execution.metric.SQLMetrics
 
 /**
 * Apply all of the GroupExpressions to every input row, hence we will get
- * multiple output rows for a input row.
+ * multiple output rows for an input row.
 * @param projections The group of expressions, all of the group expressions should
 *                    output the same schema specified bye the parameter `output`
 * @param output The output Schema
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala
index e81cd28ea34d100d4e13de859b72a5deeaadd52c..5f0c26441692d341dd7a93b96bfdb2c62646d6a6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala
@@ -95,7 +95,7 @@ class CoalescedPartitioner(val parent: Partitioner, val partitionStartIndices: A
 * interfaces / internals.
 *
 * This RDD takes a [[ShuffleDependency]] (`dependency`),
- * and a optional array of partition start indices as input arguments
+ * and an optional array of partition start indices as input arguments
 * (`specifiedPartitionStartIndices`).
 *
 * The `dependency` has the parent RDD of this RDD, which represents the dataset before shuffle
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index 908e22de73bdc057b7651569e1d937c2c33e04d5..2aec9318941a732823f160087c2d141c2f74fe41 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -445,7 +445,7 @@ case class CollapseCodegenStages(conf: SQLConf) extends Rule[SparkPlan] {
   }
 
   /**
-   * Inserts a InputAdapter on top of those that do not support codegen.
+   * Inserts an InputAdapter on top of those that do not support codegen.
    */
   private def insertInputAdapter(plan: SparkPlan): SparkPlan = plan match {
     case j @ SortMergeJoinExec(_, _, _, _, left, right) if j.supportCodegen =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
index 243aa15deba3db7846c68ced87f1ef6614b7830e..4b8adf5230717ce5ef35ebaddbef260cc7e39983 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
@@ -41,7 +41,7 @@ import org.apache.spark.unsafe.KVIterator
 *  - Step 0: Do hash-based aggregation.
 *  - Step 1: Sort all entries of the hash map based on values of grouping expressions and
 *            spill them to disk.
- *  - Step 2: Create a external sorter based on the spilled sorted map entries and reset the map.
+ *  - Step 2: Create an external sorter based on the spilled sorted map entries and reset the map.
 *  - Step 3: Get a sorted [[KVIterator]] from the external sorter.
 *  - Step 4: Repeat step 0 until no more input.
 *  - Step 5: Initialize sort-based aggregation on the sorted iterator.
@@ -434,12 +434,12 @@ class TungstenAggregationIterator(
   ///////////////////////////////////////////////////////////////////////////
 
   /**
-   * Generate a output row when there is no input and there is no grouping expression.
+   * Generate an output row when there is no input and there is no grouping expression.
    */
   def outputForEmptyGroupingKeyWithoutInput(): UnsafeRow = {
     if (groupingExpressions.isEmpty) {
       sortBasedAggregationBuffer.copyFrom(initialAggregationBuffer)
-      // We create a output row and copy it. So, we can free the map.
+      // We create an output row and copy it. So, we can free the map.
       val resultCopy =
         generateOutput(UnsafeRow.createFromByteArray(0, 0), sortBasedAggregationBuffer).copy()
       hashMap.free()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
index 89bde6ad73f3b74f3d4ecc641ecd8b050c1e9d5a..185c79f899e683b74f9b8eeda7f551a1307ff522 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
@@ -142,9 +142,9 @@ case class FilterExec(condition: Expression, child: SparkPlan)
 
     // To generate the predicates we will follow this algorithm.
     // For each predicate that is not IsNotNull, we will generate them one by one loading attributes
-    // as necessary. For each of both attributes, if there is a IsNotNull predicate we will generate
-    // that check *before* the predicate. After all of these predicates, we will generate the
-    // remaining IsNotNull checks that were not part of other predicates.
+    // as necessary. For each of both attributes, if there is an IsNotNull predicate we will
+    // generate that check *before* the predicate. After all of these predicates, we will generate
+    // the remaining IsNotNull checks that were not part of other predicates.
     // This has the property of not doing redundant IsNotNull checks and taking better advantage of
     // short-circuiting, not loading attributes until they are needed.
     // This is very perf sensitive.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
index 3b064a5bc489f222a1be3fb35bf58cf07e27ea3f..61dcbebd644f65129b769e2520282e5334fa04fb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
@@ -176,7 +176,7 @@ private[sql] abstract class BaseWriterContainer(
         val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext])
         ctor.newInstance(new Path(outputPath), context)
       } else {
-        // The specified output committer is just a OutputCommitter.
+        // The specified output committer is just an OutputCommitter.
         // So, we will use the no-argument constructor.
         val ctor = clazz.getDeclaredConstructor()
         ctor.newInstance()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala
index 0f5d6445b1e2bb1f6cdc6458046959a58d088740..2cc012840dcaab49056b65140a07f272d8044238 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.streaming
 
 /**
- * A offset is a monotonically increasing metric used to track progress in the computation of a
+ * An offset is a monotonically increasing metric used to track progress in the computation of a
 * stream. An [[Offset]] must be comparable, and the result of `compareTo` must be consistent
 * with `equals` and `hashcode`.
 */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 0b490fe71c526cda6398f6f6953f451d562e0e43..d89e98645b3670a0d8a60a18806cf8463f70b2bb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1996,7 +1996,7 @@ object functions {
 
   /**
    * Computes the numeric value of the first character of the string column, and returns the
-   * result as a int column.
+   * result as an int column.
    *
    * @group string_funcs
    * @since 1.5.0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index f3f36efda599b7c4a3b14947f74c3e510f34b61a..ceb68622752ea0100354317283040d7dd254243c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -356,7 +356,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
     val tableIdent = sparkSession.sessionState.sqlParser.parseTableIdentifier(tableName)
     sessionCatalog.refreshTable(tableIdent)
 
-    // If this table is cached as a InMemoryRelation, drop the original
+    // If this table is cached as an InMemoryRelation, drop the original
     // cached version and make the new version cached lazily.
     val logicalPlan = sparkSession.sessionState.catalog.lookupRelation(tableIdent)
     // Use lookupCachedData directly since RefreshTable also takes databaseName.
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
index cef5912c6240965a8b6b2ea89e789409ebf7fd37..de70fdc14ef34ebe04692e29b8fa7caaa86b0f75 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
@@ -157,7 +157,7 @@ object HiveThriftServer2 extends Logging {
 
 
   /**
-   * A inner sparkListener called in sc.stop to clean up the HiveThriftServer2
+   * An inner sparkListener called in sc.stop to clean up the HiveThriftServer2
    */
   private[thriftserver] class HiveThriftServer2Listener(
      val server: HiveServer2,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
index d033b05d4806eb105013b690be5d2ea9d0ea1fb3..88f4a2d2b20bac32d7ee9c71c9cd5b0d389ee605 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
@@ -391,7 +391,7 @@ private[spark] object HiveUtils extends Logging {
     // Remote means that the metastore server is running in its own process.
     // When the mode is remote, configurations like "javax.jdo.option.ConnectionURL" will not be
     // used (because they are used by remote metastore server that talks to the database).
-    // Because execution Hive should always connects to a embedded derby metastore.
+    // Because execution Hive should always connects to an embedded derby metastore.
     // We have to remove the value of hive.metastore.uris. So, the execution Hive client connects
     // to the actual embedded derby metastore instead of the remote metastore.
     // You can search HiveConf.ConfVars.METASTOREURIS in the code of HiveConf (in Hive's repo).
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
index 9e8ff9317c1080b41f91c39312743194aa337e73..b3896484da178ba9f3e3b974ebe363f728dda38d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
@@ -211,7 +211,7 @@ case class InsertIntoHiveTable(
       val warningMessage =
         s"$outputCommitterClass may be an output committer that writes data directly to " +
           "the final location. Because speculation is enabled, this output committer may " +
-          "cause data loss (see the case in SPARK-10063). If possible, please use a output " +
+          "cause data loss (see the case in SPARK-10063). If possible, please use an output " +
           "committer that does not have this behavior (e.g. FileOutputCommitter)."
       logWarning(warningMessage)
     }
@@ -250,7 +250,7 @@ case class InsertIntoHiveTable(
       orderedPartitionSpec.put(entry.getName, partitionSpec.getOrElse(entry.getName, ""))
     }
 
-    // inheritTableSpecs is set to true. It should be set to false for a IMPORT query
+    // inheritTableSpecs is set to true. It should be set to false for an IMPORT query
     // which is currently considered as a Hive native command.
     val inheritTableSpecs = true
     // TODO: Correctly set isSkewedStoreAsSubdir.
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
index 13d2bed606a79280bdfc349e307cadc021410e26..0589c8ece3910c6842677ae28658ea403b55a995 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.types.StructType
 
 private[orc] object OrcFileOperator extends Logging {
   /**
-   * Retrieves a ORC file reader from a given path. The path can point to either a directory or a
+   * Retrieves an ORC file reader from a given path. The path can point to either a directory or a
    * single ORC file. If it points to an directory, it picks any non-empty ORC file within that
    * directory.
    *
@@ -42,7 +42,7 @@ private[orc] object OrcFileOperator extends Logging {
    *       ORC file if the file contains zero rows. This is OK for Hive since the schema of the
    *       table is managed by metastore. But this becomes a problem when reading ORC files
    *       directly from HDFS via Spark SQL, because we have to discover the schema from raw ORC
-   *       files. So this method always tries to find a ORC file whose schema is non-empty, and
+   *       files. So this method always tries to find an ORC file whose schema is non-empty, and
    *       create the result reader from that file. If no such file is found, it returns `None`.
    * @todo Needs to consider all files when schema evolution is taken into account.
    */
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
index 6ececb1062ae124d4c26767a4a8f8971e106580f..0b1102686319922d8f2b333bb2817ddb815bc5ad 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
@@ -154,7 +154,7 @@ object Checkpoint extends Logging {
 
     Utils.tryWithSafeFinally {
       // ObjectInputStream uses the last defined user-defined class loader in the stack
-      // to find classes, which maybe the wrong class loader. Hence, a inherited version
+      // to find classes, which maybe the wrong class loader. Hence, an inherited version
      // of ObjectInputStream is used to explicitly use the current thread's default class
       // loader to find and load classes. This is a well know Java issue and has popped up
       // in other places (e.g., http://jira.codehaus.org/browse/GROOVY-1627)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
index 928739a416f0f6bd4d38c7574246e600146384fe..b524af9578b7223515a46cd0e7de64b96ea936bc 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
@@ -322,7 +322,7 @@ class StreamingContext private[streaming] (
   }
 
   /**
-   * Create a input stream from network source hostname:port, where data is received
+   * Create an input stream from network source hostname:port, where data is received
    * as serialized blocks (serialized using the Spark's serializer) that can be directly
    * pushed into the block manager without deserializing them. This is the most efficient
    * way to receive data.
@@ -341,7 +341,7 @@ class StreamingContext private[streaming] (
   }
 
   /**
-   * Create a input stream that monitors a Hadoop-compatible filesystem
+   * Create an input stream that monitors a Hadoop-compatible filesystem
    * for new files and reads them using the given key-value types and input format.
    * Files must be written to the monitored directory by "moving" them from another
    * location within the same file system. File names starting with . are ignored.
@@ -359,7 +359,7 @@ class StreamingContext private[streaming] (
   }
 
   /**
-   * Create a input stream that monitors a Hadoop-compatible filesystem
+   * Create an input stream that monitors a Hadoop-compatible filesystem
    * for new files and reads them using the given key-value types and input format.
    * Files must be written to the monitored directory by "moving" them from another
    * location within the same file system.
@@ -379,7 +379,7 @@ class StreamingContext private[streaming] (
   }
 
   /**
-   * Create a input stream that monitors a Hadoop-compatible filesystem
+   * Create an input stream that monitors a Hadoop-compatible filesystem
    * for new files and reads them using the given key-value types and input format.
    * Files must be written to the monitored directory by "moving" them from another
    * location within the same file system. File names starting with . are ignored.
@@ -403,7 +403,7 @@ class StreamingContext private[streaming] (
   }
 
   /**
-   * Create a input stream that monitors a Hadoop-compatible filesystem
+   * Create an input stream that monitors a Hadoop-compatible filesystem
    * for new files and reads them as text files (using key as LongWritable, value
    * as Text and input format as TextInputFormat). Files must be written to the
    * monitored directory by "moving" them from another location within the same
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala
index 42fc84c19b971c0bf0594ba86168d15c8300be9e..faf6db82d5b18593bf561bd6b3629d7c9bd25fee 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala
@@ -79,7 +79,7 @@ private[streaming] abstract class ReceiverSupervisor(
       optionalBlockId: Option[StreamBlockId]
     ): Unit
 
-  /** Store a iterator of received data as a data block into Spark's memory. */
+  /** Store an iterator of received data as a data block into Spark's memory. */
   def pushIterator(
       iterator: Iterator[_],
       optionalMetadata: Option[Any],
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala
index 4fb0f8caacbb687ddb41e00f2c3afda631cc596d..5ba09a54af18df28f00e17df73e966abc6daedd9 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala
@@ -129,7 +129,7 @@ private[streaming] class ReceiverSupervisorImpl(
     pushAndReportBlock(ArrayBufferBlock(arrayBuffer), metadataOption, blockIdOption)
   }
 
-  /** Store a iterator of received data as a data block into Spark's memory. */
+  /** Store an iterator of received data as a data block into Spark's memory. */
   def pushIterator(
       iterator: Iterator[_],
       metadataOption: Option[Any],
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala
index f7b6584893c6ee125667635235799d0bc374fe67..fb5587edeccee2fa0c08e6ad573920b8574e8ac0 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala
@@ -38,7 +38,7 @@ import org.apache.spark.util.{Clock, Utils}
 *  - Periodically take the average batch completion times and compare with the batch interval
 *  - If (avg. proc. time / batch interval) >= scaling up ratio, then request more executors.
 *    The number of executors requested is based on the ratio = (avg. proc. time / batch interval).
- *  - If (avg. proc. time / batch interval) <= scaling down ratio, then try to kill a executor that
+ *  - If (avg. proc. time / batch interval) <= scaling down ratio, then try to kill an executor that
 *    is not running a receiver.
 *
 * This features should ideally be used in conjunction with backpressure, as backpressure ensures
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
index 4f124a1356b5ab5dc2cc6ab13329f6d6a4a2ade5..8e1a09061843301c2e5b0764be05ce8a66d58a7d 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
@@ -67,7 +67,7 @@ private[streaming] class InputInfoTracker(ssc: StreamingContext) extends Logging
 
     if (inputInfos.contains(inputInfo.inputStreamId)) {
       throw new IllegalStateException(s"Input stream ${inputInfo.inputStreamId} for batch" +
-        s"$batchTime is already added into InputInfoTracker, this is a illegal state")
+        s"$batchTime is already added into InputInfoTracker, this is an illegal state")
     }
     inputInfos += ((inputInfo.inputStreamId, inputInfo))
   }
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
index 1ef26d2f865da6bebacb3dbb3d8459e2c32332a0..60122b48135f7a539fadf80dc0990bf55a76b1a7 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
@@ -86,7 +86,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
 
   /**
    * Generate a row for a Spark Job. Because duplicated output op infos needs to be collapsed into
-   * one cell, we use "rowspan" for the first row of a output op.
+   * one cell, we use "rowspan" for the first row of an output op.
    */
   private def generateNormalJobRow(
       outputOpData: OutputOperationUIData,