diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 166b39813c14e0afb67986e87e71a6f4a2b76256..3b9d42d6e715855c167c0f38b317c1e97375227c 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2646,6 +2646,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
 #' Input SparkDataFrames can have different schemas (names and data types).
 #'
 #' Note: This does not remove duplicate rows across the two SparkDataFrames.
+#' Also as standard in SQL, this function resolves columns by position (not by name).
 #'
 #' @param x A SparkDataFrame
 #' @param y A SparkDataFrame
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 99abfcc556dff07a0e54a7ba3312f97d76b0f556..8541403dfe2f1e7cf468ae81df3eca22957d1f26 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -1175,18 +1175,23 @@ class DataFrame(object):
 
     @since(2.0)
     def union(self, other):
-        """ Return a new :class:`DataFrame` containing union of rows in this
-        frame and another frame.
+        """ Return a new :class:`DataFrame` containing union of rows in this and another frame.
 
         This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union
         (that does deduplication of elements), use this function followed by a distinct.
+
+        Also as standard in SQL, this function resolves columns by position (not by name).
         """
         return DataFrame(self._jdf.union(other._jdf), self.sql_ctx)
 
     @since(1.3)
     def unionAll(self, other):
-        """ Return a new :class:`DataFrame` containing union of rows in this
-        frame and another frame.
+        """ Return a new :class:`DataFrame` containing union of rows in this and another frame.
+
+        This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union
+        (that does deduplication of elements), use this function followed by a distinct.
+
+        Also as standard in SQL, this function resolves columns by position (not by name).
 
         .. note:: Deprecated in 2.0, use union instead.
         """
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index f7637e005f31733be14c901435559433df69f19b..d28ff7888d127daa5678a5c7b2cde0319e0bedca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1734,10 +1734,11 @@ class Dataset[T] private[sql](
 
   /**
    * Returns a new Dataset containing union of rows in this Dataset and another Dataset.
-   * This is equivalent to `UNION ALL` in SQL.
    *
-   * To do a SQL-style set union (that does deduplication of elements), use this function followed
-   * by a [[distinct]].
+   * This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union (that does
+   * deduplication of elements), use this function followed by a [[distinct]].
+   *
+   * Also as standard in SQL, this function resolves columns by position (not by name).
    *
    * @group typedrel
    * @since 2.0.0
@@ -1747,10 +1748,11 @@ class Dataset[T] private[sql](
 
   /**
    * Returns a new Dataset containing union of rows in this Dataset and another Dataset.
-   * This is equivalent to `UNION ALL` in SQL.
    *
-   * To do a SQL-style set union (that does deduplication of elements), use this function followed
-   * by a [[distinct]].
+   * This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union (that does
+   * deduplication of elements), use this function followed by a [[distinct]].
+   *
+   * Also as standard in SQL, this function resolves columns by position (not by name).
    *
    * @group typedrel
    * @since 2.0.0
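
The behavioral note added throughout this patch, that `union` pairs columns by position rather than by name, can be demonstrated directly in PySpark. A minimal sketch (the DataFrame names and data below are illustrative and not taken from the patch; assumes a local Spark installation):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").appName("union-by-position").getOrCreate()

# Same column names, but declared in a different order.
df1 = spark.createDataFrame([("1", "a")], ["id", "letter"])
df2 = spark.createDataFrame([("b", "2")], ["letter", "id"])

# union() matches columns by position, so df2's "letter" values land under df1's "id" header.
df1.union(df2).show()

# To align by name instead, reorder one side explicitly before the union.
df1.union(df2.select("id", "letter")).show()

spark.stop()
```

As the updated docstrings note, following the union with a `distinct()` gives SQL's deduplicating `UNION` semantics rather than `UNION ALL`.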