diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 3824e0a99557c0d8c0b256e57d7f8491fc9775c8..6b7a341bee889ffc6abfc0213ebc5efc2fa954fb 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -298,6 +298,8 @@ parquetFile <- function(sqlContext, ...) { #' Create a SparkDataFrame from a text file. #' #' Loads a text file and returns a SparkDataFrame with a single string column named "value". +#' If the directory structure of the text files contains partitioning information, those are +#' ignored in the resulting DataFrame. #' Each line in the text file is a new row in the resulting SparkDataFrame. #' #' @param sqlContext SQLContext to use diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 8e6bce90010e32fbbfad3651c5127001bbe42bfb..855c9d666f0bca760f72a82d40dbc6fdd655b39d 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -286,6 +286,9 @@ class DataFrameReader(object): @since(1.6) def text(self, paths): """Loads a text file and returns a [[DataFrame]] with a single string column named "value". + If the directory structure of the text files contains partitioning information, + those are ignored in the resulting DataFrame. To include partitioning information as + columns, use ``read.format('text').load(...)``. Each line in the text file is a new row in the resulting DataFrame. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index e33fd831ab4711153d57c3ca30774f35e2c1c597..57a2091fe8c77d683685ac93a4fb8271d1386ecf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -440,10 +440,14 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } /** - * Loads a text file and returns a [[Dataset]] of String. The underlying schema of the Dataset + * Loads text files and returns a [[Dataset]] of String. The underlying schema of the Dataset * contains a single string column named "value". * - * Each line in the text file is a new row in the resulting Dataset. For example: + * If the directory structure of the text files contains partitioning information, those are + * ignored in the resulting Dataset. To include partitioning information as columns, use + * `read.format("text").load("...")`. + * + * Each line in the text files is a new element in the resulting Dataset. For example: * {{{ * // Scala: * spark.read.text("/path/to/spark/README.md")