diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 06dd6b75dff3d382146f8f425780bfcd515e533e..f4c93d3c7dd6710e75d6ccc5d89f90f216d3d65d 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1566,7 +1566,7 @@ setMethod("fillna", #' @return a local R data.frame representing the contingency table. The first column of each row #' will be the distinct values of `col1` and the column names will be the distinct values #' of `col2`. The name of the first column will be `$col1_$col2`. Pairs that have no -#' occurrences will have `null` as their counts. +#' occurrences will have zero as their counts. #' #' @rdname statfunctions #' @export diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 83e02b85f06f109d3d12ac2efb09b340bce2e557..d76e051bd73a151873b2bb4242b375129cae03fd 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1130,7 +1130,7 @@ class DataFrame(object): non-zero pair frequencies will be returned. The first column of each row will be the distinct values of `col1` and the column names will be the distinct values of `col2`. The name of the first column will be `$col1_$col2`. - Pairs that have no occurrences will have `null` as their counts. + Pairs that have no occurrences will have zero as their counts. :func:`DataFrame.crosstab` and :func:`DataFrameStatFunctions.crosstab` are aliases. :param col1: The name of the first column. Distinct items will make the first item of diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala index 587869e57f96ebc0fb32314ae91b5e7081186fbb..4ec58082e7aef731d1cc44b06e6bf5e12f76afee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala @@ -77,7 +77,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * pair frequencies will be returned. * The first column of each row will be the distinct values of `col1` and the column names will * be the distinct values of `col2`. The name of the first column will be `$col1_$col2`. Counts - * will be returned as `Long`s. Pairs that have no occurrences will have `null` as their counts. + * will be returned as `Long`s. Pairs that have no occurrences will have zero as their counts. * Null elements will be replaced by "null", and back ticks will be dropped from elements if they * exist. *