diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 2cff3ac08c3ae16d30f4898b72f2589abfe47f10..449476dec5339e272c4b96b3eb346c175e09633e 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2632,8 +2632,8 @@ setMethod("date_sub", signature(y = "Column", x = "numeric"), #' format_number #' -#' Formats numeric column y to a format like '#,###,###.##', rounded to x decimal places, -#' and returns the result as a string column. +#' Formats numeric column y to a format like '#,###,###.##', rounded to x decimal places +#' with HALF_EVEN round mode, and returns the result as a string column. #' #' If x is 0, the result has no decimal point or fractional part. #' If x < 0, the result will be null. @@ -3548,7 +3548,7 @@ setMethod("row_number", #' array_contains #' -#' Returns true if the array contain the value. +#' Returns null if the array is null, true if the array contains the value, and false otherwise. #' #' @param x A Column #' @param value A value to be checked if contained in the column diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index f9121e60f35b85920ab5132fd30a19ee7255c2ec..843ae3816f061cfb19f162436ad981e167811c54 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1327,8 +1327,8 @@ def encode(col, charset): @since(1.5) def format_number(col, d): """ - Formats the number X to a format like '#,--#,--#.--', rounded to d decimal places, - and returns the result as a string. + Formats the number X to a format like '#,--#,--#.--', rounded to d decimal places + with HALF_EVEN round mode, and returns the result as a string. :param col: the column name of the numeric value to be formatted :param d: the N decimal places @@ -1675,8 +1675,8 @@ def array(*cols): @since(1.5) def array_contains(col, value): """ - Collection function: returns True if the array contains the given value. The collection - elements and value must be of the same type. + Collection function: returns null if the array is null, true if the array contains the + given value, and false otherwise. :param col: name of column containing array :param value: value to check for in array diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index b93b7ed192104cdb9bb3d68f15f80e2cb75d1c24..db41b4edb6dde83bc3320002fccbb94a841e59f1 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -1129,6 +1129,14 @@ class SQLTests(ReusedPySparkTestCase): rndn2 = df.select('key', functions.randn(0)).collect() self.assertEqual(sorted(rndn1), sorted(rndn2)) + def test_array_contains_function(self): + from pyspark.sql.functions import array_contains + + df = self.spark.createDataFrame([(["1", "2", "3"],), ([],)], ['data']) + actual = df.select(array_contains(df.data, 1).alias('b')).collect() + # The value argument can be implicitly castable to the element's type of the array. + self.assertEqual([Row(b=True), Row(b=False)], actual) + def test_between_function(self): df = self.sc.parallelize([ Row(a=1, b=2, c=3),