From 97ba4918368ba15334427bdd91230829ece606f6 Mon Sep 17 00:00:00 2001 From: hyukjinkwon <gurwls223@gmail.com> Date: Thu, 3 Aug 2017 23:00:00 +0900 Subject: [PATCH] [SPARK-21602][R] Add map_keys and map_values functions to R ## What changes were proposed in this pull request? This PR adds `map_values` and `map_keys` to the R API. ```r > df <- createDataFrame(cbind(model = rownames(mtcars), mtcars)) > tmp <- mutate(df, v = create_map(df$model, df$cyl)) > head(select(tmp, map_keys(tmp$v))) ``` ``` map_keys(v) 1 Mazda RX4 2 Mazda RX4 Wag 3 Datsun 710 4 Hornet 4 Drive 5 Hornet Sportabout 6 Valiant ``` ```r > head(select(tmp, map_values(tmp$v))) ``` ``` map_values(v) 1 6 2 6 3 4 4 6 5 8 6 6 ``` ## How was this patch tested? Manual tests and unit tests in `R/pkg/tests/fulltests/test_sparkSQL.R` Author: hyukjinkwon <gurwls223@gmail.com> Closes #18809 from HyukjinKwon/map-keys-values-r. --- R/pkg/NAMESPACE | 2 ++ R/pkg/R/functions.R | 33 ++++++++++++++++++++++++++- R/pkg/R/generics.R | 10 ++++++++ R/pkg/tests/fulltests/test_sparkSQL.R | 8 +++++++ 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 232f5cf31f..a1dd1af4bf 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -286,6 +286,8 @@ exportMethods("%<=>%", "lower", "lpad", "ltrim", + "map_keys", + "map_values", "max", "md5", "mean", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 86507f13f0..5a46d737ae 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -195,7 +195,10 @@ NULL #' head(tmp2) #' head(select(tmp, posexplode(tmp$v1))) #' head(select(tmp, sort_array(tmp$v1))) -#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))} +#' head(select(tmp, sort_array(tmp$v1, asc = FALSE))) +#' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl)) +#' head(select(tmp3, map_keys(tmp3$v3))) +#' head(select(tmp3, map_values(tmp3$v3)))} NULL #' Window functions for Column operations @@ -3055,6 +3058,34 @@ setMethod("array_contains", column(jc) }) +#' @details +#' 
\code{map_keys}: Returns an unordered array containing the keys of the map. +#' +#' @rdname column_collection_functions +#' @aliases map_keys map_keys,Column-method +#' @export +#' @note map_keys since 2.3.0 +setMethod("map_keys", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "map_keys", x@jc) + column(jc) + }) + +#' @details +#' \code{map_values}: Returns an unordered array containing the values of the map. +#' +#' @rdname column_collection_functions +#' @aliases map_values map_values,Column-method +#' @export +#' @note map_values since 2.3.0 +setMethod("map_values", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "map_values", x@jc) + column(jc) + }) + #' @details #' \code{explode}: Creates a new row for each element in the given array or map column. #' diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 92098741f7..df91c35f7d 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1213,6 +1213,16 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") }) #' @name NULL setGeneric("ltrim", function(x) { standardGeneric("ltrim") }) +#' @rdname column_collection_functions +#' @export +#' @name NULL +setGeneric("map_keys", function(x) { standardGeneric("map_keys") }) + +#' @rdname column_collection_functions +#' @export +#' @name NULL +setGeneric("map_values", function(x) { standardGeneric("map_values") }) + #' @rdname column_misc_functions #' @export #' @name NULL diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 77052d4a28..deb0e163a8 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1436,6 +1436,14 @@ test_that("column functions", { result <- collect(select(df, sort_array(df[[1]])))[[1]] expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L))) + # Test map_keys() and map_values() + df <- createDataFrame(list(list(map = as.environment(list(x 
= 1, y = 2))))) + result <- collect(select(df, map_keys(df$map)))[[1]] + expect_equal(result, list(list("x", "y"))) + + result <- collect(select(df, map_values(df$map)))[[1]] + expect_equal(result, list(list(1, 2))) + # Test that stats::lag is working expect_equal(length(lag(ldeaths, 12)), 72) -- GitLab