From ae3df4e98f160f94d1e52c90363f26eb351d0153 Mon Sep 17 00:00:00 2001
From: zero323 <zero323@users.noreply.github.com>
Date: Sun, 30 Apr 2017 12:33:03 -0700
Subject: [PATCH] [SPARK-20535][SPARKR] R wrappers for explode_outer and
 posexplode_outer

## What changes were proposed in this pull request?

Add R wrappers for

- `o.a.s.sql.functions.explode_outer`
- `o.a.s.sql.functions.posexplode_outer`

## How was this patch tested?

Additional unit tests, manual testing.

Author: zero323 <zero323@users.noreply.github.com>

Closes #17809 from zero323/SPARK-20535.
---
 R/pkg/NAMESPACE                           |  2 +
 R/pkg/R/functions.R                       | 56 +++++++++++++++++++++++
 R/pkg/R/generics.R                        |  8 ++++
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  1 +
 4 files changed, 67 insertions(+)

diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 2800461658..db8e06db18 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -234,6 +234,7 @@ exportMethods("%in%",
               "endsWith",
               "exp",
               "explode",
+              "explode_outer",
               "expm1",
               "expr",
               "factorial",
@@ -296,6 +297,7 @@ exportMethods("%in%",
               "percent_rank",
               "pmod",
               "posexplode",
+              "posexplode_outer",
               "quarter",
               "rand",
               "randn",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 6b91fa5bde..f4a34fbabe 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -3803,3 +3803,59 @@ setMethod("repeat_string",
             jc <- callJStatic("org.apache.spark.sql.functions", "repeat", x@jc, numToInt(n))
             column(jc)
           })
+
+#' explode_outer
+#'
+#' Generates one output row per element of the given array or map column.
+#' Differs from \code{explode} in that a \code{null} or empty array/map
+#' yields \code{null}.
+#'
+#' @param x Column to compute on
+#'
+#' @rdname explode_outer
+#' @name explode_outer
+#' @family collection_funcs
+#' @aliases explode_outer,Column-method
+#' @export
+#' @examples \dontrun{
+#' df <- createDataFrame(data.frame(
+#'   id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
+#' ))
+#'
+#' head(select(df, df$id, explode_outer(split_string(df$text, ","))))
+#' }
+#' @note explode_outer since 2.3.0
+setMethod("explode_outer",
+          signature(x = "Column"),
+          function(x) {
+            jcol <- callJStatic("org.apache.spark.sql.functions", "explode_outer", x@jc)
+            column(jcol)
+          })
+
+#' posexplode_outer
+#'
+#' Emits one row, with position, for every element of the given array or map column.
+#' Differs from \code{posexplode} in that a \code{null} or empty array/map
+#' yields the row (\code{null}, \code{null}).
+#'
+#' @param x Column to compute on
+#'
+#' @rdname posexplode_outer
+#' @name posexplode_outer
+#' @family collection_funcs
+#' @aliases posexplode_outer,Column-method
+#' @export
+#' @examples \dontrun{
+#' df <- createDataFrame(data.frame(
+#'   id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
+#' ))
+#'
+#' head(select(df, df$id, posexplode_outer(split_string(df$text, ","))))
+#' }
+#' @note posexplode_outer since 2.3.0
+setMethod("posexplode_outer",
+          signature(x = "Column"),
+          function(x) {
+            jcol <- callJStatic("org.apache.spark.sql.functions", "posexplode_outer", x@jc)
+            column(jcol)
+          })
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 749ee9b54c..e510ff9a2d 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1016,6 +1016,10 @@ setGeneric("encode", function(x, charset) { standardGeneric("encode") })
 #' @export
 setGeneric("explode", function(x) { standardGeneric("explode") })
 
+#' @rdname explode_outer
+#' @export
+setGeneric("explode_outer", function(x) { standardGeneric("explode_outer") })
+
 #' @rdname expr
 #' @export
 setGeneric("expr", function(x) { standardGeneric("expr") })
@@ -1175,6 +1179,10 @@ setGeneric("pmod", function(y, x) { standardGeneric("pmod") })
 #' @export
 setGeneric("posexplode", function(x) { standardGeneric("posexplode") })
 
+#' @rdname posexplode_outer
+#' @export
+setGeneric("posexplode_outer", function(x) { standardGeneric("posexplode_outer") })
+
 #' @rdname quarter
 #' @export
 setGeneric("quarter", function(x) { standardGeneric("quarter") })
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 1a3d6df437..1828cddffd 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1347,6 +1347,7 @@ test_that("column functions", {
   c18 <- covar_pop(c, c1) + covar_pop("c", "c1")
   c19 <- spark_partition_id() + coalesce(c) + coalesce(c1, c2, c3)
   c20 <- to_timestamp(c) + to_timestamp(c, "yyyy") + to_date(c, "yyyy")
+  c21 <- posexplode_outer(c) + explode_outer(c)
 
   # Test if base::is.nan() is exposed
   expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))
-- 
GitLab