Commit d1f6c64c authored by hyukjinkwon, committed by Felix Cheung

[SPARK-19828][R] Support array type in from_json in R

## What changes were proposed in this pull request?

Since the array type cannot be defined directly in R, this PR proposes to support JSON arrays in `from_json` via a new `asJsonArray` parameter, which wraps the given `structType` schema in an array type before parsing, as below:

```R
jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]"
df <- as.DataFrame(list(list("people" = jsonArr)))
schema <- structType(structField("name", "string"))
collect(select(df, alias(from_json(df$people, schema, asJsonArray = TRUE), "arrcol")))
```

prints

```R
      arrcol
1 Bob, Alice
```
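
For reference, a short sketch (mirroring the new unit test in this patch) of how the nested values can be pulled out of the collected result; `df`, `schema`, and the `arrcol` alias are the ones from the example above:

```R
# Collected array-of-struct values come back as a list column:
# one element per row, each element a list of named lists.
arr <- collect(select(df, alias(from_json(df$people, schema, asJsonArray = TRUE), "arrcol")))
arr$arrcol[[1]][[1]]$name  # "Bob"
arr$arrcol[[1]][[2]]$name  # "Alice"
length(arr$arrcol[[1]])    # 2
```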

## How was this patch tested?

Unit tests in `test_sparkSQL.R`.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #17178 from HyukjinKwon/SPARK-19828.
parent 8fb2a02e
@@ -2437,6 +2437,7 @@ setMethod("date_format", signature(y = "Column", x = "character"),
 #'
 #' @param x Column containing the JSON string.
 #' @param schema a structType object to use as the schema to use when parsing the JSON string.
+#' @param asJsonArray indicating if input string is JSON array of objects or a single object.
 #' @param ... additional named properties to control how the json is parsed, accepts the same
 #'            options as the JSON data source.
 #'
@@ -2452,11 +2453,18 @@ setMethod("date_format", signature(y = "Column", x = "character"),
 #'}
 #' @note from_json since 2.2.0
 setMethod("from_json", signature(x = "Column", schema = "structType"),
-          function(x, schema, ...) {
+          function(x, schema, asJsonArray = FALSE, ...) {
+            if (asJsonArray) {
+              jschema <- callJStatic("org.apache.spark.sql.types.DataTypes",
+                                     "createArrayType",
+                                     schema$jobj)
+            } else {
+              jschema <- schema$jobj
+            }
             options <- varargsToStrEnv(...)
             jc <- callJStatic("org.apache.spark.sql.functions",
                               "from_json",
-                              x@jc, schema$jobj, options)
+                              x@jc, jschema, options)
             column(jc)
           })
 
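For context, a minimal usage sketch of the two code paths above, assuming an active SparkR session (`sparkR.session()`); the column names `single` and `arr` are illustrative only:

```R
df <- as.DataFrame(list(list(single = "{\"name\":\"Bob\"}",
                             arr = "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]")))
schema <- structType(structField("name", "string"))

# Default path: asJsonArray = FALSE, the structType schema is passed to the JVM as-is.
collect(select(df, from_json(df$single, schema)))

# New path: asJsonArray = TRUE wraps the schema in an ArrayType
# (DataTypes.createArrayType) before calling functions.from_json on the JVM side.
collect(select(df, from_json(df$arr, schema, asJsonArray = TRUE)))
```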
@@ -1364,6 +1364,18 @@ test_that("column functions", {
   # check for unparseable
   df <- as.DataFrame(list(list("a" = "")))
   expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]], NA)
+
+  # check if array type in string is correctly supported.
+  jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]"
+  df <- as.DataFrame(list(list("people" = jsonArr)))
+  schema <- structType(structField("name", "string"))
+  arr <- collect(select(df, alias(from_json(df$people, schema, asJsonArray = TRUE), "arrcol")))
+  expect_equal(ncol(arr), 1)
+  expect_equal(nrow(arr), 1)
+  expect_is(arr[[1]][[1]], "list")
+  expect_equal(length(arr$arrcol[[1]]), 2)
+  expect_equal(arr$arrcol[[1]][[1]]$name, "Bob")
+  expect_equal(arr$arrcol[[1]][[2]]$name, "Alice")
 })
 
 test_that("column binary mathfunctions", {
@@ -81,7 +81,7 @@ private[sql] object SQLUtils extends Logging {
     new JavaSparkContext(spark.sparkContext)
   }
 
-  def createStructType(fields : Seq[StructField]): StructType = {
+  def createStructType(fields: Seq[StructField]): StructType = {
     StructType(fields)
   }
 