Skip to content
Snippets Groups Projects
Commit a5c52c1a authored by hqzizania's avatar hqzizania Committed by Shivaram Venkataraman
Browse files

[SPARK-6820] [SPARKR] Convert NAs to null type in SparkR DataFrames

Author: hqzizania <qian.huang@intel.com>

Closes #6190 from hqzizania/R and squashes the following commits:

1641f9e [hqzizania] fixes and add test units
bb3411a [hqzizania] Convert NAs to null type in SparkR DataFrames
parent 82870d50
No related branches found
No related tags found
No related merge requests found
...@@ -37,6 +37,14 @@ writeObject <- function(con, object, writeType = TRUE) { ...@@ -37,6 +37,14 @@ writeObject <- function(con, object, writeType = TRUE) {
# passing in vectors as arrays and instead require arrays to be passed # passing in vectors as arrays and instead require arrays to be passed
# as lists. # as lists.
type <- class(object)[[1]] # class of POSIXlt is c("POSIXlt", "POSIXt") type <- class(object)[[1]] # class of POSIXlt is c("POSIXlt", "POSIXt")
# Checking types is needed here, since ‘is.na’ only handles atomic vectors,
# lists and pairlists
if (type %in% c("integer", "character", "logical", "double", "numeric")) {
if (is.na(object)) {
object <- NULL
type <- "NULL"
}
}
if (writeType) { if (writeType) {
writeType(con, type) writeType(con, type)
} }
......
...@@ -101,6 +101,43 @@ test_that("create DataFrame from RDD", { ...@@ -101,6 +101,43 @@ test_that("create DataFrame from RDD", {
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string"))) expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
}) })
test_that("convert NAs to null type in DataFrames", {
  # Collect a SparkR DataFrame locally and return the single cell at the given
  # row index and column name.
  cell <- function(sdf, row, col) {
    collect(sdf)[row, col]
  }

  # Rows of integers parallelized into an RDD; the NA sits in row 2, column "a".
  int_rows <- parallelize(sc, list(list(1L, 2L), list(NA, 4L)))
  int_rdd_df <- createDataFrame(sqlContext, int_rows, list("a", "b"))
  expect_true(is.na(cell(int_rdd_df, 2, "a")))
  expect_equal(cell(int_rdd_df, 2, "b"), 4L)

  # Local data.frame with an integer NA in column "y".
  int_local <- data.frame(x = 1L, y = c(1L, NA_integer_, 3L))
  int_local_df <- createDataFrame(sqlContext, int_local)
  expect_equal(cell(int_local_df, 2, "x"), 1L)
  expect_true(is.na(cell(int_local_df, 2, "y")))

  # Rows of doubles parallelized into an RDD; the NA sits in row 2, column "a".
  dbl_rows <- parallelize(sc, list(list(1, 2), list(NA, 4)))
  dbl_rdd_df <- createDataFrame(sqlContext, dbl_rows, list("a", "b"))
  expect_true(is.na(cell(dbl_rdd_df, 2, "a")))
  expect_equal(cell(dbl_rdd_df, 2, "b"), 4)

  # Local data.frame with a double NA in column "y".
  dbl_local <- data.frame(x = 1, y = c(1, NA_real_, 3))
  dbl_local_df <- createDataFrame(sqlContext, dbl_local)
  expect_equal(cell(dbl_local_df, 2, "x"), 1)
  expect_true(is.na(cell(dbl_local_df, 2, "y")))

  # List of strings containing a plain (logical) NA as its third element.
  chr_plain_na <- list("a", "b", NA, "d")
  chr_plain_df <- createDataFrame(sqlContext, chr_plain_na)
  expect_true(is.na(cell(chr_plain_df, 3, "_1")))
  expect_equal(cell(chr_plain_df, 4, "_1"), "d")

  # Same list, but with a character-typed NA as its third element.
  chr_typed_na <- list("a", "b", NA_character_, "d")
  chr_typed_df <- createDataFrame(sqlContext, chr_typed_na)
  expect_true(is.na(cell(chr_typed_df, 3, "_1")))
  expect_equal(cell(chr_typed_df, 4, "_1"), "d")

  # List of logicals with an NA as its third element.
  lgl_values <- list(TRUE, FALSE, NA, TRUE)
  lgl_df <- createDataFrame(sqlContext, lgl_values)
  expect_true(is.na(cell(lgl_df, 3, "_1")))
  expect_equal(cell(lgl_df, 4, "_1"), TRUE)
})
test_that("toDF", { test_that("toDF", {
rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) }) rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
df <- toDF(rdd, list("a", "b")) df <- toDF(rdd, list("a", "b"))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment