diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R index fee4a4cc9ff276e9e4f037deda51c1dc91bb961f..552cbe40dad498193471c6f5eda7937a57009c53 100644 --- a/R/pkg/R/mllib_classification.R +++ b/R/pkg/R/mllib_classification.R @@ -145,7 +145,7 @@ setMethod("summary", signature(object = "LogisticRegressionModel"), labels <- callJMethod(jobj, "labels") coefficients <- callJMethod(jobj, "rCoefficients") nCol <- length(coefficients) / length(features) - coefficients <- matrix(coefficients, ncol = nCol) + coefficients <- matrix(unlist(coefficients), ncol = nCol) # If nCol == 1, means this is a binomial logistic regression model with pivoting. # Otherwise, it's a multinomial logistic regression model without pivoting. if (nCol == 1) { diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R index e384c7398b22f4b675c0df0df27aba80c00bd74f..3b782ce41e458f9ed3c7aa5578751a10dcbb47cf 100644 --- a/R/pkg/R/mllib_clustering.R +++ b/R/pkg/R/mllib_clustering.R @@ -390,7 +390,7 @@ setMethod("summary", signature(object = "KMeansModel"), coefficients <- callJMethod(jobj, "coefficients") k <- callJMethod(jobj, "k") size <- callJMethod(jobj, "size") - coefficients <- t(matrix(coefficients, ncol = k)) + coefficients <- t(matrix(unlist(coefficients), ncol = k)) colnames(coefficients) <- unlist(features) rownames(coefficients) <- 1:k cluster <- if (is.loaded) { diff --git a/R/pkg/R/mllib_regression.R b/R/pkg/R/mllib_regression.R index 79086003463e7e6b50324a5ea78c6b0ab1307dd8..96ee220bc41137e06a42a8e388076bc5b6edae0f 100644 --- a/R/pkg/R/mllib_regression.R +++ b/R/pkg/R/mllib_regression.R @@ -182,11 +182,11 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), # coefficients, standard error of coefficients, t value and p value. Otherwise, # it will be fitted by local "l-bfgs", we can only provide coefficients. if (length(features) == length(coefficients)) { - coefficients <- matrix(coefficients, ncol = 1) + coefficients <- matrix(unlist(coefficients), ncol = 1) colnames(coefficients) <- c("Estimate") rownames(coefficients) <- unlist(features) } else { - coefficients <- matrix(coefficients, ncol = 4) + coefficients <- matrix(unlist(coefficients), ncol = 4) colnames(coefficients) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)") rownames(coefficients) <- unlist(features) } diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R index 2e0dea321e7bf1436f32bbfa8b0cbad205d09008..5f84a620c1089c42ff59a1ba0c5f319c5abefbe8 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_classification.R +++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R @@ -68,12 +68,17 @@ test_that("spark.logit", { df <- suppressWarnings(createDataFrame(iris)) model <- spark.logit(df, Species ~ ., regParam = 0.5) summary <- summary(model) + + # test summary coefficients return matrix type + expect_true(class(summary$coefficients) == "matrix") + expect_true(class(summary$coefficients[, 1]) == "numeric") + versicolorCoefsR <- c(1.52, 0.03, -0.53, 0.04, 0.00) virginicaCoefsR <- c(-2.62, 0.27, -0.02, 0.16, 0.42) setosaCoefsR <- c(1.10, -0.29, 0.55, -0.19, -0.42) - versicolorCoefs <- unlist(summary$coefficients[, "versicolor"]) - virginicaCoefs <- unlist(summary$coefficients[, "virginica"]) - setosaCoefs <- unlist(summary$coefficients[, "setosa"]) + versicolorCoefs <- summary$coefficients[, "versicolor"] + virginicaCoefs <- summary$coefficients[, "virginica"] + setosaCoefs <- summary$coefficients[, "setosa"] expect_true(all(abs(versicolorCoefsR - versicolorCoefs) < 0.1)) expect_true(all(abs(virginicaCoefsR - virginicaCoefs) < 0.1)) expect_true(all(abs(setosaCoefs - setosaCoefs) < 0.1)) @@ -136,8 +141,8 @@ test_that("spark.logit", { summary <- summary(model) versicolorCoefsR <- c(3.94, -0.16, -0.02, -0.35, -0.78) virginicaCoefsR <- c(-3.94, 0.16, -0.02, 0.35, 0.78) - versicolorCoefs <- unlist(summary$coefficients[, "versicolor"]) - virginicaCoefs <- unlist(summary$coefficients[, "virginica"]) + versicolorCoefs <- summary$coefficients[, "versicolor"] + virginicaCoefs <- summary$coefficients[, "virginica"] expect_true(all(abs(versicolorCoefsR - versicolorCoefs) < 0.1)) expect_true(all(abs(virginicaCoefsR - virginicaCoefs) < 0.1)) @@ -145,7 +150,7 @@ test_that("spark.logit", { model <- spark.logit(training, Species ~ ., regParam = 0.5) summary <- summary(model) coefsR <- c(-6.08, 0.25, 0.16, 0.48, 1.04) - coefs <- unlist(summary$coefficients[, "Estimate"]) + coefs <- summary$coefficients[, "Estimate"] expect_true(all(abs(coefsR - coefs) < 0.1)) # Test prediction with string label diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R index aad834bb64b8e2e9aead7881d9128482195b8989..28a6eeba2c0ac1d52801e52c7a1d99711404bc29 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R +++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R @@ -166,6 +166,10 @@ test_that("spark.kmeans", { expect_equal(k, 2) expect_equal(sort(collect(distinct(select(cluster, "prediction")))$prediction), c(0, 1)) + # test summary coefficients return matrix type + expect_true(class(summary.model$coefficients) == "matrix") + expect_true(class(summary.model$coefficients[1, ]) == "numeric") + # Test model save/load modelPath <- tempfile(pattern = "spark-kmeans", fileext = ".tmp") write.ml(model, modelPath) diff --git a/R/pkg/inst/tests/testthat/test_mllib_regression.R b/R/pkg/inst/tests/testthat/test_mllib_regression.R index c450a151713ffe91f9b06c6fcbc2d99efad5e622..81a5bdc414927a97887d6522ad6f14bd2d494210 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_regression.R +++ b/R/pkg/inst/tests/testthat/test_mllib_regression.R @@ -87,11 +87,14 @@ test_that("spark.glm summary", { # gaussian family training <- suppressWarnings(createDataFrame(iris)) stats <- summary(spark.glm(training, Sepal_Width ~ Sepal_Length + Species)) - rStats <- summary(glm(Sepal.Width ~ Sepal.Length + Species, data = iris)) - coefs <- unlist(stats$coefficients) - rCoefs <- unlist(rStats$coefficients) + # test summary coefficients return matrix type + expect_true(class(stats$coefficients) == "matrix") + expect_true(class(stats$coefficients[, 1]) == "numeric") + + coefs <- stats$coefficients + rCoefs <- rStats$coefficients expect_true(all(abs(rCoefs - coefs) < 1e-4)) expect_true(all( rownames(stats$coefficients) == @@ -117,8 +120,8 @@ test_that("spark.glm summary", { rStats <- summary(glm(Species ~ Sepal.Length + Sepal.Width, data = rTraining, family = binomial(link = "logit"))) - coefs <- unlist(stats$coefficients) - rCoefs <- unlist(rStats$coefficients) + coefs <- stats$coefficients + rCoefs <- rStats$coefficients expect_true(all(abs(rCoefs - coefs) < 1e-4)) expect_true(all( rownames(stats$coefficients) == @@ -141,8 +144,8 @@ test_that("spark.glm summary", { stats <- summary(spark.glm(df, b ~ a1 + a2, family = "binomial", weightCol = "w")) rStats <- summary(glm(b ~ a1 + a2, family = "binomial", data = data, weights = w)) - coefs <- unlist(stats$coefficients) - rCoefs <- unlist(rStats$coefficients) + coefs <- stats$coefficients + rCoefs <- rStats$coefficients expect_true(all(abs(rCoefs - coefs) < 1e-3)) expect_true(all(rownames(stats$coefficients) == c("(Intercept)", "a1", "a2"))) expect_equal(stats$dispersion, rStats$dispersion) @@ -169,7 +172,7 @@ test_that("spark.glm summary", { data <- as.data.frame(cbind(A, b)) df <- createDataFrame(data) stats <- summary(spark.glm(df, b ~ . - 1)) - coefs <- unlist(stats$coefficients) + coefs <- stats$coefficients expect_true(all(abs(c(0.5, 0.25) - coefs) < 1e-4)) }) @@ -259,8 +262,8 @@ test_that("glm summary", { rStats <- summary(glm(Sepal.Width ~ Sepal.Length + Species, data = iris)) - coefs <- unlist(stats$coefficients) - rCoefs <- unlist(rStats$coefficients) + coefs <- stats$coefficients + rCoefs <- rStats$coefficients expect_true(all(abs(rCoefs - coefs) < 1e-4)) expect_true(all( rownames(stats$coefficients) == @@ -282,8 +285,8 @@ test_that("glm summary", { rStats <- summary(glm(Species ~ Sepal.Length + Sepal.Width, data = rTraining, family = binomial(link = "logit"))) - coefs <- unlist(stats$coefficients) - rCoefs <- unlist(rStats$coefficients) + coefs <- stats$coefficients + rCoefs <- rStats$coefficients expect_true(all(abs(rCoefs - coefs) < 1e-4)) expect_true(all( rownames(stats$coefficients) ==