Skip to content
Snippets Groups Projects
Commit b3ea5793 authored by Xiangrui Meng
Browse files

[SPARK-14831][.2][ML][R] rename ml.save/ml.load to write.ml/read.ml

## What changes were proposed in this pull request?

Continue the work of #12789 to rename ml.save/ml.load to write.ml/read.ml, which are more consistent with read.df/write.df and other methods in SparkR.

I didn't rename `data` to `df` because we still use `predict` for prediction, which uses `newData` to match the signature in R.

## How was this patch tested?

Existing unit tests.

cc: yanboliang thunterdb

Author: Xiangrui Meng <meng@databricks.com>

Closes #12807 from mengxr/SPARK-14831.
parent 7fbe1bb2
No related branches found
No related tags found
No related merge requests found
......@@ -110,7 +110,7 @@ exportMethods("arrange",
"write.json",
"write.parquet",
"write.text",
"ml.save")
"write.ml")
exportClasses("Column")
......@@ -305,7 +305,7 @@ export("as.DataFrame",
"tables",
"uncacheTable",
"print.summary.GeneralizedLinearRegressionModel",
"ml.load")
"read.ml")
export("structField",
"structField.jobj",
......
......@@ -1213,6 +1213,6 @@ setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("s
#' @export
setGeneric("spark.survreg", function(data, formula, ...) { standardGeneric("spark.survreg") })
#' @rdname ml.save
#' @rdname write.ml
#' @export
setGeneric("ml.save", function(object, path, ...) { standardGeneric("ml.save") })
setGeneric("write.ml", function(object, path, ...) { standardGeneric("write.ml") })
......@@ -403,17 +403,17 @@ setMethod("spark.naiveBayes", signature(data = "SparkDataFrame", formula = "form
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
#' which means throw exception if the output path exists.
#'
#' @rdname ml.save
#' @name ml.save
#' @rdname write.ml
#' @name write.ml
#' @export
#' @examples
#' \dontrun{
#' df <- createDataFrame(sqlContext, infert)
#' model <- spark.naiveBayes(education ~ ., df, laplace = 0)
#' path <- "path/to/model"
#' ml.save(model, path)
#' write.ml(model, path)
#' }
setMethod("ml.save", signature(object = "NaiveBayesModel", path = "character"),
setMethod("write.ml", signature(object = "NaiveBayesModel", path = "character"),
function(object, path, overwrite = FALSE) {
writer <- callJMethod(object@jobj, "write")
if (overwrite) {
......@@ -429,16 +429,16 @@ setMethod("ml.save", signature(object = "NaiveBayesModel", path = "character"),
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
#' which means throw exception if the output path exists.
#'
#' @rdname ml.save
#' @name ml.save
#' @rdname write.ml
#' @name write.ml
#' @export
#' @examples
#' \dontrun{
#' model <- spark.survreg(trainingData, Surv(futime, fustat) ~ ecog_ps + rx)
#' path <- "path/to/model"
#' ml.save(model, path)
#' write.ml(model, path)
#' }
setMethod("ml.save", signature(object = "AFTSurvivalRegressionModel", path = "character"),
setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "character"),
function(object, path, overwrite = FALSE) {
writer <- callJMethod(object@jobj, "write")
if (overwrite) {
......@@ -454,16 +454,16 @@ setMethod("ml.save", signature(object = "AFTSurvivalRegressionModel", path = "ch
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
#' which means throw exception if the output path exists.
#'
#' @rdname ml.save
#' @name ml.save
#' @rdname write.ml
#' @name write.ml
#' @export
#' @examples
#' \dontrun{
#' model <- glm(y ~ x, trainingData)
#' path <- "path/to/model"
#' ml.save(model, path)
#' write.ml(model, path)
#' }
setMethod("ml.save", signature(object = "GeneralizedLinearRegressionModel", path = "character"),
setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", path = "character"),
function(object, path, overwrite = FALSE) {
writer <- callJMethod(object@jobj, "write")
if (overwrite) {
......@@ -479,16 +479,16 @@ setMethod("ml.save", signature(object = "GeneralizedLinearRegressionModel", path
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
#' which means throw exception if the output path exists.
#'
#' @rdname ml.save
#' @name ml.save
#' @rdname write.ml
#' @name write.ml
#' @export
#' @examples
#' \dontrun{
#' model <- spark.kmeans(x, k = 2, initializationMode="random")
#' path <- "path/to/model"
#' ml.save(model, path)
#' write.ml(model, path)
#' }
setMethod("ml.save", signature(object = "KMeansModel", path = "character"),
setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
function(object, path, overwrite = FALSE) {
writer <- callJMethod(object@jobj, "write")
if (overwrite) {
......@@ -501,15 +501,15 @@ setMethod("ml.save", signature(object = "KMeansModel", path = "character"),
#'
#' @param path Path of the model to read.
#' @return a fitted MLlib model
#' @rdname ml.load
#' @name ml.load
#' @rdname read.ml
#' @name read.ml
#' @export
#' @examples
#' \dontrun{
#' path <- "path/to/model"
#' model <- ml.load(path)
#' model <- read.ml(path)
#' }
ml.load <- function(path) {
read.ml <- function(path) {
path <- suppressWarnings(normalizePath(path))
jobj <- callJStatic("org.apache.spark.ml.r.RWrappers", "load", path)
if (isInstanceOf(jobj, "org.apache.spark.ml.r.NaiveBayesWrapper")) {
......
......@@ -133,10 +133,10 @@ test_that("spark.glm save/load", {
s <- summary(m)
modelPath <- tempfile(pattern = "glm", fileext = ".tmp")
ml.save(m, modelPath)
expect_error(ml.save(m, modelPath))
ml.save(m, modelPath, overwrite = TRUE)
m2 <- ml.load(modelPath)
write.ml(m, modelPath)
expect_error(write.ml(m, modelPath))
write.ml(m, modelPath, overwrite = TRUE)
m2 <- read.ml(modelPath)
s2 <- summary(m2)
expect_equal(s$coefficients, s2$coefficients)
......@@ -263,10 +263,10 @@ test_that("glm save/load", {
s <- summary(m)
modelPath <- tempfile(pattern = "glm", fileext = ".tmp")
ml.save(m, modelPath)
expect_error(ml.save(m, modelPath))
ml.save(m, modelPath, overwrite = TRUE)
m2 <- ml.load(modelPath)
write.ml(m, modelPath)
expect_error(write.ml(m, modelPath))
write.ml(m, modelPath, overwrite = TRUE)
m2 <- read.ml(modelPath)
s2 <- summary(m2)
expect_equal(s$coefficients, s2$coefficients)
......@@ -311,10 +311,10 @@ test_that("spark.kmeans", {
# Test model save/load
modelPath <- tempfile(pattern = "kmeans", fileext = ".tmp")
ml.save(model, modelPath)
expect_error(ml.save(model, modelPath))
ml.save(model, modelPath, overwrite = TRUE)
model2 <- ml.load(modelPath)
write.ml(model, modelPath)
expect_error(write.ml(model, modelPath))
write.ml(model, modelPath, overwrite = TRUE)
model2 <- read.ml(modelPath)
summary2 <- summary(model2)
expect_equal(sort(unlist(summary.model$size)), sort(unlist(summary2$size)))
expect_equal(summary.model$coefficients, summary2$coefficients)
......@@ -378,10 +378,10 @@ test_that("naiveBayes", {
# Test model save/load
modelPath <- tempfile(pattern = "naiveBayes", fileext = ".tmp")
ml.save(m, modelPath)
expect_error(ml.save(m, modelPath))
ml.save(m, modelPath, overwrite = TRUE)
m2 <- ml.load(modelPath)
write.ml(m, modelPath)
expect_error(write.ml(m, modelPath))
write.ml(m, modelPath, overwrite = TRUE)
m2 <- read.ml(modelPath)
s2 <- summary(m2)
expect_equal(s$apriori, s2$apriori)
expect_equal(s$tables, s2$tables)
......@@ -435,10 +435,10 @@ test_that("spark.survreg", {
# Test model save/load
modelPath <- tempfile(pattern = "survreg", fileext = ".tmp")
ml.save(model, modelPath)
expect_error(ml.save(model, modelPath))
ml.save(model, modelPath, overwrite = TRUE)
model2 <- ml.load(modelPath)
write.ml(model, modelPath)
expect_error(write.ml(model, modelPath))
write.ml(model, modelPath, overwrite = TRUE)
model2 <- read.ml(modelPath)
stats2 <- summary(model2)
coefs2 <- as.vector(stats2$coefficients[, 1])
expect_equal(coefs, coefs2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment