From 1fe27612d7bcb8b6478a36bc16ddd4802e4ee2fc Mon Sep 17 00:00:00 2001
From: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
Date: Sun, 29 Oct 2017 18:53:47 -0700
Subject: [PATCH] [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR sets the java.io.tmpdir for CRAN checks and also disables the hsperfdata for the JVM when running CRAN checks. Together this prevents files from being left behind in `/tmp`.

## How was this patch tested?
Tested manually on a clean EC2 machine.

Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu>

Closes #19589 from shivaram/sparkr-tmpdir-clean.
---
 R/pkg/inst/tests/testthat/test_basic.R | 6 ++++--
 R/pkg/tests/run-all.R                  | 9 +++++++++
 R/pkg/vignettes/sparkr-vignettes.Rmd   | 8 +++++++-
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/R/pkg/inst/tests/testthat/test_basic.R b/R/pkg/inst/tests/testthat/test_basic.R
index de47162d53..823d26f12f 100644
--- a/R/pkg/inst/tests/testthat/test_basic.R
+++ b/R/pkg/inst/tests/testthat/test_basic.R
@@ -18,7 +18,8 @@
 context("basic tests for CRAN")
 
 test_that("create DataFrame from list or data.frame", {
-  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE,
+                 sparkConfig = sparkRTestConfig)
 
   i <- 4
   df <- createDataFrame(data.frame(dummy = 1:i))
@@ -49,7 +50,8 @@ test_that("create DataFrame from list or data.frame", {
 })
 
 test_that("spark.glm and predict", {
-  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE,
+                 sparkConfig = sparkRTestConfig)
 
   training <- suppressWarnings(createDataFrame(iris))
   # gaussian family
diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R
index a1834a2202..a7f913e5fa 100644
--- a/R/pkg/tests/run-all.R
+++ b/R/pkg/tests/run-all.R
@@ -36,8 +36,17 @@ invisible(lapply(sparkRWhitelistSQLDirs,
 sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE)
 
 sparkRTestMaster <- "local[1]"
+sparkRTestConfig <- list()
 if (identical(Sys.getenv("NOT_CRAN"), "true")) {
   sparkRTestMaster <- ""
+} else {
+  # Disable hsperfdata on CRAN
+  old_java_opt <- Sys.getenv("_JAVA_OPTIONS")
+  Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt))
+  tmpDir <- tempdir()
+  tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir)
+  sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg,
+                            spark.executor.extraJavaOptions = tmpArg)
 }
 
 test_package("SparkR")
diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd
index caeae72e37..907bbb3d66 100644
--- a/R/pkg/vignettes/sparkr-vignettes.Rmd
+++ b/R/pkg/vignettes/sparkr-vignettes.Rmd
@@ -36,6 +36,12 @@ opts_hooks$set(eval = function(options) {
   }
   options
 })
+r_tmp_dir <- tempdir()
+tmp_arg <- paste("-Djava.io.tmpdir=", r_tmp_dir, sep = "")
+sparkSessionConfig <- list(spark.driver.extraJavaOptions = tmp_arg,
+                           spark.executor.extraJavaOptions = tmp_arg)
+old_java_opt <- Sys.getenv("_JAVA_OPTIONS")
+Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt, sep = " "))
 ```
 
 ## Overview
@@ -57,7 +63,7 @@ We use default settings in which it runs in local mode. It auto downloads Spark
 
 ```{r, include=FALSE}
 install.spark()
-sparkR.session(master = "local[1]")
+sparkR.session(master = "local[1]", sparkConfig = sparkSessionConfig, enableHiveSupport = FALSE)
 ```
 ```{r, eval=FALSE}
 sparkR.session()
-- 
GitLab