From 06c0544113ba77857c5cb1bbf94dcaf21d0b01af Mon Sep 17 00:00:00 2001
From: jerryshao <sshao@hortonworks.com>
Date: Mon, 5 Jun 2017 11:06:50 -0700
Subject: [PATCH] [SPARK-20981][SPARKSUBMIT] Add new configuration
 spark.jars.repositories as the equivalent of --repositories

## What changes were proposed in this pull request?

In our use case of launching Spark applications via REST APIs (Livy), there
is no way for users to specify command-line arguments; all Spark
configurations are set through a configuration map. Because "--repositories"
has no equivalent Spark configuration, a custom repository cannot be
specified through configuration. This patch therefore adds a configuration
equivalent to "--repositories" in Spark.

## How was this patch tested?

New UT added.

Author: jerryshao <sshao@hortonworks.com>

Closes #18201 from jerryshao/SPARK-20981.
---
 .../spark/deploy/SparkSubmitArguments.scala |  2 ++
 .../spark/deploy/SparkSubmitSuite.scala     | 20 +++++++++++++++++++
 docs/configuration.md                       | 13 ++++++++++--
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 5100a17006..b76a3d2bea 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -187,6 +187,8 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     packages = Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull
     packagesExclusions = Option(packagesExclusions)
       .orElse(sparkProperties.get("spark.jars.excludes")).orNull
+    repositories = Option(repositories)
+      .orElse(sparkProperties.get("spark.jars.repositories")).orNull
     deployMode = Option(deployMode)
       .orElse(sparkProperties.get("spark.submit.deployMode"))
       .orElse(env.get("DEPLOY_MODE"))
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 6e9721c459..de719990cf 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -477,6 +477,26 @@ class SparkSubmitSuite
     }
   }
 
+  test("includes jars passed through spark.jars.packages and spark.jars.repositories") {
+    val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
+    val main = MavenCoordinate("my.great.lib", "mylib", "0.1")
+    val dep = MavenCoordinate("my.great.dep", "mylib", "0.1")
+    // Test using "spark.jars.packages" and "spark.jars.repositories" configurations.
+    IvyTestUtils.withRepository(main, Some(dep.toString), None) { repo =>
+      val args = Seq(
+        "--class", JarCreationTest.getClass.getName.stripSuffix("$"),
+        "--name", "testApp",
+        "--master", "local-cluster[2,1,1024]",
+        "--conf", "spark.jars.packages=my.great.lib:mylib:0.1,my.great.dep:mylib:0.1",
+        "--conf", s"spark.jars.repositories=$repo",
+        "--conf", "spark.ui.enabled=false",
+        "--conf", "spark.master.rest.enabled=false",
+        unusedJar.toString,
+        "my.great.lib.MyLib", "my.great.dep.MyLib")
+      runSparkSubmit(args)
+    }
+  }
+
   // TODO(SPARK-9603): Building a package is flaky on Jenkins Maven builds.
   // See https://gist.github.com/shivaram/3a2fecce60768a603dac for an error log
   ignore("correctly builds R packages included in a jar with --packages") {
diff --git a/docs/configuration.md b/docs/configuration.md
index 0771e36f80..f777811a93 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -474,10 +474,19 @@ Apart from these, the following properties are also available, and may be useful
   <td>
     Path to an Ivy settings file to customize resolution of jars specified using <code>spark.jars.packages</code>
     instead of the built-in defaults, such as maven central. Additional repositories given by the command-line
-    option <code>--repositories</code> will also be included. Useful for allowing Spark to resolve artifacts from behind
-    a firewall e.g. via an in-house artifact server like Artifactory. Details on the settings file format can be
+    option <code>--repositories</code> or <code>spark.jars.repositories</code> will also be included.
+    Useful for allowing Spark to resolve artifacts from behind a firewall e.g. via an in-house
+    artifact server like Artifactory. Details on the settings file format can be
     found at http://ant.apache.org/ivy/history/latest-milestone/settings.html
   </td>
+</tr>
+<tr>
+  <td><code>spark.jars.repositories</code></td>
+  <td></td>
+  <td>
+    Comma-separated list of additional remote repositories to search for the maven coordinates
+    given with <code>--packages</code> or <code>spark.jars.packages</code>.
+  </td>
 </tr>
 <tr>
   <td><code>spark.pyspark.driver.python</code></td>
--
GitLab
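For context, a minimal sketch of the launch path this change targets: when an application is submitted programmatically (as Livy does over REST) rather than via a spark-submit command line, the new property can be passed as ordinary configuration. This is not part of the patch; the jar path, main class, Maven coordinates, and repository URL below are hypothetical placeholders, and the sketch assumes SPARK_HOME points at a Spark installation.

```scala
import org.apache.spark.launcher.SparkLauncher

object ProgrammaticSubmit {
  def main(args: Array[String]): Unit = {
    // SparkLauncher requires SPARK_HOME to be set (or setSparkHome to be called).
    val handle = new SparkLauncher()
      .setAppResource("/path/to/myapp.jar") // hypothetical application jar
      .setMainClass("com.example.MyApp")    // hypothetical main class
      .setMaster("local[*]")
      // Equivalent to "--packages my.org:mylib:0.1" on the command line.
      .setConf("spark.jars.packages", "my.org:mylib:0.1")
      // Equivalent to "--repositories https://repo.example.com/maven";
      // expressible purely as configuration only after this patch.
      .setConf("spark.jars.repositories", "https://repo.example.com/maven")
      .startApplication()

    // Block until the launched application reaches a terminal state.
    while (!handle.getState.isFinal) Thread.sleep(1000)
  }
}
```

Because SparkLauncher ultimately invokes spark-submit, `SparkSubmitArguments` picks both values up through the `sparkProperties` fallback added above, exactly as if `--packages` and `--repositories` had been given on the command line.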