From 03d130f9734be66e8aefc4ffaa207ee13e837629 Mon Sep 17 00:00:00 2001
From: Liwei Lin <lwlin7@gmail.com>
Date: Sat, 2 Apr 2016 17:55:46 -0700
Subject: [PATCH] [SPARK-14342][CORE][DOCS][TESTS] Remove straggler references to Tachyon

## What changes were proposed in this pull request?

Straggler references to Tachyon were removed:
- for docs, `tachyon` has been generalized as `off-heap memory`;
- for Mesos test suites, the key-value pair `tachyon:true`/`tachyon:false` has been changed to `os:centos`/`os:ubuntu`, since `os` is an example constraint used in the [Mesos official docs](http://mesos.apache.org/documentation/attributes-resources/).

## How was this patch tested?

Existing test suites.

Author: Liwei Lin <lwlin7@gmail.com>

Closes #12129 from lw-lin/tachyon-cleanup.
---
 .../apache/spark/api/java/StorageLevels.java  |  4 +--
 .../cluster/mesos/MesosSchedulerUtils.scala   |  4 +--
 .../mesos/MesosSchedulerUtilsSuite.scala      | 32 +++++++++----------
 docs/running-on-mesos.md                      |  4 +--
 docs/streaming-programming-guide.md           |  2 +-
 python/pyspark/storagelevel.py                |  2 +-
 6 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/core/src/main/java/org/apache/spark/api/java/StorageLevels.java b/core/src/main/java/org/apache/spark/api/java/StorageLevels.java
index 666c797738..23673d3e3d 100644
--- a/core/src/main/java/org/apache/spark/api/java/StorageLevels.java
+++ b/core/src/main/java/org/apache/spark/api/java/StorageLevels.java
@@ -39,8 +39,8 @@ public class StorageLevels {
   /**
    * Create a new StorageLevel object.
    * @param useDisk saved to disk, if true
-   * @param useMemory saved to memory, if true
-   * @param useOffHeap saved to Tachyon, if true
+   * @param useMemory saved to on-heap memory, if true
+   * @param useOffHeap saved to off-heap memory, if true
    * @param deserialized saved as deserialized objects, if true
    * @param replication replication factor
    */
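The hunk above only rewords the javadoc, but the `useOffHeap` flag it documents is user-facing. As a minimal sketch (not part of this patch), persisting an RDD with the generalized off-heap level might look like the following; the `spark.memory.offHeap.*` settings are an assumption about how Spark sizes its own off-heap pool and may vary by version:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.storage.StorageLevel

object OffHeapPersistExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("off-heap-persist")
      .setMaster("local[2]")
      // Assumption: Spark's own off-heap pool must be enabled and sized
      // before OFF_HEAP blocks can be allocated (keys may vary by version).
      .set("spark.memory.offHeap.enabled", "true")
      .set("spark.memory.offHeap.size", "268435456") // 256 MB, in bytes
    val sc = new SparkContext(conf)

    val rdd = sc.parallelize(1 to 10000)
    // OFF_HEAP is the predefined level with useOffHeap = true; an equivalent
    // level can be built with the create/apply factory documented above.
    rdd.persist(StorageLevel.OFF_HEAP)
    println(rdd.count()) // the first action materializes the persisted blocks

    sc.stop()
  }
}
```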
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala
index 35f914355d..233bdc23e6 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala
@@ -283,11 +283,11 @@ private[mesos] trait MesosSchedulerUtils extends Logging {
    * are separated by ':'. The ':' implies equality (for singular values) and "is one of" for
    * multiple values (comma separated). For example:
    * {{{
-   * parseConstraintString("tachyon:true;zone:us-east-1a,us-east-1b")
+   * parseConstraintString("os:centos7;zone:us-east-1a,us-east-1b")
    * // would result in
    * <code>
    * Map(
-   *   "tachyon" -> Set("true"),
+   *   "os" -> Set("centos7"),
    *   "zone": -> Set("us-east-1a", "us-east-1b")
    * )
    * }}}
diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtilsSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtilsSuite.scala
index 85437b2f80..ceb3a52983 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtilsSuite.scala
@@ -59,10 +59,10 @@ class MesosSchedulerUtilsSuite extends SparkFunSuite with Matchers with MockitoS
 
   test("parse a non-empty constraint string correctly") {
     val expectedMap = Map(
-      "tachyon" -> Set("true"),
+      "os" -> Set("centos7"),
       "zone" -> Set("us-east-1a", "us-east-1b")
     )
-    utils.parseConstraintString("tachyon:true;zone:us-east-1a,us-east-1b") should be (expectedMap)
+    utils.parseConstraintString("os:centos7;zone:us-east-1a,us-east-1b") should be (expectedMap)
   }
 
   test("parse an empty constraint string correctly") {
@@ -71,35 +71,35 @@ class MesosSchedulerUtilsSuite extends SparkFunSuite with Matchers with MockitoS
 
   test("throw an exception when the input is malformed") {
     an[IllegalArgumentException] should be thrownBy
-      utils.parseConstraintString("tachyon;zone:us-east")
+      utils.parseConstraintString("os;zone:us-east")
   }
 
   test("empty values for attributes' constraints matches all values") {
-    val constraintsStr = "tachyon:"
+    val constraintsStr = "os:"
     val parsedConstraints = utils.parseConstraintString(constraintsStr)
 
-    parsedConstraints shouldBe Map("tachyon" -> Set())
+    parsedConstraints shouldBe Map("os" -> Set())
 
     val zoneSet = Value.Set.newBuilder().addItem("us-east-1a").addItem("us-east-1b").build()
-    val noTachyonOffer = Map("zone" -> zoneSet)
-    val tachyonTrueOffer = Map("tachyon" -> Value.Text.newBuilder().setValue("true").build())
-    val tachyonFalseOffer = Map("tachyon" -> Value.Text.newBuilder().setValue("false").build())
+    val noOsOffer = Map("zone" -> zoneSet)
+    val centosOffer = Map("os" -> Value.Text.newBuilder().setValue("centos").build())
+    val ubuntuOffer = Map("os" -> Value.Text.newBuilder().setValue("ubuntu").build())
 
-    utils.matchesAttributeRequirements(parsedConstraints, noTachyonOffer) shouldBe false
-    utils.matchesAttributeRequirements(parsedConstraints, tachyonTrueOffer) shouldBe true
-    utils.matchesAttributeRequirements(parsedConstraints, tachyonFalseOffer) shouldBe true
+    utils.matchesAttributeRequirements(parsedConstraints, noOsOffer) shouldBe false
+    utils.matchesAttributeRequirements(parsedConstraints, centosOffer) shouldBe true
+    utils.matchesAttributeRequirements(parsedConstraints, ubuntuOffer) shouldBe true
   }
 
   test("subset match is performed for set attributes") {
     val supersetConstraint = Map(
-      "tachyon" -> Value.Text.newBuilder().setValue("true").build(),
+      "os" -> Value.Text.newBuilder().setValue("ubuntu").build(),
       "zone" -> Value.Set.newBuilder()
         .addItem("us-east-1a")
         .addItem("us-east-1b")
         .addItem("us-east-1c")
         .build())
 
-    val zoneConstraintStr = "tachyon:;zone:us-east-1a,us-east-1c"
+    val zoneConstraintStr = "os:;zone:us-east-1a,us-east-1c"
     val parsedConstraints = utils.parseConstraintString(zoneConstraintStr)
 
     utils.matchesAttributeRequirements(parsedConstraints, supersetConstraint) shouldBe true
@@ -131,10 +131,10 @@ class MesosSchedulerUtilsSuite extends SparkFunSuite with Matchers with MockitoS
   }
 
   test("equality match is performed for text attributes") {
-    val offerAttribs = Map("tachyon" -> Value.Text.newBuilder().setValue("true").build())
+    val offerAttribs = Map("os" -> Value.Text.newBuilder().setValue("centos7").build())
 
-    val trueConstraint = utils.parseConstraintString("tachyon:true")
-    val falseConstraint = utils.parseConstraintString("tachyon:false")
+    val trueConstraint = utils.parseConstraintString("os:centos7")
+    val falseConstraint = utils.parseConstraintString("os:ubuntu")
 
     utils.matchesAttributeRequirements(trueConstraint, offerAttribs) shouldBe true
     utils.matchesAttributeRequirements(falseConstraint, offerAttribs) shouldBe false
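The suite above pins down the constraint-string grammar end to end. For readers skimming the tests, here is a self-contained sketch of a parser for that grammar, written against the documented behavior rather than Spark's actual `MesosSchedulerUtils` code (the error message text is invented for illustration):

```scala
// Illustrative parser: ';' separates attributes, ':' separates a key from its
// values, ',' separates alternative values ("is one of" semantics).
def parseConstraintString(constraints: String): Map[String, Set[String]] =
  constraints.split(";").filter(_.nonEmpty).map { attr =>
    attr.split(":", 2) match {
      case Array(key, values) =>
        // "os:" yields an empty set, which the matcher treats as "any value".
        key -> values.split(",").filter(_.nonEmpty).toSet
      case _ =>
        // "os" with no ':' is malformed, as the exception test above expects.
        throw new IllegalArgumentException(s"Missing ':' in constraint: $attr")
    }
  }.toMap

parseConstraintString("os:centos7;zone:us-east-1a,us-east-1b")
// => Map("os" -> Set("centos7"), "zone" -> Set("us-east-1a", "us-east-1b"))
```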
diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md
index 293a82882e..8e47301a75 100644
--- a/docs/running-on-mesos.md
+++ b/docs/running-on-mesos.md
@@ -215,10 +215,10 @@ conf.set("spark.mesos.coarse", "false")
 You may also make use of `spark.mesos.constraints` to set attribute based constraints on mesos resource offers. By default, all resource offers will be accepted.
 
 {% highlight scala %}
-conf.set("spark.mesos.constraints", "tachyon:true;us-east-1:false")
+conf.set("spark.mesos.constraints", "os:centos7;us-east-1:false")
 {% endhighlight %}
 
-For example, Let's say `spark.mesos.constraints` is set to `tachyon:true;us-east-1:false`, then the resource offers will be checked to see if they meet both these constraints and only then will be accepted to start new executors.
+For example, let's say `spark.mesos.constraints` is set to `os:centos7;us-east-1:false`; the resource offers will then be checked to see whether they meet both of these constraints, and only then will they be accepted to start new executors.
 
 # Mesos Docker Support
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 8d21917a7d..7f6c0ed699 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -2178,7 +2178,7 @@ overall processing throughput of the system, its use is still recommended to ach
 consistent batch processing times. Make sure you set the CMS GC on both the driver (using `--driver-java-options` in `spark-submit`) and the executors (using [Spark configuration](configuration.html#runtime-environment) `spark.executor.extraJavaOptions`).
 
 * **Other tips**: To further reduce GC overheads, here are some more tips to try.
-  - Use Tachyon for off-heap storage of persisted RDDs. See more detail in the [Spark Programming Guide](programming-guide.html#rdd-persistence).
+  - Persist RDDs using the `OFF_HEAP` storage level. See more detail in the [Spark Programming Guide](programming-guide.html#rdd-persistence).
   - Use more executors with smaller heap sizes. This will reduce the GC pressure within each JVM heap.
diff --git a/python/pyspark/storagelevel.py b/python/pyspark/storagelevel.py
index d4f184a85d..176e3bb41c 100644
--- a/python/pyspark/storagelevel.py
+++ b/python/pyspark/storagelevel.py
@@ -44,7 +44,7 @@ class StorageLevel(object):
         result = ""
         result += "Disk " if self.useDisk else ""
         result += "Memory " if self.useMemory else ""
-        result += "Tachyon " if self.useOffHeap else ""
+        result += "OffHeap " if self.useOffHeap else ""
         result += "Deserialized " if self.deserialized else "Serialized "
         result += "%sx Replicated" % self.replication
         return result
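Taken together, the suite fixes three matching rules: a text attribute matches by set membership, a set attribute by subset containment, and an empty constraint value set matches any offered value. Below is a hedged sketch of those rules over plain Scala types, with `Either` standing in for the protobuf `Value.Text`/`Value.Set` that the real `matchesAttributeRequirements` consumes; it is illustrative only, not Spark's implementation:

```scala
// An offer without the constrained key never matches; an empty constraint
// value set matches any offered value for that key.
def matches(
    constraints: Map[String, Set[String]],
    offer: Map[String, Either[String, Set[String]]]): Boolean =
  constraints.forall { case (key, wanted) =>
    offer.get(key).exists {
      case Left(text)        => wanted.isEmpty || wanted.contains(text)
      case Right(offeredSet) => wanted.isEmpty || wanted.subsetOf(offeredSet)
    }
  }

matches(Map("os" -> Set()), Map("os" -> Left("ubuntu")))          // true
matches(Map("zone" -> Set("us-east-1a")),
        Map("zone" -> Right(Set("us-east-1a", "us-east-1b"))))    // true
matches(Map("os" -> Set("centos7")), Map("os" -> Left("ubuntu"))) // false
```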