diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index 48eb096063510007bc04284d6deb8c3808a65660..f1d86c0221fea897133068e17e8bf37e1048e5c2 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -84,6 +84,6 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging { } private[spark] object SparkUI { - val DEFAULT_PORT = "3030" + val DEFAULT_PORT = "4040" val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static" } diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index 3321fb5eb71b5315d207c6013324fc67d6f504ac..07c9f2382b4353a175bef9d3a72a7cd18c379ea6 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -24,7 +24,7 @@ import org.eclipse.jetty.server.Server class UISuite extends FunSuite { test("jetty port increases under contention") { - val startPort = 3030 + val startPort = 4040 val server = new Server(startPort) server.start() val (jettyServer1, boundPort1) = JettyUtils.startJettyServer("localhost", startPort, Seq()) diff --git a/docs/cluster-overview.md b/docs/cluster-overview.md index 7025c236574b63d3c6179472c720463a8b4b609f..f679cad713769d45abfc9f10936fa39c3d724d8b 100644 --- a/docs/cluster-overview.md +++ b/docs/cluster-overview.md @@ -59,8 +59,8 @@ and `addFile`. # Monitoring -Each driver program has a web UI, typically on port 3030, that displays information about running -tasks, executors, and storage usage. Simply go to `http://<driver-node>:3030` in a web browser to +Each driver program has a web UI, typically on port 4040, that displays information about running +tasks, executors, and storage usage. Simply go to `http://<driver-node>:4040` in a web browser to access this UI. The [monitoring guide](monitoring.html) also describes other monitoring options. 
# Job Scheduling diff --git a/docs/configuration.md b/docs/configuration.md index d4f85538b2a2d41c9178f6ff50898f07a2b7f8c2..7940d41a27ab5929c533ee716a1a014a65ecd2d8 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -111,7 +111,7 @@ Apart from these, the following properties are also available, and may be useful </tr> <tr> <td>spark.ui.port</td> - <td>3030</td> + <td>4040</td> <td> Port for your application's dashboard, which shows memory and workload data </td> diff --git a/docs/hardware-provisioning.md b/docs/hardware-provisioning.md index e5f054cb14bf5d6d5f9bc43dfd12db0919b6ebe5..790220500a1b3f00b8dcb66f0c03844a84d9abca 100644 --- a/docs/hardware-provisioning.md +++ b/docs/hardware-provisioning.md @@ -43,7 +43,7 @@ rest for the operating system and buffer cache. How much memory you will need will depend on your application. To determine how much your application uses for a certain dataset size, load part of your dataset in a Spark RDD and use the -Storage tab of Spark's monitoring UI (`http://<driver-node>:3030`) to see its size in memory. +Storage tab of Spark's monitoring UI (`http://<driver-node>:4040`) to see its size in memory. Note that memory usage is greatly affected by storage level and serialization format -- see the [tuning guide](tuning.html) for tips on how to reduce it. @@ -59,7 +59,7 @@ In our experience, when the data is in memory, a lot of Spark applications are n Using a **10 Gigabit** or higher network is the best way to make these applications faster. This is especially true for "distributed reduce" applications such as group-bys, reduce-bys, and SQL joins. In any given application, you can see how much data Spark shuffles across the network -from the application's monitoring UI (`http://<driver-node>:3030`). +from the application's monitoring UI (`http://<driver-node>:4040`). 
# CPU Cores diff --git a/docs/monitoring.md b/docs/monitoring.md index 0e3606f71ae54603f4f356a83dd70c6d55a31487..5f456b999b0e187434bfcee09b572a2cde508657 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -7,7 +7,7 @@ There are several ways to monitor Spark applications. # Web Interfaces -Every SparkContext launches a web UI, by default on port 3030, that +Every SparkContext launches a web UI, by default on port 4040, that displays useful information about the application. This includes: * A list of scheduler stages and tasks @@ -15,9 +15,9 @@ displays useful information about the application. This includes: * Information about the running executors * Environmental information. -You can access this interface by simply opening `http://<driver-node>:3030` in a web browser. +You can access this interface by simply opening `http://<driver-node>:4040` in a web browser. If multiple SparkContexts are running on the same host, they will bind to successive ports -beginning with 3030 (3031, 3032, etc). +beginning with 4040 (4041, 4042, etc). Spark's Standalone Mode cluster manager also has its own [web UI](spark-standalone.html#monitoring-and-logging). 
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 932e70db96e1eb5402592f8a585f3e578ac5b4d5..419d0fe13f944621dd06df9620f16c3f28bbf605 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -215,11 +215,10 @@ def launch_cluster(conn, opts, cluster_name): master_group.authorize(src_group=slave_group) master_group.authorize('tcp', 22, 22, '0.0.0.0/0') master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0') - master_group.authorize('tcp', 33000, 33000, '0.0.0.0/0') master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0') master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0') master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0') - master_group.authorize('tcp', 3030, 3035, '0.0.0.0/0') + master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0') if opts.ganglia: master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0') if slave_group.rules == []: # Group was just now created