Skip to content
Snippets Groups Projects
Commit d3e026f8 authored by Shivaram Venkataraman's avatar Shivaram Venkataraman Committed by Andrew Or
Browse files

[SPARK-3674] [EC2] Clear SPARK_WORKER_INSTANCES when using YARN

cc andrewor14

Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu>

Closes #6424 from shivaram/spark-worker-instances-yarn-ec2 and squashes the following commits:

db244ae [Shivaram Venkataraman] Make Python Lint happy
0593d1b [Shivaram Venkataraman] Clear SPARK_WORKER_INSTANCES when using YARN
parent a8f1f154
No related branches found
No related tags found
No related merge requests found
...@@ -219,7 +219,8 @@ def parse_args(): ...@@ -219,7 +219,8 @@ def parse_args():
"(default: %default).") "(default: %default).")
parser.add_option( parser.add_option(
"--hadoop-major-version", default="1", "--hadoop-major-version", default="1",
help="Major version of Hadoop (default: %default)") help="Major version of Hadoop. Valid options are 1 (Hadoop 1.0.4), 2 (CDH 4.2.0), yarn " +
"(Hadoop 2.4.0) (default: %default)")
parser.add_option( parser.add_option(
"-D", metavar="[ADDRESS:]PORT", dest="proxy_port", "-D", metavar="[ADDRESS:]PORT", dest="proxy_port",
help="Use SSH dynamic port forwarding to create a SOCKS proxy at " + help="Use SSH dynamic port forwarding to create a SOCKS proxy at " +
...@@ -271,7 +272,8 @@ def parse_args(): ...@@ -271,7 +272,8 @@ def parse_args():
help="Launch fresh slaves, but use an existing stopped master if possible") help="Launch fresh slaves, but use an existing stopped master if possible")
parser.add_option( parser.add_option(
"--worker-instances", type="int", default=1, "--worker-instances", type="int", default=1,
help="Number of instances per worker: variable SPARK_WORKER_INSTANCES (default: %default)") help="Number of instances per worker: variable SPARK_WORKER_INSTANCES. Not used if YARN " +
"is used as Hadoop major version (default: %default)")
parser.add_option( parser.add_option(
"--master-opts", type="string", default="", "--master-opts", type="string", default="",
help="Extra options to give to master through SPARK_MASTER_OPTS variable " + help="Extra options to give to master through SPARK_MASTER_OPTS variable " +
...@@ -761,6 +763,10 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): ...@@ -761,6 +763,10 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
if opts.ganglia: if opts.ganglia:
modules.append('ganglia') modules.append('ganglia')
# Clear SPARK_WORKER_INSTANCES if running on YARN
if opts.hadoop_major_version == "yarn":
opts.worker_instances = ""
# NOTE: We should clone the repository before running deploy_files to # NOTE: We should clone the repository before running deploy_files to
# prevent ec2-variables.sh from being overwritten # prevent ec2-variables.sh from being overwritten
print("Cloning spark-ec2 scripts from {r}/tree/{b} on master...".format( print("Cloning spark-ec2 scripts from {r}/tree/{b} on master...".format(
...@@ -998,6 +1004,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules): ...@@ -998,6 +1004,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
master_addresses = [get_dns_name(i, opts.private_ips) for i in master_nodes] master_addresses = [get_dns_name(i, opts.private_ips) for i in master_nodes]
slave_addresses = [get_dns_name(i, opts.private_ips) for i in slave_nodes] slave_addresses = [get_dns_name(i, opts.private_ips) for i in slave_nodes]
worker_instances_str = "%d" % opts.worker_instances if opts.worker_instances else ""
template_vars = { template_vars = {
"master_list": '\n'.join(master_addresses), "master_list": '\n'.join(master_addresses),
"active_master": active_master, "active_master": active_master,
...@@ -1011,7 +1018,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules): ...@@ -1011,7 +1018,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
"spark_version": spark_v, "spark_version": spark_v,
"tachyon_version": tachyon_v, "tachyon_version": tachyon_v,
"hadoop_major_version": opts.hadoop_major_version, "hadoop_major_version": opts.hadoop_major_version,
"spark_worker_instances": "%d" % opts.worker_instances, "spark_worker_instances": worker_instances_str,
"spark_master_opts": opts.master_opts "spark_master_opts": opts.master_opts
} }
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment