Skip to content
Snippets Groups Projects
Commit 85ce5f27 authored by Matei Zaharia's avatar Matei Zaharia
Browse files

Merge pull request #308 from admobius/multi-zone

Let EC2 script launch slaves in multiple availability zones
parents 3ff6f4bd 606d252d
No related branches found
No related tags found
No related merge requests found
...@@ -61,7 +61,9 @@ def parse_args(): ...@@ -61,7 +61,9 @@ def parse_args():
parser.add_option("-r", "--region", default="us-east-1", parser.add_option("-r", "--region", default="us-east-1",
help="EC2 region zone to launch instances in") help="EC2 region zone to launch instances in")
parser.add_option("-z", "--zone", default="", parser.add_option("-z", "--zone", default="",
help="Availability zone to launch instances in") help="Availability zone to launch instances in, or 'all' to spread " +
"slaves across multiple (an additional $0.01/Gb for bandwidth" +
"between zones applies)")
parser.add_option("-a", "--ami", default="latest", parser.add_option("-a", "--ami", default="latest",
help="Amazon Machine Image ID to use, or 'latest' to use latest " + help="Amazon Machine Image ID to use, or 'latest' to use latest " +
"available AMI (default: latest)") "available AMI (default: latest)")
...@@ -217,17 +219,25 @@ def launch_cluster(conn, opts, cluster_name): ...@@ -217,17 +219,25 @@ def launch_cluster(conn, opts, cluster_name):
# Launch spot instances with the requested price # Launch spot instances with the requested price
print ("Requesting %d slaves as spot instances with price $%.3f" % print ("Requesting %d slaves as spot instances with price $%.3f" %
(opts.slaves, opts.spot_price)) (opts.slaves, opts.spot_price))
slave_reqs = conn.request_spot_instances( zones = get_zones(conn, opts)
price = opts.spot_price, num_zones = len(zones)
image_id = opts.ami, i = 0
launch_group = "launch-group-%s" % cluster_name, my_req_ids = []
placement = opts.zone, for zone in zones:
count = opts.slaves, num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
key_name = opts.key_pair, slave_reqs = conn.request_spot_instances(
security_groups = [slave_group], price = opts.spot_price,
instance_type = opts.instance_type, image_id = opts.ami,
block_device_map = block_map) launch_group = "launch-group-%s" % cluster_name,
my_req_ids = [req.id for req in slave_reqs] placement = zone,
count = num_slaves_this_zone,
key_name = opts.key_pair,
security_groups = [slave_group],
instance_type = opts.instance_type,
block_device_map = block_map)
my_req_ids += [req.id for req in slave_reqs]
i += 1
print "Waiting for spot instances to be granted..." print "Waiting for spot instances to be granted..."
try: try:
while True: while True:
...@@ -262,20 +272,30 @@ def launch_cluster(conn, opts, cluster_name): ...@@ -262,20 +272,30 @@ def launch_cluster(conn, opts, cluster_name):
sys.exit(0) sys.exit(0)
else: else:
# Launch non-spot instances # Launch non-spot instances
slave_res = image.run(key_name = opts.key_pair, zones = get_zones(conn, opts)
security_groups = [slave_group], num_zones = len(zones)
instance_type = opts.instance_type, i = 0
placement = opts.zone, slave_nodes = []
min_count = opts.slaves, for zone in zones:
max_count = opts.slaves, num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
block_device_map = block_map) slave_res = image.run(key_name = opts.key_pair,
slave_nodes = slave_res.instances security_groups = [slave_group],
print "Launched slaves, regid = " + slave_res.id instance_type = opts.instance_type,
placement = zone,
min_count = num_slaves_this_zone,
max_count = num_slaves_this_zone,
block_device_map = block_map)
slave_nodes += slave_res.instances
print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
zone, slave_res.id)
i += 1
# Launch masters # Launch masters
master_type = opts.master_instance_type master_type = opts.master_instance_type
if master_type == "": if master_type == "":
master_type = opts.instance_type master_type = opts.instance_type
if opts.zone == 'all':
opts.zone = random.choice(conn.get_all_zones()).name
master_res = image.run(key_name = opts.key_pair, master_res = image.run(key_name = opts.key_pair,
security_groups = [master_group], security_groups = [master_group],
instance_type = master_type, instance_type = master_type,
...@@ -284,7 +304,7 @@ def launch_cluster(conn, opts, cluster_name): ...@@ -284,7 +304,7 @@ def launch_cluster(conn, opts, cluster_name):
max_count = 1, max_count = 1,
block_device_map = block_map) block_device_map = block_map)
master_nodes = master_res.instances master_nodes = master_res.instances
print "Launched master, regid = " + master_res.id print "Launched master in %s, regid = %s" % (zone, master_res.id)
zoo_nodes = [] zoo_nodes = []
...@@ -474,6 +494,23 @@ def ssh(host, opts, command): ...@@ -474,6 +494,23 @@ def ssh(host, opts, command):
(opts.identity_file, opts.user, host, command), shell=True) (opts.identity_file, opts.user, host, command), shell=True)
# Gets a list of zones to launch instances in
def get_zones(conn, opts):
    """Return the availability zone names that instances should be launched in.

    If opts.zone is the literal string 'all', the result holds the name of
    every zone returned by conn.get_all_zones(). Otherwise the result is a
    one-element list containing opts.zone exactly as given.
    """
    # A specific zone was requested: no need to query the connection.
    if opts.zone != 'all':
        return [opts.zone]
    return [available.name for available in conn.get_all_zones()]
# Gets the number of items in a partition
def get_partition(total, num_partitions, current_partitions):
    """Return how many of `total` items fall into one partition.

    The items are divided as evenly as possible across `num_partitions`
    partitions. Partitions whose zero-based index `current_partitions` is
    below `total % num_partitions` receive one extra item, so the sizes over
    all partition indices add up to `total`.

    Raises ZeroDivisionError if `num_partitions` is 0.
    """
    # divmod performs floor division, so the count stays an integer on both
    # Python 2 and Python 3 (plain `/` yields a float on Python 3).
    base, remainder = divmod(total, num_partitions)
    if current_partitions < remainder:
        return base + 1
    return base
def main(): def main():
(opts, action, cluster_name) = parse_args() (opts, action, cluster_name) = parse_args()
conn = boto.ec2.connect_to_region(opts.region) conn = boto.ec2.connect_to_region(opts.region)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment