From 2bc5e0616d878b09daa8e31a7a1fdb7127bca079 Mon Sep 17 00:00:00 2001
From: alyaxey <oleksii.sliusarenko@grammarly.com>
Date: Tue, 19 May 2015 16:45:52 -0700
Subject: [PATCH] [SPARK-6246] [EC2] fixed support for more than 100 nodes

This is a small fix, but it is important for Amazon users because, as the
ticket states, "spark-ec2 can't handle clusters with > 100 nodes" at present.

Author: alyaxey <oleksii.sliusarenko@grammarly.com>

Closes #6267 from alyaxey/ec2_100_nodes_fix and squashes the following commits:

1e0d747 [alyaxey] [SPARK-6246] fixed support for more than 100 nodes
---
 ec2/spark_ec2.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index be92d5f45a..c6d5a1f0d0 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -864,7 +864,11 @@ def wait_for_cluster_state(conn, opts, cluster_instances, cluster_state):
         for i in cluster_instances:
             i.update()
 
-        statuses = conn.get_all_instance_status(instance_ids=[i.id for i in cluster_instances])
+        max_batch = 100
+        statuses = []
+        for j in xrange(0, len(cluster_instances), max_batch):
+            batch = [i.id for i in cluster_instances[j:j + max_batch]]
+            statuses.extend(conn.get_all_instance_status(instance_ids=batch))
 
         if cluster_state == 'ssh-ready':
             if all(i.state == 'running' for i in cluster_instances) and \
--
GitLab
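
Note on the fix: EC2's DescribeInstanceStatus call (exposed by boto as
conn.get_all_instance_status) rejects requests naming more than 100 instance
IDs, which is why a single query over all cluster instances fails once the
cluster exceeds that size. The patch splits the query into batches of at most
100 IDs and merges the results. Below is a minimal standalone sketch of that
batching pattern, written in Python 3 (range in place of the patch's Python 2
xrange); the fetch_statuses parameter and the fake_fetch stub are hypothetical
stand-ins for boto's conn.get_all_instance_status, not part of the Spark code.

# Standalone sketch of the batching pattern from the patch above (Python 3,
# so `range` replaces the patch's Python 2 `xrange`). `fetch_statuses` is a
# hypothetical stand-in for boto's conn.get_all_instance_status.

MAX_BATCH = 100  # DescribeInstanceStatus accepts at most 100 instance IDs per call


def get_statuses_in_batches(instance_ids, fetch_statuses, max_batch=MAX_BATCH):
    """Fetch statuses in chunks of at most max_batch IDs and merge the results."""
    statuses = []
    for start in range(0, len(instance_ids), max_batch):
        batch = instance_ids[start:start + max_batch]
        statuses.extend(fetch_statuses(instance_ids=batch))
    return statuses


if __name__ == "__main__":
    # Stubbed fetcher that records how many IDs each request carried;
    # real code would pass conn.get_all_instance_status here instead.
    calls = []

    def fake_fetch(instance_ids):
        calls.append(len(instance_ids))
        return [{"id": iid, "state": "running"} for iid in instance_ids]

    ids = ["i-%04x" % n for n in range(250)]
    result = get_statuses_in_batches(ids, fake_fetch)
    assert len(result) == 250
    assert calls == [100, 100, 50]  # three requests instead of one oversized call

With 250 instances the sketch issues three requests of 100, 100, and 50 IDs,
which mirrors what the patched wait_for_cluster_state loop now does when it
calls conn.get_all_instance_status per batch.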