diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 6efa61aa665b0f72ff6a9003f4c81c358c808d54..4aafe35d13491a23279b46ff95c8ca5f54eab4b3 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -267,7 +267,11 @@ class RDD(object): >>> def f(x): print x >>> sc.parallelize([1, 2, 3, 4, 5]).foreach(f) """ - self.map(f).collect() # Force evaluation + def processPartition(iterator): + for x in iterator: + f(x) + yield None + self.mapPartitions(processPartition).collect() # Force evaluation def collect(self): """