From 9b2c877beccf34fc7c063574496be7e6281227ad Mon Sep 17 00:00:00 2001 From: Rishabh Bhardwaj <rbnext29@gmail.com> Date: Tue, 13 Jun 2017 15:09:12 +0100 Subject: [PATCH] [SPARK-21039][SPARK CORE] Use treeAggregate instead of aggregate in DataFrame.stat.bloomFilter ## What changes were proposed in this pull request? Use treeAggregate instead of aggregate in DataFrame.stat.bloomFilter to parallelize the merging of the bloom filters. (Please fill in changes proposed in this fix) ## How was this patch tested? Unit tests passed. (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Rishabh Bhardwaj <rbnext29@gmail.com> Author: Rishabh Bhardwaj <admin@rishabh.local> Author: Rishabh Bhardwaj <r0b00ko@rishabh.Dlink> Author: Rishabh Bhardwaj <admin@Admins-MacBook-Pro.local> Author: Rishabh Bhardwaj <r0b00ko@rishabh.local> Closes #18263 from rishabhbhardwaj/SPARK-21039. --- .../scala/org/apache/spark/sql/DataFrameStatFunctions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala index c856d3099f..531c613afb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala @@ -551,7 +551,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { ) } - singleCol.queryExecution.toRdd.aggregate(zero)( + singleCol.queryExecution.toRdd.treeAggregate(zero)( (filter: BloomFilter, row: InternalRow) => { updater(filter, row) filter -- GitLab