Skip to content
Snippets Groups Projects
Commit ce6deb1e authored by Daoyuan, committed by Michael Armbrust
Browse files

[SQL] Code Cleanup: Left Semi Hash Join


Some improvements for PR #837: add another case to the whitelist and use `filter` to build the result iterator.

Author: Daoyuan <daoyuan.wang@intel.com>

Closes #1049 from adrian-wang/clean-LeftSemiJoinHash and squashes the following commits:

b314d5a [Daoyuan] change hashSet name
27579a9 [Daoyuan] add semijoin to white list and use filter to create new iterator in LeftSemiJoinBNL

Signed-off-by: Michael Armbrust <michael@databricks.com>
parent 4107cce5
No related branches found
No related tags found
No related merge requests found
Showing
with 67 additions and 33 deletions
......@@ -169,7 +169,7 @@ case class LeftSemiJoinHash(
def execute() = {
buildPlan.execute().zipPartitions(streamedPlan.execute()) { (buildIter, streamIter) =>
val hashTable = new java.util.HashSet[Row]()
val hashSet = new java.util.HashSet[Row]()
var currentRow: Row = null
// Create a Hash set of buildKeys
......@@ -177,43 +177,17 @@ case class LeftSemiJoinHash(
currentRow = buildIter.next()
val rowKey = buildSideKeyGenerator(currentRow)
if(!rowKey.anyNull) {
val keyExists = hashTable.contains(rowKey)
val keyExists = hashSet.contains(rowKey)
if (!keyExists) {
hashTable.add(rowKey)
hashSet.add(rowKey)
}
}
}
new Iterator[Row] {
private[this] var currentStreamedRow: Row = _
private[this] var currentHashMatched: Boolean = false
private[this] val joinKeys = streamSideKeyGenerator()
override final def hasNext: Boolean =
streamIter.hasNext && fetchNext()
override final def next() = {
currentStreamedRow
}
/**
* Searches the streamed iterator for the next row that has at least one match in hashtable.
*
* @return true if the search is successful, and false the streamed iterator runs out of
* tuples.
*/
private final def fetchNext(): Boolean = {
currentHashMatched = false
while (!currentHashMatched && streamIter.hasNext) {
currentStreamedRow = streamIter.next()
if (!joinKeys(currentStreamedRow).anyNull) {
currentHashMatched = hashTable.contains(joinKeys.currentValue)
}
}
currentHashMatched
}
}
val joinKeys = streamSideKeyGenerator()
streamIter.filter(current => {
!joinKeys(current).anyNull && hashSet.contains(joinKeys.currentValue)
})
}
}
}
......
0 val_0
0 val_0
0 val_0
2 val_2
4 val_4
5 val_5
5 val_5
5 val_5
8 val_8
9 val_9
10 val_10
0 val_0
0 val_0
0 val_0
4 val_2
8 val_4
10 val_5
10 val_5
10 val_5
val_0
val_0
val_0
val_10
val_2
val_4
val_5
val_5
val_5
val_8
val_9
0 val_0
0 val_0
0 val_0
val_10
val_8
val_9
4 val_2
8 val_4
10 val_5
10 val_5
10 val_5
0
0
0
0
0
0
2
4
4
5
5
5
8
8
9
10
10
10
10
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment