Skip to content
Snippets Groups Projects
Commit 6f986f0b authored by Petko Nikolov's avatar Petko Nikolov Committed by Reynold Xin
Browse files

[SPARK-1268] Adding XOR and AND-NOT operations to spark.util.collection.BitSet

Symmetric difference (xor) in particular is useful for computing some distance metrics (e.g. Hamming). Unit tests added.

Author: Petko Nikolov <nikolov@soundcloud.com>

Closes #172 from petko-nikolov/bitset-imprv and squashes the following commits:

451f28b [Petko Nikolov] fixed style mistakes
5beba18 [Petko Nikolov] rm outer loop in andNot test
0e61035 [Petko Nikolov] conform to spark style; rm redundant asserts; more unit tests added; use arraycopy instead of loop
d53cdb9 [Petko Nikolov] rm incidentally added space
4e1df43 [Petko Nikolov] adding xor and and-not to BitSet; unit tests added
parent 53953d09
No related branches found
No related tags found
No related merge requests found
......@@ -88,6 +88,45 @@ class BitSet(numBits: Int) extends Serializable {
newBS
}
/**
* Compute the symmetric difference by performing bit-wise XOR of the two sets returning the
* result.
*/
def ^(other: BitSet): BitSet = {
val newBS = new BitSet(math.max(capacity, other.capacity))
val smaller = math.min(numWords, other.numWords)
var ind = 0
while (ind < smaller) {
newBS.words(ind) = words(ind) ^ other.words(ind)
ind += 1
}
if (ind < numWords) {
Array.copy( words, ind, newBS.words, ind, numWords - ind )
}
if (ind < other.numWords) {
Array.copy( other.words, ind, newBS.words, ind, other.numWords - ind )
}
newBS
}
/**
* Compute the difference of the two sets by performing bit-wise AND-NOT returning the
* result.
*/
def andNot(other: BitSet): BitSet = {
val newBS = new BitSet(capacity)
val smaller = math.min(numWords, other.numWords)
var ind = 0
while (ind < smaller) {
newBS.words(ind) = words(ind) & ~other.words(ind)
ind += 1
}
if (ind < numWords) {
Array.copy( words, ind, newBS.words, ind, numWords - ind )
}
newBS
}
/**
* Sets the bit at the specified index to true.
* @param index the bit index
......
......@@ -69,4 +69,87 @@ class BitSetSuite extends FunSuite {
assert(bitset.nextSetBit(96) === 96)
assert(bitset.nextSetBit(97) === -1)
}
test( "xor len(bitsetX) < len(bitsetY)" ) {
val setBitsX = Seq( 0, 2, 3, 37, 41 )
val setBitsY = Seq( 0, 1, 3, 37, 38, 41, 85)
val bitsetX = new BitSet(60)
setBitsX.foreach( i => bitsetX.set(i))
val bitsetY = new BitSet(100)
setBitsY.foreach( i => bitsetY.set(i))
val bitsetXor = bitsetX ^ bitsetY
assert(bitsetXor.nextSetBit(0) === 1)
assert(bitsetXor.nextSetBit(1) === 1)
assert(bitsetXor.nextSetBit(2) === 2)
assert(bitsetXor.nextSetBit(3) === 38)
assert(bitsetXor.nextSetBit(38) === 38)
assert(bitsetXor.nextSetBit(39) === 85)
assert(bitsetXor.nextSetBit(42) === 85)
assert(bitsetXor.nextSetBit(85) === 85)
assert(bitsetXor.nextSetBit(86) === -1)
}
test( "xor len(bitsetX) > len(bitsetY)" ) {
val setBitsX = Seq( 0, 1, 3, 37, 38, 41, 85)
val setBitsY = Seq( 0, 2, 3, 37, 41 )
val bitsetX = new BitSet(100)
setBitsX.foreach( i => bitsetX.set(i))
val bitsetY = new BitSet(60)
setBitsY.foreach( i => bitsetY.set(i))
val bitsetXor = bitsetX ^ bitsetY
assert(bitsetXor.nextSetBit(0) === 1)
assert(bitsetXor.nextSetBit(1) === 1)
assert(bitsetXor.nextSetBit(2) === 2)
assert(bitsetXor.nextSetBit(3) === 38)
assert(bitsetXor.nextSetBit(38) === 38)
assert(bitsetXor.nextSetBit(39) === 85)
assert(bitsetXor.nextSetBit(42) === 85)
assert(bitsetXor.nextSetBit(85) === 85)
assert(bitsetXor.nextSetBit(86) === -1)
}
test( "andNot len(bitsetX) < len(bitsetY)" ) {
val setBitsX = Seq( 0, 2, 3, 37, 41, 48 )
val setBitsY = Seq( 0, 1, 3, 37, 38, 41, 85)
val bitsetX = new BitSet(60)
setBitsX.foreach( i => bitsetX.set(i))
val bitsetY = new BitSet(100)
setBitsY.foreach( i => bitsetY.set(i))
val bitsetDiff = bitsetX.andNot( bitsetY )
assert(bitsetDiff.nextSetBit(0) === 2)
assert(bitsetDiff.nextSetBit(1) === 2)
assert(bitsetDiff.nextSetBit(2) === 2)
assert(bitsetDiff.nextSetBit(3) === 48)
assert(bitsetDiff.nextSetBit(48) === 48)
assert(bitsetDiff.nextSetBit(49) === -1)
assert(bitsetDiff.nextSetBit(65) === -1)
}
test( "andNot len(bitsetX) > len(bitsetY)" ) {
val setBitsX = Seq( 0, 1, 3, 37, 38, 41, 85)
val setBitsY = Seq( 0, 2, 3, 37, 41, 48 )
val bitsetX = new BitSet(100)
setBitsX.foreach( i => bitsetX.set(i))
val bitsetY = new BitSet(60)
setBitsY.foreach( i => bitsetY.set(i))
val bitsetDiff = bitsetX.andNot( bitsetY )
assert(bitsetDiff.nextSetBit(0) === 1)
assert(bitsetDiff.nextSetBit(1) === 1)
assert(bitsetDiff.nextSetBit(2) === 38)
assert(bitsetDiff.nextSetBit(3) === 38)
assert(bitsetDiff.nextSetBit(38) === 38)
assert(bitsetDiff.nextSetBit(39) === 85)
assert(bitsetDiff.nextSetBit(85) === 85)
assert(bitsetDiff.nextSetBit(86) === -1)
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment