/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.util; import java.util.Arrays; import java.io.Serializable; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; /** An "open" BitSet implementation that allows direct access to the array of words * storing the bits. *
* Unlike java.util.bitset, the fact that bits are packed into an array of longs * is part of the interface. This allows efficient implementation of other algorithms * by someone other than the author. It also allows one to efficiently implement * alternate serialization or interchange formats. * *OpenBitSet is faster than java.util.BitSet in most operations
* and *much* faster at calculating cardinality of sets and results of set operations.
* It can also handle sets of larger cardinality (up to 64 * 2**32-1)
*
* The goals of OpenBitSet are the fastest implementation possible, and
* maximum code reuse. Extra safety and encapsulation
* may always be built on top, but if that's built in, the cost can never be removed (and
* hence people re-implement their own version in order to get better performance).
* If you want a "safe", totally encapsulated (and slower and limited) BitSet
* class, use java.util.BitSet.
*
* | cardinality | intersect_count | union | nextSetBit | get | iterator | |
|---|---|---|---|---|---|---|
| 50% full | 3.36 | 3.96 | 1.44 | 1.46 | 1.99 | 1.58 |
| 1% full | 3.31 | 3.90 | 1.04 | 0.99 |
| cardinality | intersect_count | union | nextSetBit | get | iterator | |
|---|---|---|---|---|---|---|
| 50% full | 2.50 | 3.50 | 1.00 | 1.03 | 1.12 | 1.25 |
| 1% full | 2.51 | 3.49 | 1.00 | 1.02 |
* numWords are the number of elements in the array that contain
* set bits (non-zero longs).
* numWords should be <= bits.length, and
* any existing words in the array at position >= numWords should be zero.
*
*/
public OpenBitSet(long[] bits, int numWords) {
this.bits = bits;
this.wlen = numWords;
}
@Override
public DocIdSetIterator iterator() {
return new OpenBitSetIterator(bits, wlen);
}
/** This DocIdSet implementation is cacheable. */
@Override
public boolean isCacheable() {
return true;
}
/** Returns the current capacity in bits (1 greater than the index of the last bit) */
public long capacity() { return bits.length << 6; }
/**
* Returns the current capacity of this set. Included for
* compatibility. This is *not* equal to {@link #cardinality}
*/
public long size() {
return capacity();
}
/** Returns true if there are no set bits */
public boolean isEmpty() { return cardinality()==0; }
/** Expert: returns the long[] storing the bits */
public long[] getBits() { return bits; }
/** Expert: sets a new long[] to use as the bit storage */
public void setBits(long[] bits) { this.bits = bits; }
/** Expert: gets the number of longs in the array that are in use */
public int getNumWords() { return wlen; }
/** Expert: sets the number of longs in the array that are in use */
public void setNumWords(int nWords) { this.wlen=nWords; }
/** Returns true or false for the specified bit index. */
public boolean get(int index) {
int i = index >> 6; // div 64
// signed shift will keep a negative index and force an
// array-index-out-of-bounds-exception, removing the need for an explicit check.
if (i>=bits.length) return false;
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
return (bits[i] & bitmask) != 0;
}
/** Returns true or false for the specified bit index.
* The index should be less than the OpenBitSet size
*/
public boolean fastGet(int index) {
int i = index >> 6; // div 64
// signed shift will keep a negative index and force an
// array-index-out-of-bounds-exception, removing the need for an explicit check.
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
return (bits[i] & bitmask) != 0;
}
/** Returns true or false for the specified bit index
*/
public boolean get(long index) {
int i = (int)(index >> 6); // div 64
if (i>=bits.length) return false;
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
return (bits[i] & bitmask) != 0;
}
/** Returns true or false for the specified bit index.
* The index should be less than the OpenBitSet size.
*/
public boolean fastGet(long index) {
int i = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
return (bits[i] & bitmask) != 0;
}
/*
// alternate implementation of get()
public boolean get1(int index) {
int i = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
return ((bits[i]>>>bit) & 0x01) != 0;
// this does a long shift and a bittest (on x86) vs
// a long shift, and a long AND, (the test for zero is prob a no-op)
// testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0;
}
*/
/** returns 1 if the bit is set, 0 if not.
* The index should be less than the OpenBitSet size
*/
public int getBit(int index) {
int i = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
return ((int)(bits[i]>>>bit)) & 0x01;
}
/*
public boolean get2(int index) {
int word = index >> 6; // div 64
int bit = index & 0x0000003f; // mod 64
return (bits[word] << bit) < 0; // hmmm, this would work if bit order were reversed
// we could right shift and check for parity bit, if it was available to us.
}
*/
/** sets a bit, expanding the set size if necessary */
public void set(long index) {
int wordNum = expandingWordNum(index);
int bit = (int)index & 0x3f;
long bitmask = 1L << bit;
bits[wordNum] |= bitmask;
}
/** Sets the bit at the specified index.
* The index should be less than the OpenBitSet size.
*/
public void fastSet(int index) {
int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum] |= bitmask;
}
/** Sets the bit at the specified index.
* The index should be less than the OpenBitSet size.
*/
public void fastSet(long index) {
int wordNum = (int)(index >> 6);
int bit = (int)index & 0x3f;
long bitmask = 1L << bit;
bits[wordNum] |= bitmask;
}
/** Sets a range of bits, expanding the set size if necessary
*
* @param startIndex lower index
* @param endIndex one-past the last bit to set
*/
public void set(long startIndex, long endIndex) {
if (endIndex <= startIndex) return;
int startWord = (int)(startIndex>>6);
// since endIndex is one past the end, this is index of the last
// word to be changed.
int endWord = expandingWordNum(endIndex-1);
long startmask = -1L << startIndex;
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
if (startWord == endWord) {
bits[startWord] |= (startmask & endmask);
return;
}
bits[startWord] |= startmask;
Arrays.fill(bits, startWord+1, endWord, -1L);
bits[endWord] |= endmask;
}
protected int expandingWordNum(long index) {
int wordNum = (int)(index >> 6);
if (wordNum>=wlen) {
ensureCapacity(index+1);
wlen = wordNum+1;
}
return wordNum;
}
/** clears a bit.
* The index should be less than the OpenBitSet size.
*/
public void fastClear(int index) {
int wordNum = index >> 6;
int bit = index & 0x03f;
long bitmask = 1L << bit;
bits[wordNum] &= ~bitmask;
// hmmm, it takes one more instruction to clear than it does to set... any
// way to work around this? If there were only 63 bits per word, we could
// use a right shift of 10111111...111 in binary to position the 0 in the
// correct place (using sign extension).
// Could also use Long.rotateRight() or rotateLeft() *if* they were converted
// by the JVM into a native instruction.
// bits[word] &= Long.rotateLeft(0xfffffffe,bit);
}
/** clears a bit.
* The index should be less than the OpenBitSet size.
*/
public void fastClear(long index) {
int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum] &= ~bitmask;
}
/** clears a bit, allowing access beyond the current set size without changing the size.*/
public void clear(long index) {
int wordNum = (int)(index >> 6); // div 64
if (wordNum>=wlen) return;
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum] &= ~bitmask;
}
/** Clears a range of bits. Clearing past the end does not change the size of the set.
*
* @param startIndex lower index
* @param endIndex one-past the last bit to clear
*/
public void clear(int startIndex, int endIndex) {
if (endIndex <= startIndex) return;
int startWord = (startIndex>>6);
if (startWord >= wlen) return;
// since endIndex is one past the end, this is index of the last
// word to be changed.
int endWord = ((endIndex-1)>>6);
long startmask = -1L << startIndex;
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
// invert masks since we are clearing
startmask = ~startmask;
endmask = ~endmask;
if (startWord == endWord) {
bits[startWord] &= (startmask | endmask);
return;
}
bits[startWord] &= startmask;
int middle = Math.min(wlen, endWord);
Arrays.fill(bits, startWord+1, middle, 0L);
if (endWord < wlen) {
bits[endWord] &= endmask;
}
}
/** Clears a range of bits. Clearing past the end does not change the size of the set.
*
* @param startIndex lower index
* @param endIndex one-past the last bit to clear
*/
public void clear(long startIndex, long endIndex) {
if (endIndex <= startIndex) return;
int startWord = (int)(startIndex>>6);
if (startWord >= wlen) return;
// since endIndex is one past the end, this is index of the last
// word to be changed.
int endWord = (int)((endIndex-1)>>6);
long startmask = -1L << startIndex;
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
// invert masks since we are clearing
startmask = ~startmask;
endmask = ~endmask;
if (startWord == endWord) {
bits[startWord] &= (startmask | endmask);
return;
}
bits[startWord] &= startmask;
int middle = Math.min(wlen, endWord);
Arrays.fill(bits, startWord+1, middle, 0L);
if (endWord < wlen) {
bits[endWord] &= endmask;
}
}
/** Sets a bit and returns the previous value.
* The index should be less than the OpenBitSet size.
*/
public boolean getAndSet(int index) {
int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
boolean val = (bits[wordNum] & bitmask) != 0;
bits[wordNum] |= bitmask;
return val;
}
/** Sets a bit and returns the previous value.
* The index should be less than the OpenBitSet size.
*/
public boolean getAndSet(long index) {
int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
boolean val = (bits[wordNum] & bitmask) != 0;
bits[wordNum] |= bitmask;
return val;
}
/** flips a bit.
* The index should be less than the OpenBitSet size.
*/
public void fastFlip(int index) {
int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum] ^= bitmask;
}
/** flips a bit.
* The index should be less than the OpenBitSet size.
*/
public void fastFlip(long index) {
int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum] ^= bitmask;
}
/** flips a bit, expanding the set size if necessary */
public void flip(long index) {
int wordNum = expandingWordNum(index);
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum] ^= bitmask;
}
/** flips a bit and returns the resulting bit value.
* The index should be less than the OpenBitSet size.
*/
public boolean flipAndGet(int index) {
int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum] ^= bitmask;
return (bits[wordNum] & bitmask) != 0;
}
/** flips a bit and returns the resulting bit value.
* The index should be less than the OpenBitSet size.
*/
public boolean flipAndGet(long index) {
int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum] ^= bitmask;
return (bits[wordNum] & bitmask) != 0;
}
/** Flips a range of bits, expanding the set size if necessary
*
* @param startIndex lower index
* @param endIndex one-past the last bit to flip
*/
public void flip(long startIndex, long endIndex) {
if (endIndex <= startIndex) return;
int startWord = (int)(startIndex>>6);
// since endIndex is one past the end, this is index of the last
// word to be changed.
int endWord = expandingWordNum(endIndex-1);
/*** Grrr, java shifting wraps around so -1L>>>64 == -1
* for that reason, make sure not to use endmask if the bits to flip will
* be zero in the last word (redefine endWord to be the last changed...)
long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000
long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111
***/
long startmask = -1L << startIndex;
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
if (startWord == endWord) {
bits[startWord] ^= (startmask & endmask);
return;
}
bits[startWord] ^= startmask;
for (int i=startWord+1; i