/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Lucene.Net.Support;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
namespace Lucene.Net.Util
{
/// Optimized implementation of a vector of bits. This is more-or-less like
/// java.util.BitSet, but also includes the following:
///
/// - a count() method, which efficiently computes the number of one bits;
/// - optimized read from and write to disk;
/// - inlinable get() method;
/// - store and load, as bit set or d-gaps, depending on sparseness;
///
///
public sealed class BitVector : ICloneable
{
private byte[] bits;
private int size;
private int count;
/// Constructs a vector capable of holding n bits.
public BitVector(int n)
{
size = n;
bits = new byte[(size >> 3) + 1];
count = 0;
}
internal BitVector(byte[] bits, int size)
{
this.bits = bits;
this.size = size;
count = -1;
}
public System.Object Clone()
{
byte[] copyBits = new byte[bits.Length];
Array.Copy(bits, 0, copyBits, 0, bits.Length);
BitVector clone = new BitVector(copyBits, size);
clone.count = count;
return clone;
}
/// Sets the value of bit to one.
public void Set(int bit)
{
if (bit >= size)
{
throw new System. IndexOutOfRangeException("Index of bound " + bit);
}
bits[bit >> 3] |= (byte) (1 << (bit & 7));
count = - 1;
}
/// Sets the value of bit to true, and
/// returns true if bit was already set
///
public bool GetAndSet(int bit)
{
if (bit >= size)
{
throw new System. IndexOutOfRangeException("Index of bound " + bit);
}
int pos = bit >> 3;
int v = bits[pos];
int flag = 1 << (bit & 7);
if ((flag & v) != 0)
return true;
else
{
bits[pos] = (byte) (v | flag);
if (count != - 1)
count++;
return false;
}
}
/// Sets the value of bit to zero.
public void Clear(int bit)
{
if (bit >= size)
{
throw new System.IndexOutOfRangeException("Index of bound " + bit);
}
bits[bit >> 3] &= (byte) (~ (1 << (bit & 7)));
count = - 1;
}
/// Returns true if bit is one and
/// false if it is zero.
///
public bool Get(int bit)
{
System.Diagnostics.Debug.Assert(bit >= 0 && bit < size, "bit " + bit + " is out of bounds 0.." +(size - 1));
return (bits[bit >> 3] & (1 << (bit & 7))) != 0;
}
/// Returns the number of bits in this vector. This is also one greater than
/// the number of the largest valid bit number.
///
public int Size()
{
return size;
}
/// Returns the total number of one bits in this vector. This is efficiently
/// computed and cached, so that, if the vector is not changed, no
/// recomputation is done for repeated calls.
///
public int Count()
{
// if the vector has been modified
if (count == - 1)
{
int c = 0;
int end = bits.Length;
for (int i = 0; i < end; i++)
c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte
count = c;
}
return count;
}
///
/// For testing
///
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
public int GetRecomputedCount()
{
int c = 0;
int end = bits.Length;
for (int i = 0; i < end; i++)
c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte
return c;
}
private static readonly byte[] BYTE_COUNTS = new byte[]{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
/// Writes this vector to the file name in Directory
/// d, in a format that can be read by the constructor
/// .
///
public void Write(Directory d, System.String name)
{
IndexOutput output = d.CreateOutput(name);
try
{
if (IsSparse())
{
WriteDgaps(output); // sparse bit-set more efficiently saved as d-gaps.
}
else
{
WriteBits(output);
}
}
finally
{
output.Close();
}
}
/// Write as a bit set
private void WriteBits(IndexOutput output)
{
output.WriteInt(Size()); // write size
output.WriteInt(Count()); // write count
output.WriteBytes(bits, bits.Length);
}
/// Write as a d-gaps list
private void WriteDgaps(IndexOutput output)
{
output.WriteInt(- 1); // mark using d-gaps
output.WriteInt(Size()); // write size
output.WriteInt(Count()); // write count
int last = 0;
int n = Count();
int m = bits.Length;
for (int i = 0; i < m && n > 0; i++)
{
if (bits[i] != 0)
{
output.WriteVInt(i - last);
output.WriteByte(bits[i]);
last = i;
n -= BYTE_COUNTS[bits[i] & 0xFF];
}
}
}
/// Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set.
private bool IsSparse()
{
// note: order of comparisons below set to favor smaller values (no binary range search.)
// note: adding 4 because we start with ((int) -1) to indicate d-gaps format.
// note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore
// multiplying count by (8+8) or (8+16) or (8+24) etc.:
// - first 8 for writing bits[i] (1 byte vs. 1 bit), and
// - second part for writing the byte-number d-gap as vint.
// note: factor is for read/write of byte-arrays being faster than vints.
int factor = 10;
if (bits.Length < (1 << 7))
return factor * (4 + (8 + 8) * Count()) < Size();
if (bits.Length < (1 << 14))
return factor * (4 + (8 + 16) * Count()) < Size();
if (bits.Length < (1 << 21))
return factor * (4 + (8 + 24) * Count()) < Size();
if (bits.Length < (1 << 28))
return factor * (4 + (8 + 32) * Count()) < Size();
return factor * (4 + (8 + 40) * Count()) < Size();
}
/// Constructs a bit vector from the file name in Directory
/// d, as written by the method.
///
public BitVector(Directory d, System.String name)
{
IndexInput input = d.OpenInput(name);
try
{
size = input.ReadInt(); // read size
if (size == - 1)
{
ReadDgaps(input);
}
else
{
ReadBits(input);
}
}
finally
{
input.Close();
}
}
/// Read as a bit set
private void ReadBits(IndexInput input)
{
count = input.ReadInt(); // read count
bits = new byte[(size >> 3) + 1]; // allocate bits
input.ReadBytes(bits, 0, bits.Length);
}
/// read as a d-gaps list
private void ReadDgaps(IndexInput input)
{
size = input.ReadInt(); // (re)read size
count = input.ReadInt(); // read count
bits = new byte[(size >> 3) + 1]; // allocate bits
int last = 0;
int n = Count();
while (n > 0)
{
last += input.ReadVInt();
bits[last] = input.ReadByte();
n -= BYTE_COUNTS[bits[last] & 0xFF];
}
}
/// Retrieve a subset of this BitVector.
///
///
/// starting index, inclusive
///
/// ending index, exclusive
///
/// subset
///
public BitVector Subset(int start, int end)
{
if (start < 0 || end > Size() || end < start)
throw new System.IndexOutOfRangeException();
// Special case -- return empty vector is start == end
if (end == start)
return new BitVector(0);
byte[] bits = new byte[(Number.URShift((end - start - 1), 3)) + 1];
int s = Number.URShift(start, 3);
for (int i = 0; i < bits.Length; i++)
{
int cur = 0xFF & this.bits[i + s];
int next = i + s + 1 >= this.bits.Length?0:0xFF & this.bits[i + s + 1];
bits[i] = (byte) ((Number.URShift(cur, (start & 7))) | ((next << (8 - (start & 7)))));
}
int bitsToClear = (bits.Length * 8 - (end - start)) % 8;
bits[bits.Length - 1] &= (byte) (~ (0xFF << (8 - bitsToClear)));
return new BitVector(bits, end - start);
}
}
}