/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using Lucene.Net.Support; using Directory = Lucene.Net.Store.Directory; using IndexInput = Lucene.Net.Store.IndexInput; using IndexOutput = Lucene.Net.Store.IndexOutput; namespace Lucene.Net.Util { /// Optimized implementation of a vector of bits. This is more-or-less like /// java.util.BitSet, but also includes the following: /// /// a count() method, which efficiently computes the number of one bits; /// optimized read from and write to disk; /// inlinable get() method; /// store and load, as bit set or d-gaps, depending on sparseness; /// /// public sealed class BitVector : ICloneable { private byte[] bits; private int size; private int count; /// Constructs a vector capable of holding n bits. public BitVector(int n) { size = n; bits = new byte[(size >> 3) + 1]; count = 0; } internal BitVector(byte[] bits, int size) { this.bits = bits; this.size = size; count = -1; } public System.Object Clone() { byte[] copyBits = new byte[bits.Length]; Array.Copy(bits, 0, copyBits, 0, bits.Length); BitVector clone = new BitVector(copyBits, size); clone.count = count; return clone; } /// Sets the value of bit to one. public void Set(int bit) { if (bit >= size) { throw new System. IndexOutOfRangeException("Index of bound " + bit); } bits[bit >> 3] |= (byte) (1 << (bit & 7)); count = - 1; } /// Sets the value of bit to true, and /// returns true if bit was already set /// public bool GetAndSet(int bit) { if (bit >= size) { throw new System. IndexOutOfRangeException("Index of bound " + bit); } int pos = bit >> 3; int v = bits[pos]; int flag = 1 << (bit & 7); if ((flag & v) != 0) return true; else { bits[pos] = (byte) (v | flag); if (count != - 1) count++; return false; } } /// Sets the value of bit to zero. public void Clear(int bit) { if (bit >= size) { throw new System.IndexOutOfRangeException("Index of bound " + bit); } bits[bit >> 3] &= (byte) (~ (1 << (bit & 7))); count = - 1; } /// Returns true if bit is one and /// false if it is zero. /// public bool Get(int bit) { System.Diagnostics.Debug.Assert(bit >= 0 && bit < size, "bit " + bit + " is out of bounds 0.." +(size - 1)); return (bits[bit >> 3] & (1 << (bit & 7))) != 0; } /// Returns the number of bits in this vector. This is also one greater than /// the number of the largest valid bit number. /// public int Size() { return size; } /// Returns the total number of one bits in this vector. This is efficiently /// computed and cached, so that, if the vector is not changed, no /// recomputation is done for repeated calls. /// public int Count() { // if the vector has been modified if (count == - 1) { int c = 0; int end = bits.Length; for (int i = 0; i < end; i++) c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte count = c; } return count; } /// /// For testing /// [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] public int GetRecomputedCount() { int c = 0; int end = bits.Length; for (int i = 0; i < end; i++) c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte return c; } private static readonly byte[] BYTE_COUNTS = new byte[]{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; /// Writes this vector to the file name in Directory /// d, in a format that can be read by the constructor /// . /// public void Write(Directory d, System.String name) { IndexOutput output = d.CreateOutput(name); try { if (IsSparse()) { WriteDgaps(output); // sparse bit-set more efficiently saved as d-gaps. } else { WriteBits(output); } } finally { output.Close(); } } /// Write as a bit set private void WriteBits(IndexOutput output) { output.WriteInt(Size()); // write size output.WriteInt(Count()); // write count output.WriteBytes(bits, bits.Length); } /// Write as a d-gaps list private void WriteDgaps(IndexOutput output) { output.WriteInt(- 1); // mark using d-gaps output.WriteInt(Size()); // write size output.WriteInt(Count()); // write count int last = 0; int n = Count(); int m = bits.Length; for (int i = 0; i < m && n > 0; i++) { if (bits[i] != 0) { output.WriteVInt(i - last); output.WriteByte(bits[i]); last = i; n -= BYTE_COUNTS[bits[i] & 0xFF]; } } } /// Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. private bool IsSparse() { // note: order of comparisons below set to favor smaller values (no binary range search.) // note: adding 4 because we start with ((int) -1) to indicate d-gaps format. // note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore // multiplying count by (8+8) or (8+16) or (8+24) etc.: // - first 8 for writing bits[i] (1 byte vs. 1 bit), and // - second part for writing the byte-number d-gap as vint. // note: factor is for read/write of byte-arrays being faster than vints. int factor = 10; if (bits.Length < (1 << 7)) return factor * (4 + (8 + 8) * Count()) < Size(); if (bits.Length < (1 << 14)) return factor * (4 + (8 + 16) * Count()) < Size(); if (bits.Length < (1 << 21)) return factor * (4 + (8 + 24) * Count()) < Size(); if (bits.Length < (1 << 28)) return factor * (4 + (8 + 32) * Count()) < Size(); return factor * (4 + (8 + 40) * Count()) < Size(); } /// Constructs a bit vector from the file name in Directory /// d, as written by the method. /// public BitVector(Directory d, System.String name) { IndexInput input = d.OpenInput(name); try { size = input.ReadInt(); // read size if (size == - 1) { ReadDgaps(input); } else { ReadBits(input); } } finally { input.Close(); } } /// Read as a bit set private void ReadBits(IndexInput input) { count = input.ReadInt(); // read count bits = new byte[(size >> 3) + 1]; // allocate bits input.ReadBytes(bits, 0, bits.Length); } /// read as a d-gaps list private void ReadDgaps(IndexInput input) { size = input.ReadInt(); // (re)read size count = input.ReadInt(); // read count bits = new byte[(size >> 3) + 1]; // allocate bits int last = 0; int n = Count(); while (n > 0) { last += input.ReadVInt(); bits[last] = input.ReadByte(); n -= BYTE_COUNTS[bits[last] & 0xFF]; } } /// Retrieve a subset of this BitVector. /// /// /// starting index, inclusive /// /// ending index, exclusive /// /// subset /// public BitVector Subset(int start, int end) { if (start < 0 || end > Size() || end < start) throw new System.IndexOutOfRangeException(); // Special case -- return empty vector is start == end if (end == start) return new BitVector(0); byte[] bits = new byte[(Number.URShift((end - start - 1), 3)) + 1]; int s = Number.URShift(start, 3); for (int i = 0; i < bits.Length; i++) { int cur = 0xFF & this.bits[i + s]; int next = i + s + 1 >= this.bits.Length?0:0xFF & this.bits[i + s + 1]; bits[i] = (byte) ((Number.URShift(cur, (start & 7))) | ((next << (8 - (start & 7))))); } int bitsToClear = (bits.Length * 8 - (end - start)) % 8; bits[bits.Length - 1] &= (byte) (~ (0xFF << (8 - bitsToClear))); return new BitVector(bits, end - start); } } }