/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput; using Directory = Lucene.Net.Store.Directory; namespace Lucene.Net.Index { /// This stores a monotonically increasing set of pairs in a /// Directory. Pairs are accessed either by Term or by ordinal position the /// set. /// public sealed class TermInfosReader { private Directory directory; private System.String segment; private FieldInfos fieldInfos; private System.LocalDataStoreSlot enumerators = System.Threading.Thread.AllocateDataSlot(); private SegmentTermEnum origEnum; private long size; private Term[] indexTerms = null; private TermInfo[] indexInfos; private long[] indexPointers; private SegmentTermEnum indexEnum; private int indexDivisor = 1; private int totalIndexInterval; internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis) : this(dir, seg, fis, BufferedIndexInput.BUFFER_SIZE) { } internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize) { bool success = false; try { directory = dir; segment = seg; fieldInfos = fis; origEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tis", readBufferSize), fieldInfos, false); size = origEnum.size; totalIndexInterval = origEnum.indexInterval; indexEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tii", readBufferSize), fieldInfos, true); success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Close(); } } } public int GetSkipInterval() { return origEnum.skipInterval; } public int GetMaxSkipLevels() { return origEnum.maxSkipLevels; } ///

Sets the indexDivisor, which subsamples the number /// of indexed terms loaded into memory. This has a /// similar effect as {@link /// IndexWriter#setTermIndexInterval} except that setting /// must be done at indexing time while this setting can be /// set per reader. When set to N, then one in every /// N*termIndexInterval terms in the index is loaded into /// memory. By setting this to a value > 1 you can reduce /// memory usage, at the expense of higher latency when /// loading a TermInfo. The default value is 1.

/// /// NOTE: you must call this before the term /// index is loaded. If the index is already loaded, /// an IllegalStateException is thrown. /// /// + @throws IllegalStateException if the term index has /// already been loaded into memory. ///
public void SetIndexDivisor(int indexDivisor) { if (indexDivisor < 1) throw new System.ArgumentException("indexDivisor must be > 0: got " + indexDivisor); if (indexTerms != null) throw new System.SystemException("index terms are already loaded"); this.indexDivisor = indexDivisor; totalIndexInterval = origEnum.indexInterval * indexDivisor; } /// Returns the indexDivisor. /// /// public int GetIndexDivisor() { return indexDivisor; } public void Close() { if (origEnum != null) origEnum.Close(); if (indexEnum != null) indexEnum.Close(); } /// Returns the number of term/value pairs in the set. internal long Size() { return size; } private SegmentTermEnum GetEnum() { SegmentTermEnum termEnum = (SegmentTermEnum) System.Threading.Thread.GetData(enumerators); if (termEnum == null) { termEnum = Terms(); System.Threading.Thread.SetData(enumerators, termEnum); } return termEnum; } private void EnsureIndexIsRead() { lock (this) { if (indexTerms != null) // index already read return ; // do nothing try { int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // otherwise read index indexTerms = new Term[indexSize]; indexInfos = new TermInfo[indexSize]; indexPointers = new long[indexSize]; for (int i = 0; indexEnum.Next(); i++) { indexTerms[i] = indexEnum.Term(); indexInfos[i] = indexEnum.TermInfo(); indexPointers[i] = indexEnum.indexPointer; for (int j = 1; j < indexDivisor; j++) if (!indexEnum.Next()) break; } } finally { indexEnum.Close(); indexEnum = null; } } } /// Returns the offset of the greatest index entry which is less than or equal to term. private int GetIndexOffset(Term term) { int lo = 0; // binary search indexTerms[] int hi = indexTerms.Length - 1; while (hi >= lo) { int mid = (lo + hi) >> 1; int delta = term.CompareTo(indexTerms[mid]); if (delta < 0) hi = mid - 1; else if (delta > 0) lo = mid + 1; else return mid; } return hi; } private void SeekEnum(int indexOffset) { GetEnum().Seek(indexPointers[indexOffset], (indexOffset * totalIndexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]); } /// Returns the TermInfo for a Term in the set, or null. public TermInfo Get(Term term) { if (size == 0) return null; EnsureIndexIsRead(); // optimize sequential access: first try scanning cached enum w/o seeking SegmentTermEnum enumerator = GetEnum(); if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0)) { int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1; if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0) return ScanEnum(term); // no need to seek } // random-access: must seek SeekEnum(GetIndexOffset(term)); return ScanEnum(term); } /// Scans within block for matching term. private TermInfo ScanEnum(Term term) { SegmentTermEnum enumerator = GetEnum(); enumerator.ScanTo(term); if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0) return enumerator.TermInfo(); else return null; } /// Returns the nth term in the set. internal Term Get(int position) { if (size == 0) return null; SegmentTermEnum enumerator = GetEnum(); if (enumerator != null && enumerator.Term() != null && position >= enumerator.position && position < (enumerator.position + totalIndexInterval)) return ScanEnum(position); // can avoid seek SeekEnum(position / totalIndexInterval); // must seek return ScanEnum(position); } private Term ScanEnum(int position) { SegmentTermEnum enumerator = GetEnum(); while (enumerator.position < position) if (!enumerator.Next()) return null; return enumerator.Term(); } /// Returns the position of a Term in the set or -1. internal long GetPosition(Term term) { if (size == 0) return - 1; EnsureIndexIsRead(); int indexOffset = GetIndexOffset(term); SeekEnum(indexOffset); SegmentTermEnum enumerator = GetEnum(); while (term.CompareTo(enumerator.Term()) > 0 && enumerator.Next()) { } if (term.CompareTo(enumerator.Term()) == 0) return enumerator.position; else return - 1; } /// Returns an enumeration of all the Terms and TermInfos in the set. public SegmentTermEnum Terms() { return (SegmentTermEnum) origEnum.Clone(); } /// Returns an enumeration of terms starting at or after the named term. public SegmentTermEnum Terms(Term term) { Get(term); return (SegmentTermEnum) GetEnum().Clone(); } } }