/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;
using ArrayUtil = Lucene.Net.Util.ArrayUtil;

namespace Lucene.Net.Index
{
    /// <summary>
    /// This class implements <see cref="InvertedDocConsumer"/>, which
    /// is passed each token produced by the analyzer on each
    /// field.  It stores these tokens in a hash table, and
    /// allocates separate byte streams per token.  Consumers of
    /// this class, e.g. <see cref="FreqProxTermsWriter"/> and
    /// <see cref="TermVectorsTermsWriter"/>, write their own byte
    /// streams under each term.
    /// </summary>
    sealed class TermsHash : InvertedDocConsumer
    {
        internal TermsHashConsumer consumer;
        internal TermsHash nextTermsHash;
        internal int bytesPerPosting;
        internal int postingsFreeChunk;
        internal DocumentsWriter docWriter;

        private RawPostingList[] postingsFreeList = new RawPostingList[1];
        private int postingsFreeCount;
        private int postingsAllocCount;
        internal bool trackAllocations;

        public TermsHash(DocumentsWriter docWriter, bool trackAllocations, TermsHashConsumer consumer, TermsHash nextTermsHash)
        {
            this.docWriter = docWriter;
            this.consumer = consumer;
            this.nextTermsHash = nextTermsHash;
            this.trackAllocations = trackAllocations;

            // Why + 4*POINTER_NUM_BYTE below?
            //   +1: Posting is referenced by postingsFreeList array
            //   +3: Posting is referenced by hash, which
            //       targets 25-50% fill factor; approximate this
            //       as 3X # pointers
            bytesPerPosting = consumer.BytesPerPosting() + 4 * DocumentsWriter.POINTER_NUM_BYTE;
            postingsFreeChunk = (int) (DocumentsWriter.BYTE_BLOCK_SIZE / bytesPerPosting);
        }
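
        // Illustrative arithmetic for the two fields computed above (the
        // concrete values are assumptions for the example, not taken from
        // this file): with DocumentsWriter.POINTER_NUM_BYTE == 4 and a
        // consumer reporting 24 bytes per posting,
        //   bytesPerPosting   = 24 + 4*4 = 40
        // and with DocumentsWriter.BYTE_BLOCK_SIZE == 32768,
        //   postingsFreeChunk = 32768 / 40 = 819
        // so FreeRAM() below trims the free list roughly one byte block's
        // worth of postings at a time.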

        internal override InvertedDocConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread)
        {
            return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null);
        }

        internal TermsHashPerThread AddThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread)
        {
            return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread);
        }

        internal override void SetFieldInfos(FieldInfos fieldInfos)
        {
            this.fieldInfos = fieldInfos;
            consumer.SetFieldInfos(fieldInfos);
        }

        // NOTE: do not make this sync'd; it's not necessary (DW
        // ensures all other threads are idle), and it leads to
        // deadlock
        public override void Abort()
        {
            consumer.Abort();
            if (nextTermsHash != null)
                nextTermsHash.Abort();
        }

        internal void ShrinkFreePostings(IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
        {
            System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount,
                System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount +
                " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

            int newSize = 1;
            if (newSize != postingsFreeList.Length)
            {
                if (postingsFreeCount > newSize)
                {
                    if (trackAllocations)
                    {
                        docWriter.BytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting);
                    }
                    postingsFreeCount = newSize;
                    postingsAllocCount = newSize;
                }

                RawPostingList[] newArray = new RawPostingList[newSize];
                Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
                postingsFreeList = newArray;
            }
        }

        internal override void CloseDocStore(SegmentWriteState state)
        {
            lock (this)
            {
                consumer.CloseDocStore(state);
                if (nextTermsHash != null)
                    nextTermsHash.CloseDocStore(state);
            }
        }

        internal override void Flush(IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
        {
            lock (this)
            {
                // Remap each (thread, fields) entry onto this hash's consumer
                // and, if present, onto the chained TermsHash, then flush both.
                var childThreadsAndFields = new Dictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>>();
                Dictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> nextThreadsAndFields;

                if (nextTermsHash != null)
                {
                    nextThreadsAndFields = new Dictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>>();
                }
                else
                    nextThreadsAndFields = null;

                foreach (var entry in threadsAndFields)
                {
                    TermsHashPerThread perThread = (TermsHashPerThread) entry.Key;

                    ICollection<InvertedDocConsumerPerField> fields = entry.Value;

                    var fieldsIt = fields.GetEnumerator();
                    ICollection<TermsHashConsumerPerField> childFields = new HashSet<TermsHashConsumerPerField>();
                    ICollection<InvertedDocConsumerPerField> nextChildFields;

                    if (nextTermsHash != null)
                    {
                        nextChildFields = new HashSet<InvertedDocConsumerPerField>();
                    }
                    else
                        nextChildFields = null;

                    while (fieldsIt.MoveNext())
                    {
                        TermsHashPerField perField = (TermsHashPerField) fieldsIt.Current;
                        childFields.Add(perField.consumer);
                        if (nextTermsHash != null)
                            nextChildFields.Add(perField.nextPerField);
                    }

                    childThreadsAndFields[perThread.consumer] = childFields;

                    if (nextTermsHash != null)
                        nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;
                }

                consumer.Flush(childThreadsAndFields, state);

                ShrinkFreePostings(threadsAndFields, state);

                if (nextTermsHash != null)
                    nextTermsHash.Flush(nextThreadsAndFields, state);
            }
        }
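
        // Sketch of the chain Flush fans out over, assuming the standard
        // indexing chain in this codebase (a primary TermsHash whose consumer
        // is FreqProxTermsWriter, chained to a secondary TermsHash whose
        // consumer is TermVectorsTermsWriter):
        //
        //   TermsHash(consumer: FreqProxTermsWriter,
        //             nextTermsHash: TermsHash(consumer: TermVectorsTermsWriter,
        //                                      nextTermsHash: null))
        //
        // Each per-thread/per-field entry is translated to the corresponding
        // consumer objects, so every writer flushes only its own byte streams.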

        public override bool FreeRAM()
        {
            if (!trackAllocations)
                return false;

            bool any;
            long bytesFreed = 0;
            lock (this)
            {
                int numToFree;
                if (postingsFreeCount >= postingsFreeChunk)
                    numToFree = postingsFreeChunk;
                else
                    numToFree = postingsFreeCount;
                any = numToFree > 0;
                if (any)
                {
                    // Null out the freed tail of the free list so those
                    // postings become eligible for garbage collection
                    for (int i = postingsFreeCount - numToFree; i < postingsFreeCount; i++)
                    {
                        postingsFreeList[i] = null;
                    }
                    postingsFreeCount -= numToFree;
                    postingsAllocCount -= numToFree;
                    bytesFreed = -numToFree * bytesPerPosting;
                    any = true;
                }
            }

            if (any)
            {
                docWriter.BytesAllocated(bytesFreed);
            }

            if (nextTermsHash != null)
                any |= nextTermsHash.FreeRAM();

            return any;
        }

        public void RecyclePostings(RawPostingList[] postings, int numPostings)
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(postings.Length >= numPostings);

                // Move all Postings from this ThreadState back to our
                // free list.  We pre-allocated this array while we were
                // creating Postings to make sure it's large enough
                System.Diagnostics.Debug.Assert(postingsFreeCount + numPostings <= postingsFreeList.Length);
                Array.Copy(postings, 0, postingsFreeList, postingsFreeCount, numPostings);
                postingsFreeCount += numPostings;
            }
        }

        public void GetPostings(RawPostingList[] postings)
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings start"));

                System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsFreeList.Length);
                System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsAllocCount,
                    "postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount);

                int numToCopy;
                if (postingsFreeCount < postings.Length)
                    numToCopy = postingsFreeCount;
                else
                    numToCopy = postings.Length;
                int start = postingsFreeCount - numToCopy;
                System.Diagnostics.Debug.Assert(start >= 0);
                System.Diagnostics.Debug.Assert(start + numToCopy <= postingsFreeList.Length);
                System.Diagnostics.Debug.Assert(numToCopy <= postings.Length);
                Array.Copy(postingsFreeList, start, postings, 0, numToCopy);

                // Directly allocate the remainder if any
                if (numToCopy != postings.Length)
                {
                    int extra = postings.Length - numToCopy;
                    int newPostingsAllocCount = postingsAllocCount + extra;

                    consumer.CreatePostings(postings, numToCopy, extra);
                    System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings after create"));
                    postingsAllocCount += extra;

                    if (trackAllocations)
                        docWriter.BytesAllocated(extra * bytesPerPosting);

                    if (newPostingsAllocCount > postingsFreeList.Length)
                        // Pre-allocate the postingsFreeList so it's large
                        // enough to hold all postings we've given out
                        postingsFreeList = new RawPostingList[ArrayUtil.GetNextSize(newPostingsAllocCount)];
                }

                postingsFreeCount -= numToCopy;

                if (trackAllocations)
                    docWriter.BytesUsed(postings.Length * bytesPerPosting);
            }
        }
    }
}
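
// A minimal sketch of the free-list round trip that GetPostings and
// RecyclePostings implement (hypothetical caller shown for illustration;
// in this codebase the actual caller is the per-thread state, e.g.
// TermsHashPerThread):
//
//   RawPostingList[] postings = new RawPostingList[16];
//   termsHash.GetPostings(postings);          // fill from free list, allocate remainder
//   // ... hash a document's tokens into these postings ...
//   termsHash.RecyclePostings(postings, 16);  // return them for reuse
//
// GetPostings grows postingsFreeList up front (to GetNextSize of the total
// alloc count), which is why RecyclePostings can assert the copy-back always
// fits without reallocating while holding the lock.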