/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Directory = Lucene.Net.Store.Directory;
using IndexOutput = Lucene.Net.Store.IndexOutput;
namespace Lucene.Net.Index
{
/// <summary>
/// This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
/// Directory. A TermInfos can be written once, in order.
/// </summary>
/// <remarks>
/// The public constructor creates two linked writers: the main one writes the
/// "<c>segment</c>.tis" output and a private companion writes the
/// "<c>segment</c>.tii" output. Before every <c>indexInterval</c>-th entry is
/// written to the main output, the previously written entry is also added to
/// the companion together with the main output's current file pointer.
/// </remarks>
public sealed class TermInfosWriter
{
    /// <summary>The file format version, a negative number.</summary>
    public const int FORMAT = - 3;

    private FieldInfos fieldInfos;
    private IndexOutput output;
    private TermInfo lastTi = new TermInfo();
    private long size;

    // TODO: the default values for these two parameters should be settable from
    // IndexWriter. However, once that's done, folks will start setting them to
    // ridiculous values and complaining that things don't work well, as with
    // mergeFactor. So, let's wait until a number of folks find that alternate
    // values work better. Note that both of these values are stored in the
    // segment, so that it's safe to change these w/o rebuilding all indexes.

    /// <summary>
    /// Expert: The fraction of terms in the "dictionary" which should be stored
    /// in RAM. Smaller values use more memory, but make searching slightly
    /// faster, while larger values use less memory and make searching slightly
    /// slower. Searching is typically not dominated by dictionary lookup, so
    /// tweaking this is rarely useful.
    /// </summary>
    /// <remarks>
    /// The initializer value is replaced by the <c>interval</c> argument passed
    /// to the constructor.
    /// </remarks>
    internal int indexInterval = 128;

    /// <summary>
    /// Expert: The fraction of <c>TermDocs</c> entries stored in skip tables,
    /// used to accelerate <c>TermDocs.SkipTo(int)</c>. Larger values result in
    /// smaller indexes, greater acceleration, but fewer accelerable cases, while
    /// smaller values result in bigger indexes, less acceleration and more
    /// accelerable cases. More detailed experiments would be useful here.
    /// </summary>
    internal int skipInterval = 16;

    /// <summary>
    /// Expert: The maximum number of skip levels. Smaller values result in
    /// slightly smaller indexes, but slower skipping in big posting lists.
    /// </summary>
    internal int maxSkipLevels = 10;

    // File pointer of the companion writer's output recorded with the previous
    // index entry; only used when isIndex is true.
    private long lastIndexPointer;
    private bool isIndex;

    // Text and field number of the most recently written term. They are used
    // for shared-prefix compression and for the ordering assertion.
    private char[] lastTermText = new char[10];
    private int lastTermTextLength;
    private int lastFieldNumber = - 1;

    // Scratch buffer reused by Add(Term, TermInfo) to hold the term's characters.
    private char[] termTextBuffer = new char[10];

    // The linked writer: for the main (.tis) writer this is the index (.tii)
    // writer, and for the index writer it points back to the main writer.
    private TermInfosWriter other;

    /// <summary>
    /// Creates the main (".tis") writer together with its linked index (".tii") writer.
    /// </summary>
    /// <param name="directory">Directory in which both outputs are created.</param>
    /// <param name="segment">Segment name used as the prefix of both file names.</param>
    /// <param name="fis">Field infos used to map between field names and numbers.</param>
    /// <param name="interval">Value stored as <see cref="indexInterval"/>.</param>
    public TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval)
    {
        Initialize(directory, segment, fis, interval, false);
        try
        {
            other = new TermInfosWriter(directory, segment, fis, interval, true);
            other.other = this;
        }
        catch
        {
            // The index-side writer could not be created. Release the already
            // opened ".tis" output so it is not leaked, then surface the
            // original failure to the caller.
            try
            {
                output.Close();
            }
            catch (Exception)
            {
                // Deliberately ignored: the creation failure rethrown below is
                // the error the caller needs to see.
            }
            throw;
        }
    }

    /// <summary>
    /// Creates a single writer without a linked companion; used by the public
    /// constructor to build the index (".tii") writer.
    /// </summary>
    private TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval, bool isIndex)
    {
        Initialize(directory, segment, fis, interval, isIndex);
    }

    /// <summary>
    /// Stores the configuration, creates the output file and writes its header:
    /// format, a placeholder for the entry count, indexInterval, skipInterval
    /// and maxSkipLevels.
    /// </summary>
    private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
    {
        indexInterval = interval;
        fieldInfos = fis;
        isIndex = isi;
        output = directory.CreateOutput(segment + (isIndex ? ".tii" : ".tis"));
        output.WriteInt(FORMAT); // write format
        output.WriteLong(0); // leave space for size; patched by Close()
        output.WriteInt(indexInterval); // write indexInterval
        output.WriteInt(skipInterval); // write skipInterval
        output.WriteInt(maxSkipLevels); // write maxSkipLevels
    }

    /// <summary>
    /// Adds a new &lt;Term, TermInfo&gt; pair, copying the term text into a
    /// reusable buffer and resolving the field name to its number.
    /// </summary>
    /// <param name="term">Term to add; its text and field are read.</param>
    /// <param name="ti">Term info written alongside the term.</param>
    internal void Add(Term term, TermInfo ti)
    {
        int length = term.text.Length;
        if (termTextBuffer.Length < length)
        {
            // Grow with 25% headroom so slightly longer terms reuse the buffer.
            termTextBuffer = new char[(int) (length * 1.25)];
        }
        term.text.CopyTo(0, termTextBuffer, 0, length);
        Add(fieldInfos.FieldNumber(term.field), termTextBuffer, 0, length, ti);
    }

    /// <summary>
    /// Compares the last written term with the given one. Currently used only
    /// by the assert statement in <c>Add</c>.
    /// </summary>
    /// <returns>
    /// A negative value if the last term sorts before the given term, a positive
    /// value if it sorts after, and 0 if they compare equal.
    /// </returns>
    private int CompareToLastTerm(int fieldNumber, char[] termText, int start, int length)
    {
        int pos = 0;
        if (lastFieldNumber != fieldNumber)
        {
            int cmp = String.CompareOrdinal(fieldInfos.FieldName(lastFieldNumber), fieldInfos.FieldName(fieldNumber));
            // If there is a field named "" (empty string) then we
            // will get 0 on this comparison, yet, it's "OK". But
            // it's not OK if two different field numbers map to
            // the same name.
            if (cmp != 0 || lastFieldNumber != - 1)
            {
                return cmp;
            }
        }
        while (pos < length && pos < lastTermTextLength)
        {
            char c1 = lastTermText[pos];
            char c2 = termText[pos + start];
            if (c1 < c2)
            {
                return - 1;
            }
            else if (c1 > c2)
            {
                return 1;
            }
            pos++;
        }
        if (pos < lastTermTextLength)
        {
            // Last term was longer
            return 1;
        }
        else if (pos < length)
        {
            // Last term was shorter
            return - 1;
        }
        else
        {
            return 0;
        }
    }

    /// <summary>
    /// Adds a new &lt;Term, TermInfo&gt; pair to the set.
    /// Term must be lexicographically greater than all previous Terms added.
    /// The asserts check that the freq and prox pointers are not smaller than
    /// those of the previous entry.
    /// </summary>
    /// <param name="fieldNumber">Number of the term's field.</param>
    /// <param name="termText">Buffer holding the term's characters.</param>
    /// <param name="termTextStart">Offset of the first term character in <paramref name="termText"/>.</param>
    /// <param name="termTextLength">Number of term characters.</param>
    /// <param name="ti">Term info to write; it is copied into the writer's "last" state.</param>
    internal void Add(int fieldNumber, char[] termText, int termTextStart, int termTextLength, TermInfo ti)
    {
        System.Diagnostics.Debug.Assert(CompareToLastTerm(fieldNumber, termText, termTextStart, termTextLength) < 0 ||
            (isIndex && termTextLength == 0 && lastTermTextLength == 0),
            "Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) + "(number " + fieldNumber + ")" +
            " lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" +
            " text=" + new String(termText, termTextStart, termTextLength) + " lastText=" + new String(lastTermText, 0, lastTermTextLength));
        System.Diagnostics.Debug.Assert(ti.freqPointer >= lastTi.freqPointer, "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
        System.Diagnostics.Debug.Assert(ti.proxPointer >= lastTi.proxPointer, "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");

        // Must happen before this entry is written: the index entry records the
        // previous term and the main output's file pointer at this moment.
        if (!isIndex && size % indexInterval == 0)
        {
            other.Add(lastFieldNumber, lastTermText, 0, lastTermTextLength, lastTi); // add an index term
        }

        WriteTerm(fieldNumber, termText, termTextStart, termTextLength); // write term
        output.WriteVInt(ti.docFreq); // write doc freq
        output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers as deltas
        output.WriteVLong(ti.proxPointer - lastTi.proxPointer);
        if (ti.docFreq >= skipInterval)
        {
            // The skip offset is only stored when docFreq reaches skipInterval.
            output.WriteVInt(ti.skipOffset);
        }
        if (isIndex)
        {
            // For the index writer, "other" is the main writer: store how far
            // its output advanced since the previous index entry.
            output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
            lastIndexPointer = other.output.GetFilePointer(); // write pointer
        }

        // Remember this entry for prefix compression and ordering checks.
        if (lastTermText.Length < termTextLength)
        {
            lastTermText = new char[(int) (termTextLength * 1.25)];
        }
        Array.Copy(termText, termTextStart, lastTermText, 0, termTextLength);
        lastTermTextLength = termTextLength;
        lastFieldNumber = fieldNumber;
        lastTi.Set(ti);
        size++;
    }

    /// <summary>
    /// Writes a term as: length of the prefix shared with the last term, length
    /// of the remaining suffix, the suffix characters, and the field number.
    /// </summary>
    private void WriteTerm(int fieldNumber, char[] termText, int termTextStart, int termTextLength)
    {
        // Compute prefix in common with last term:
        int start = 0;
        int limit = termTextLength < lastTermTextLength ? termTextLength : lastTermTextLength;
        while (start < limit)
        {
            if (termText[termTextStart + start] != lastTermText[start])
            {
                break;
            }
            start++;
        }
        int length = termTextLength - start;
        output.WriteVInt(start); // write shared prefix length
        output.WriteVInt(length); // write delta length
        output.WriteChars(termText, start + termTextStart, length); // write delta chars
        output.WriteVInt(fieldNumber); // write field num
    }

    /// <summary>
    /// Called to complete TermInfos creation: patches the entry count into the
    /// header, closes the output and, for the main writer, closes the linked
    /// index writer as well.
    /// </summary>
    /// <remarks>
    /// The closes run in <c>finally</c> blocks so that a failure while writing
    /// the size or closing this output does not leave the other output open.
    /// </remarks>
    internal void Close()
    {
        try
        {
            output.Seek(4); // write size after format
            output.WriteLong(size);
        }
        finally
        {
            try
            {
                output.Close();
            }
            finally
            {
                // Only the main writer closes its companion; the companion's
                // "other" points back here and must not recurse.
                if (!isIndex)
                {
                    other.Close();
                }
            }
        }
    }
}
}