/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Documents;
using TokenStream = Lucene.Net.Analysis.TokenStream;
namespace Lucene.Net.Index
{
/// <summary> Holds state for inverting all occurrences of a single
/// field in the document. This class doesn't do anything
/// itself; instead, it forwards the tokens produced by
/// analysis to its own consumer
/// (InvertedDocConsumerPerField). It also interacts with an
/// endConsumer (InvertedDocEndConsumerPerField).
/// </summary>
sealed class DocInverterPerField:DocFieldConsumerPerField
{
private DocInverterPerThread perThread;
private FieldInfo fieldInfo;
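// Downstream per-field consumers: "consumer" receives every inverted token
// (typically the terms hash that feeds postings and term vectors), while
// "endConsumer" (typically the norms writer) is finished once per document
// after all instances of the field have been processed.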
internal InvertedDocConsumerPerField consumer;
internal InvertedDocEndConsumerPerField endConsumer;
internal DocumentsWriter.DocState docState;
internal FieldInvertState fieldState;
public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo)
{
this.perThread = perThread;
this.fieldInfo = fieldInfo;
docState = perThread.docState;
fieldState = perThread.fieldState;
this.consumer = perThread.consumer.AddField(this, fieldInfo);
this.endConsumer = perThread.endConsumer.AddField(this, fieldInfo);
}
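/// <summary> Forwards the abort to both downstream consumers so any
/// partially buffered state for this field is discarded.
/// </summary>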
public override void Abort()
{
consumer.Abort();
endConsumer.Abort();
}
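/// <summary> Inverts all instances of this field in the current document.
/// Un-tokenized fields are indexed as a single token; tokenized fields are
/// run through the analyzer's TokenStream. Every token is forwarded to the
/// consumer while position, offset, overlap count and boost are accumulated
/// in fieldState.
/// </summary>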
public override void ProcessFields(IFieldable[] fields, int count)
{
fieldState.Reset(docState.doc.Boost);
int maxFieldLength = docState.maxFieldLength;
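// Ask the consumer whether it wants the tokens of these field instances at
// all; if not, analysis is skipped below even for indexed fields.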
bool doInvert = consumer.Start(fields, count);
for (int i = 0; i < count; i++)
{
IFieldable field = fields[i];
// TODO FI: this should be "genericized" to querying
// consumer if it wants to see this particular field
// tokenized.
if (field.IsIndexed && doInvert)
{
bool anyToken;
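// When this field occurs more than once in the document, advance the
// position by the analyzer's position increment gap so that phrase and
// span queries do not match across instance boundaries.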
if (fieldState.length > 0)
fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
if (!field.IsTokenized)
{
// un-tokenized field
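// The entire value is indexed as one token: perThread.singleToken is a
// reused attribute source that is re-initialized with the string value and
// its offsets, then handed to the consumer as the attribute source.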
System.String stringValue = field.StringValue;
int valueLength = stringValue.Length;
perThread.singleToken.Reinit(stringValue, 0, valueLength);
fieldState.attributeSource = perThread.singleToken;
consumer.Start(field);
bool success = false;
try
{
consumer.Add();
success = true;
}
finally
{
if (!success)
docState.docWriter.SetAborting();
}
fieldState.offset += valueLength;
fieldState.length++;
fieldState.position++;
anyToken = valueLength > 0;
}
else
{
// tokenized field
TokenStream stream;
TokenStream streamValue = field.TokenStreamValue;
if (streamValue != null)
stream = streamValue;
else
{
// the field does not have a TokenStream,
// so we have to obtain one from the analyzer
System.IO.TextReader reader; // find or make Reader
System.IO.TextReader readerValue = field.ReaderValue;
if (readerValue != null)
reader = readerValue;
else
{
System.String stringValue = field.StringValue;
if (stringValue == null)
throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
perThread.stringReader.Init(stringValue);
reader = perThread.stringReader;
}
// Tokenize field and add to postingTable
stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
}
// reset the TokenStream to the first token
stream.Reset();
int startLength = fieldState.length;
try
{
int offsetEnd = fieldState.offset - 1;
bool hasMoreTokens = stream.IncrementToken();
fieldState.attributeSource = stream;
IOffsetAttribute offsetAttribute = fieldState.attributeSource.AddAttribute<IOffsetAttribute>();
IPositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.AddAttribute<IPositionIncrementAttribute>();
consumer.Start(field);
for (; ; )
{
// If we hit an exception in stream.next below
// (which is fairly common, eg if analyzer
// chokes on a given document), then it's
// non-aborting and (above) this one document
// will be marked as deleted, but still
// consume a docID
if (!hasMoreTokens)
break;
int posIncr = posIncrAttribute.PositionIncrement;
fieldState.position += posIncr;
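// fieldState.position was just advanced by posIncr; back it off by one so
// it holds the current token's position while consumer.Add() runs (it is
// re-incremented after Add below).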
if (fieldState.position > 0)
{
fieldState.position--;
}
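// A zero position increment means this token overlaps the previous one
// (e.g. an injected synonym); count it so length normalization can
// optionally discount overlapping tokens.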
if (posIncr == 0)
fieldState.numOverlap++;
bool success = false;
try
{
// If we hit an exception in here, we abort
// all buffered documents since the last
// flush, on the likelihood that the
// internal state of the consumer is now
// corrupt and should not be flushed to a
// new segment:
consumer.Add();
success = true;
}
finally
{
if (!success)
docState.docWriter.SetAborting();
}
fieldState.position++;
offsetEnd = fieldState.offset + offsetAttribute.EndOffset;
if (++fieldState.length >= maxFieldLength)
{
if (docState.infoStream != null)
docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
break;
}
hasMoreTokens = stream.IncrementToken();
}
// trigger streams to perform end-of-stream operations
stream.End();
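// After End() the offset attribute reflects the final offset for this
// instance; fold it into the running offset so the next instance's offsets
// continue from here.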
fieldState.offset += offsetAttribute.EndOffset;
anyToken = fieldState.length > startLength;
}
finally
{
stream.Close();
}
}
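// Add the analyzer's offset gap between field instances, but only if this
// instance actually produced at least one token.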
if (anyToken)
fieldState.offset += docState.analyzer.GetOffsetGap(field);
fieldState.boost *= field.Boost;
}
// LUCENE-2387: don't hang onto the field, so GC can
// reclaim
fields[i] = null;
}
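// All instances of this field have been processed; let both consumers do
// their per-field end-of-document work.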
consumer.Finish();
endConsumer.Finish();
}
}
}