/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Lucene.Net.Documents;
using Lucene.Net.Support;
using Document = Lucene.Net.Documents.Document;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using StringHelper = Lucene.Net.Util.StringHelper;
namespace Lucene.Net.Index
{
/// <summary>
/// Access to the Fieldable Info file that describes document fields and whether or
/// not they are indexed. Each segment has a separate Fieldable Info file. Objects
/// of this class are thread-safe for multiple readers, but only one thread can
/// be adding documents at a time, with no other reader or writer threads
/// accessing this object.
/// </summary>
public sealed class FieldInfos : ICloneable
{
// Used internally (i.e. not written to *.fnm files) for pre-2.9 files,
// which have no format header.
public const int FORMAT_PRE = -1;

// First used in 2.9; prior to 2.9 there was no format header.
public const int FORMAT_START = -2;

// Format header value emitted by Write(IndexOutput).
internal static readonly int CURRENT_FORMAT = FORMAT_START;

// Bit flags packed into the single per-field byte that Write(IndexOutput)
// emits after each field name and that Read decodes again.
internal const byte IS_INDEXED = 0x1;
internal const byte STORE_TERMVECTOR = 0x2;
internal const byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
internal const byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
internal const byte OMIT_NORMS = 0x10;
internal const byte STORE_PAYLOADS = 0x20;
internal const byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;

// Field infos in insertion order; FieldInfo(int) looks entries up by list index.
// The element type is spelled out because the generic List needs a type argument.
private readonly System.Collections.Generic.List<FieldInfo> byNumber = new System.Collections.Generic.List<FieldInfo>();

// Field infos keyed by field name; FieldInfo(String) looks entries up here.
private readonly HashMap<string, FieldInfo> byName = new HashMap<string, FieldInfo>();

// Format detected by the most recent Read call; inspected by the reading
// constructor to decide whether a pre-UTF-8 retry is applicable.
private int format;
/// <summary>
/// Creates an empty FieldInfos: no fields are registered until one of the
/// Add overloads is called.
/// </summary>
public /*internal*/ FieldInfos()
{
}
/// <summary>
/// Constructs a FieldInfos object by opening the named file in the given
/// directory and reading the field infos from it. The input is always
/// closed before the constructor returns or throws.
/// </summary>
/// <param name="d">The directory to open the IndexInput from</param>
/// <param name="name">The name of the file to open the IndexInput from in the Directory</param>
/// <exception cref="System.IO.IOException">
/// Rethrown when reading fails and either the file is not in the pre-2.9
/// format or the pre-UTF-8 retry fails as well.
/// </exception>
public /*internal*/ FieldInfos(Directory d, String name)
{
    IndexInput input = d.OpenInput(name);
    try
    {
        Read(input, name);
    }
    catch (System.IO.IOException)
    {
        if (format != FORMAT_PRE)
        {
            // The IOException cannot be caused by LUCENE-1623, so re-throw it.
            throw;
        }

        // LUCENE-1623: FORMAT_PRE (before there was a format) may be
        // 2.3.2 (pre-utf8) or 2.4.x (utf8) encoding; start over from the
        // beginning of the file with the input switched to pre-utf8 mode.
        input.Seek(0);
        input.SetModifiedUTF8StringsMode();
        byNumber.Clear();
        byName.Clear();

        bool retrySucceeded;
        try
        {
            Read(input, name);
            retrySucceeded = true;
        }
        catch (Exception)
        {
            // Ignore any new exception; the original IOException is reported instead.
            retrySucceeded = false;
        }

        if (!retrySucceeded)
        {
            // Issued outside the nested catch on purpose: here a bare
            // 'throw' rethrows the original IOException with its stack trace.
            throw;
        }
    }
    finally
    {
        input.Close();
    }
}
/// <summary>
/// Returns a deep clone of this FieldInfos instance: every contained
/// FieldInfo is cloned and registered in the copy both by position and by
/// name. The detected file format is not carried over to the copy.
/// </summary>
/// <returns>A new, independent FieldInfos instance.</returns>
public Object Clone()
{
    lock (this)
    {
        var copy = new FieldInfos();
        foreach (FieldInfo source in byNumber)
        {
            var duplicate = (FieldInfo) source.Clone();
            copy.byNumber.Add(duplicate);
            copy.byName[duplicate.name] = duplicate;
        }
        return copy;
    }
}
/// <summary>
/// Adds field info for every field of a Document. Each field is registered
/// with its own indexing and term vector settings; payload storage is
/// always passed as false here.
/// </summary>
/// <param name="doc">The document whose fields are registered</param>
public void Add(Document doc)
{
    lock (this)
    {
        // Enumerate the fields directly: the former intermediate local was
        // declared as a generic IList without its type argument.
        foreach (IFieldable field in doc.GetFields())
        {
            Add(field.Name, field.IsIndexed, field.IsTermVectorStored,
                field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector, field.OmitNorms,
                false, field.OmitTermFreqAndPositions);
        }
    }
}
/// <summary>
/// Returns true if any indexed field does not omit term frequencies and
/// positions.
/// </summary>
/// <returns>true as soon as one such field is found; false otherwise.</returns>
internal bool HasProx()
{
    foreach (FieldInfo info in byNumber)
    {
        bool keepsProx = info.isIndexed && !info.omitTermFreqAndPositions;
        if (keepsProx)
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Add fields that are indexed. Whether they have termvectors has to be specified.
/// </summary>
/// <param name="names">The names of the fields</param>
/// <param name="storeTermVectors">Whether the fields store term vectors or not</param>
/// <param name="storePositionWithTermVector">true if positions should be stored.</param>
/// <param name="storeOffsetWithTermVector">true if offsets should be stored</param>
public void AddIndexed(System.Collections.Generic.ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
{
    lock (this)
    {
        foreach (string name in names)
        {
            // isIndexed is always true for this entry point.
            Add(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
        }
    }
}
/// <summary>
/// Adds every named field, assuming the fields are not storing term vectors.
/// </summary>
/// <param name="names">The names of the fields</param>
/// <param name="isIndexed">Whether the fields are indexed or not</param>
public void Add(System.Collections.Generic.ICollection<string> names, bool isIndexed)
{
    lock (this)
    {
        foreach (string name in names)
        {
            Add(name, isIndexed);
        }
    }
}
/// <summary>
/// Calls the six-parameter Add with false for all term vector parameters
/// and for omitNorms.
/// </summary>
/// <param name="name">The name of the Fieldable</param>
/// <param name="isIndexed">true if the field is indexed</param>
public void Add(String name, bool isIndexed)
{
    // Every optional setting is switched off for this overload.
    const bool storeTermVector = false;
    const bool storePositionWithTermVector = false;
    const bool storeOffsetWithTermVector = false;
    const bool omitNorms = false;

    lock (this)
    {
        Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms);
    }
}
/// <summary>
/// Calls the six-parameter Add with false for term vector positions,
/// term vector offsets and omitNorms.
/// </summary>
/// <param name="name">The name of the field</param>
/// <param name="isIndexed">true if the field is indexed</param>
/// <param name="storeTermVector">true if the term vector should be stored</param>
public void Add(System.String name, bool isIndexed, bool storeTermVector)
{
    // Positions, offsets and norm omission are all switched off here.
    const bool storePositionWithTermVector = false;
    const bool storeOffsetWithTermVector = false;
    const bool omitNorms = false;

    lock (this)
    {
        Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms);
    }
}
/// <summary>
/// Registers the field through the six-parameter Add, with norms kept
/// (omitNorms is passed as false). Whether the field is added or an
/// existing entry is updated is decided by the overload that finally
/// handles the call.
/// </summary>
/// <param name="name">The name of the field</param>
/// <param name="isIndexed">true if the field is indexed</param>
/// <param name="storeTermVector">true if the term vector should be stored</param>
/// <param name="storePositionWithTermVector">true if the term vector with positions should be stored</param>
/// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored</param>
public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
{
    const bool omitNorms = false;

    lock (this)
    {
        Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms);
    }
}
/// <summary>
/// Registers the field through the eight-parameter Add, with payload
/// storage and term-frequency/position omission both passed as false.
/// The FieldInfo returned by that overload is discarded.
/// </summary>
/// <param name="name">The name of the field</param>
/// <param name="isIndexed">true if the field is indexed</param>
/// <param name="storeTermVector">true if the term vector should be stored</param>
/// <param name="storePositionWithTermVector">true if the term vector with positions should be stored</param>
/// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored</param>
/// <param name="omitNorms">true if the norms for the indexed field should be omitted</param>
public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms)
{
    const bool storePayloads = false;
    const bool omitTermFreqAndPositions = false;

    lock (this)
    {
        Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
    }
}
/// <summary>
/// If the field is not yet known, adds it. If it is known, the existing
/// entry is handed the new settings through FieldInfo.Update, which is
/// responsible for reconciling them with the previously recorded ones.
/// </summary>
/// <param name="name">The name of the field</param>
/// <param name="isIndexed">true if the field is indexed</param>
/// <param name="storeTermVector">true if the term vector should be stored</param>
/// <param name="storePositionWithTermVector">true if the term vector with positions should be stored</param>
/// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored</param>
/// <param name="omitNorms">true if the norms for the indexed field should be omitted</param>
/// <param name="storePayloads">true if payloads should be stored for this field</param>
/// <param name="omitTermFreqAndPositions">true if term freqs should be omitted for this field</param>
/// <returns>The newly created FieldInfo, or the existing one after it was updated.</returns>
public FieldInfo Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
{
    lock (this)
    {
        FieldInfo existing = FieldInfo(name);
        if (existing != null)
        {
            // Known field: fold the new settings into the recorded entry.
            existing.Update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
            return existing;
        }

        // Unknown field: create and register a fresh entry.
        return AddInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
    }
}
/// <summary>
/// Creates a FieldInfo for the (interned) name, appends it to the
/// positional list and registers it under its name. The current list
/// count is passed to the FieldInfo constructor before the entry is
/// appended. No check for an existing entry is made here.
/// </summary>
/// <returns>The FieldInfo that was created and registered.</returns>
private FieldInfo AddInternal(String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
{
    String internedName = StringHelper.Intern(name);
    int nextIndex = byNumber.Count;

    var created = new FieldInfo(internedName, isIndexed, nextIndex, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);

    byNumber.Add(created);
    byName[internedName] = created;
    return created;
}
/// <summary>
/// Returns the number of the field with the given name.
/// </summary>
/// <param name="fieldName">The name of the field to look up</param>
/// <returns>The field's number, or -1 when the name lookup yields null.</returns>
public int FieldNumber(System.String fieldName)
{
    FieldInfo match = FieldInfo(fieldName);
    if (match == null)
    {
        return -1;
    }
    return match.number;
}
/// <summary>
/// Returns the FieldInfo registered under the given name, as stored in the
/// name map. Callers in this class treat a null result as "unknown field".
/// </summary>
/// <param name="fieldName">The name of the field to look up</param>
/// <returns>Whatever the name map holds for <paramref name="fieldName"/>.</returns>
public FieldInfo FieldInfo(System.String fieldName)
{
    FieldInfo match = byName[fieldName];
    return match;
}
/// <summary>
/// Return the fieldName identified by its number.
/// </summary>
/// <param name="fieldNumber">The number of the field to look up</param>
/// <returns>
/// The fieldName, or an empty string when the lookup by number yields no
/// FieldInfo.
/// </returns>
public System.String FieldName(int fieldNumber)
{
    FieldInfo match = FieldInfo(fieldNumber);
    if (match != null)
    {
        return match.name;
    }
    return "";
}
/// <summary>
/// Return the fieldinfo object referenced by the fieldNumber.
/// </summary>
/// <param name="fieldNumber">The position of the field in the positional list</param>
/// <returns>
/// The FieldInfo object, or null when the given fieldNumber doesn't exist
/// (negative, or not smaller than the number of registered fields).
/// </returns>
public FieldInfo FieldInfo(int fieldNumber)
{
    // Check both bounds: the list indexer throws for an index past the
    // end, whereas the contract of this method is to answer null.
    bool exists = fieldNumber >= 0 && fieldNumber < byNumber.Count;
    return exists ? byNumber[fieldNumber] : null;
}
/// <summary>
/// Returns the number of registered fields.
/// </summary>
/// <returns>The count of entries in the positional list.</returns>
public int Size()
{
    int fieldCount = byNumber.Count;
    return fieldCount;
}
/// <summary>
/// Tells whether at least one registered field stores term vectors.
/// </summary>
/// <returns>true as soon as one such field is found; false otherwise.</returns>
public bool HasVectors()
{
    foreach (FieldInfo info in byNumber)
    {
        if (info.storeTermVector)
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Creates the named output in the given directory, writes all field
/// infos to it and closes the output, even when writing fails.
/// </summary>
/// <param name="d">The directory to create the output in</param>
/// <param name="name">The name of the output to create</param>
public void Write(Directory d, System.String name)
{
    IndexOutput stream = d.CreateOutput(name);
    try
    {
        Write(stream);
    }
    finally
    {
        stream.Close();
    }
}
/// <summary>
/// Writes the field infos to the given output: the current format marker,
/// the number of fields, and then for each field its name followed by one
/// byte of flag bits.
/// </summary>
/// <param name="output">The output to write to; it is not closed here</param>
public void Write(IndexOutput output)
{
    output.WriteVInt(CURRENT_FORMAT);
    output.WriteVInt(Size());

    foreach (FieldInfo info in byNumber)
    {
        // Collect the per-field settings into a single flag byte.
        int flags = 0;
        if (info.isIndexed)
        {
            flags |= IS_INDEXED;
        }
        if (info.storeTermVector)
        {
            flags |= STORE_TERMVECTOR;
        }
        if (info.storePositionWithTermVector)
        {
            flags |= STORE_POSITIONS_WITH_TERMVECTOR;
        }
        if (info.storeOffsetWithTermVector)
        {
            flags |= STORE_OFFSET_WITH_TERMVECTOR;
        }
        if (info.omitNorms)
        {
            flags |= OMIT_NORMS;
        }
        if (info.storePayloads)
        {
            flags |= STORE_PAYLOADS;
        }
        if (info.omitTermFreqAndPositions)
        {
            flags |= OMIT_TERM_FREQ_AND_POSITIONS;
        }

        output.WriteString(info.name);
        output.WriteByte((byte) flags);
    }
}
/// <summary>
/// Reads field infos from the given input and registers each of them.
/// The first VInt decides the layout: a negative value is a format marker
/// and is followed by the field count, any other value is itself the
/// field count of a file without a format header. The detected format is
/// stored in the format field before any validation takes place.
/// </summary>
/// <param name="input">The input to read from, positioned at the start of the data</param>
/// <param name="fileName">The file name, used only in exception messages</param>
/// <exception cref="CorruptIndexException">
/// Thrown when the format marker is not recognized, or when the input has
/// not been consumed completely after the last field was read.
/// </exception>
private void Read(IndexInput input, String fileName)
{
    int header = input.ReadVInt();

    // A negative header is a real format marker; otherwise there is no
    // format header and the value read is the field count.
    format = (header < 0) ? header : FORMAT_PRE;

    bool knownFormat = (format == FORMAT_PRE) || (format == FORMAT_START);
    if (!knownFormat)
    {
        throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
    }

    int fieldCount = (format == FORMAT_PRE) ? header : input.ReadVInt();

    for (int index = 0; index < fieldCount; index++)
    {
        String fieldName = StringHelper.Intern(input.ReadString());
        byte flags = input.ReadByte();

        AddInternal(
            fieldName,
            (flags & IS_INDEXED) != 0,
            (flags & STORE_TERMVECTOR) != 0,
            (flags & STORE_POSITIONS_WITH_TERMVECTOR) != 0,
            (flags & STORE_OFFSET_WITH_TERMVECTOR) != 0,
            (flags & OMIT_NORMS) != 0,
            (flags & STORE_PAYLOADS) != 0,
            (flags & OMIT_TERM_FREQ_AND_POSITIONS) != 0);
    }

    // Anything left over means the file holds more than the declared fields.
    var position = input.FilePointer;
    var length = input.Length();
    if (position != length)
    {
        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + position + " vs size " + length);
    }
}
}
}