/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using Lucene.Net.Documents; using Lucene.Net.Support; using Document = Lucene.Net.Documents.Document; using Directory = Lucene.Net.Store.Directory; using IndexInput = Lucene.Net.Store.IndexInput; using IndexOutput = Lucene.Net.Store.IndexOutput; using StringHelper = Lucene.Net.Util.StringHelper; namespace Lucene.Net.Index { /// Access to the Fieldable Info file that describes document fields and whether or /// not they are indexed. Each segment has a separate Fieldable Info file. Objects /// of this class are thread-safe for multiple readers, but only one thread can /// be adding documents at a time, with no other reader or writer threads /// accessing this object. 
/// <remarks>
/// Field metadata is kept in two parallel structures: a list indexed by field
/// number (assigned in insertion order) and a map keyed by field name.
/// </remarks>
public sealed class FieldInfos : ICloneable
{
    // Used internally (ie not written to *.fnm files) for pre-2.9 files
    public const int FORMAT_PRE = -1;

    // First used in 2.9; prior to 2.9 there was no format header
    public const int FORMAT_START = -2;

    internal static readonly int CURRENT_FORMAT = FORMAT_START;

    // Bit flags packed into the single per-field byte that Write emits and Read decodes.
    internal const byte IS_INDEXED = 0x1;
    internal const byte STORE_TERMVECTOR = 0x2;
    internal const byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
    internal const byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
    internal const byte OMIT_NORMS = 0x10;
    internal const byte STORE_PAYLOADS = 0x20;
    internal const byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;

    // Fields in number order; a field's number is its index in this list.
    private readonly System.Collections.Generic.List<FieldInfo> byNumber = new System.Collections.Generic.List<FieldInfo>();

    // Same FieldInfo instances as byNumber, keyed by field name.
    private readonly HashMap<string, FieldInfo> byName = new HashMap<string, FieldInfo>();

    // Format detected by the most recent Read call (FORMAT_PRE or FORMAT_START).
    private int format;

    /// <summary>Creates an empty instance with no fields.</summary>
    public /*internal*/ FieldInfos()
    {
    }

    /// <summary>
    /// Constructs a FieldInfos object by reading the named file from the given directory.
    /// </summary>
    /// <param name="d">The directory to open the IndexInput from</param>
    /// <param name="name">The name of the file to open the IndexInput from in the Directory</param>
    /// <exception cref="System.IO.IOException">
    /// Rethrown when the file cannot be read, including after the pre-utf8 retry described below.
    /// </exception>
    public /*internal*/ FieldInfos(Directory d, String name)
    {
        IndexInput input = d.OpenInput(name);
        try
        {
            try
            {
                Read(input, name);
            }
            catch (System.IO.IOException)
            {
                if (format != FORMAT_PRE)
                {
                    // The IOException cannot be caused by LUCENE-1623, so re-throw it
                    throw;
                }

                // LUCENE-1623: FORMAT_PRE (before there was a format) may be
                // 2.3.2 (pre-utf8) or 2.4.x (utf8) encoding; retry with input
                // set to pre-utf8, discarding anything the first attempt added.
                input.Seek(0);
                input.SetModifiedUTF8StringsMode();
                byNumber.Clear();
                byName.Clear();

                // A flag is used because "throw;" inside the nested catch would
                // rethrow the retry's exception rather than the original one.
                bool rethrow = false;
                try
                {
                    Read(input, name);
                }
                catch (Exception)
                {
                    // Ignore any new exception & set to throw original IOE
                    rethrow = true;
                }

                if (rethrow)
                {
                    // Preserve stack trace of the original IOException
                    throw;
                }
            }
        }
        finally
        {
            input.Close();
        }
    }

    /// <summary>Returns a deep clone of this FieldInfos instance.</summary>
    /// <returns>
    /// A new FieldInfos holding a clone of every FieldInfo, registered under the
    /// same numbers and names.
    /// </returns>
    public Object Clone()
    {
        // lock (this) is kept throughout the class rather than a private gate:
        // external code may synchronize on the instance - TODO confirm before changing.
        lock (this)
        {
            var fis = new FieldInfos();
            int numField = byNumber.Count;
            for (int i = 0; i < numField; i++)
            {
                var fi = (FieldInfo)byNumber[i].Clone();
                fis.byNumber.Add(fi);
                fis.byName[fi.name] = fi;
            }
            return fis;
        }
    }

    /// <summary>Adds field info for every field of a Document.</summary>
    /// <param name="doc">The document whose fields are registered</param>
    public void Add(Document doc)
    {
        lock (this)
        {
            System.Collections.Generic.IList<IFieldable> fields = doc.GetFields();
            foreach (IFieldable field in fields)
            {
                // storePayloads is always passed as false on this path.
                Add(field.Name, field.IsIndexed, field.IsTermVectorStored,
                    field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
                    field.OmitNorms, false, field.OmitTermFreqAndPositions);
            }
        }
    }

    /// <summary>Returns true if any indexed field does not omit term freqs and positions.</summary>
    internal bool HasProx()
    {
        int numFields = byNumber.Count;
        for (int i = 0; i < numFields; i++)
        {
            FieldInfo fi = FieldInfo(i);
            if (fi.isIndexed && !fi.omitTermFreqAndPositions)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Adds fields that are indexed. Whether they have term vectors has to be specified.
    /// </summary>
    /// <param name="names">The names of the fields</param>
    /// <param name="storeTermVectors">Whether the fields store term vectors or not</param>
    /// <param name="storePositionWithTermVector">true if positions should be stored.</param>
    /// <param name="storeOffsetWithTermVector">true if offsets should be stored</param>
    public void AddIndexed(System.Collections.Generic.ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
    {
        lock (this)
        {
            foreach (string name in names)
            {
                Add(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
            }
        }
    }

    /// <summary>Adds the named fields, assuming they are not storing term vectors.</summary>
    /// <param name="names">The names of the fields</param>
    /// <param name="isIndexed">Whether the fields are indexed or not</param>
    public void Add(System.Collections.Generic.ICollection<string> names, bool isIndexed)
    {
        lock (this)
        {
            foreach (string name in names)
            {
                Add(name, isIndexed);
            }
        }
    }

    /// <summary>
    /// Delegates to the six-argument overload with false for all term vector
    /// parameters and for omitNorms.
    /// </summary>
    /// <param name="name">The name of the Fieldable</param>
    /// <param name="isIndexed">true if the field is indexed</param>
    public void Add(String name, bool isIndexed)
    {
        lock (this)
        {
            Add(name, isIndexed, false, false, false, false);
        }
    }

    /// <summary>
    /// Delegates to the six-argument overload with false for term vector
    /// positions, term vector offsets and omitNorms.
    /// </summary>
    /// <param name="name">The name of the field</param>
    /// <param name="isIndexed">true if the field is indexed</param>
    /// <param name="storeTermVector">true if the term vector should be stored</param>
    public void Add(System.String name, bool isIndexed, bool storeTermVector)
    {
        lock (this)
        {
            Add(name, isIndexed, storeTermVector, false, false, false);
        }
    }

    /// <summary>
    /// Adds the field if it is not yet known; otherwise updates the existing entry.
    /// Delegates to the six-argument overload with omitNorms set to false.
    /// </summary>
    /// <param name="name">The name of the field</param>
    /// <param name="isIndexed">true if the field is indexed</param>
    /// <param name="storeTermVector">true if the term vector should be stored</param>
    /// <param name="storePositionWithTermVector">true if the term vector with positions should be stored</param>
    /// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored</param>
    public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
    {
        lock (this)
        {
            Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
        }
    }

    /// <summary>
    /// Adds the field if it is not yet known; otherwise updates the existing entry.
    /// Delegates to the eight-argument overload with storePayloads and
    /// omitTermFreqAndPositions set to false.
    /// </summary>
    /// <param name="name">The name of the field</param>
    /// <param name="isIndexed">true if the field is indexed</param>
    /// <param name="storeTermVector">true if the term vector should be stored</param>
    /// <param name="storePositionWithTermVector">true if the term vector with positions should be stored</param>
    /// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored</param>
    /// <param name="omitNorms">true if the norms for the indexed field should be omitted</param>
    public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms)
    {
        lock (this)
        {
            Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, false, false);
        }
    }

    /// <summary>
    /// If the field is not yet known, adds it. If it is known, the supplied flags
    /// are handed to FieldInfo.Update on the existing entry (how Update merges
    /// them is defined by FieldInfo, not here).
    /// </summary>
    /// <param name="name">The name of the field</param>
    /// <param name="isIndexed">true if the field is indexed</param>
    /// <param name="storeTermVector">true if the term vector should be stored</param>
    /// <param name="storePositionWithTermVector">true if the term vector with positions should be stored</param>
    /// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored</param>
    /// <param name="omitNorms">true if the norms for the indexed field should be omitted</param>
    /// <param name="storePayloads">true if payloads should be stored for this field</param>
    /// <param name="omitTermFreqAndPositions">true if term freqs should be omitted for this field</param>
    /// <returns>The newly created or the updated FieldInfo for the name</returns>
    public FieldInfo Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
    {
        lock (this)
        {
            FieldInfo fi = FieldInfo(name);
            if (fi == null)
            {
                return AddInternal(name, isIndexed, storeTermVector, storePositionWithTermVector,
                                   storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
            }

            fi.Update(isIndexed, storeTermVector, storePositionWithTermVector,
                      storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
            return fi;
        }
    }

    // Unconditionally registers a new field under the next free number
    // (the current list size) and indexes it by its interned name.
    private FieldInfo AddInternal(String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
    {
        name = StringHelper.Intern(name);
        var fi = new FieldInfo(name, isIndexed, byNumber.Count, storeTermVector, storePositionWithTermVector,
                               storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
        byNumber.Add(fi);
        byName[name] = fi;
        return fi;
    }

    /// <summary>Returns the number of the named field.</summary>
    /// <param name="fieldName">The name of the field to look up</param>
    /// <returns>The field's number, or -1 when no FieldInfo is found for the name</returns>
    public int FieldNumber(System.String fieldName)
    {
        FieldInfo fi = FieldInfo(fieldName);
        return (fi != null) ? fi.number : -1;
    }

    /// <summary>Returns the FieldInfo registered under the given name.</summary>
    /// <param name="fieldName">The name of the field to look up</param>
    /// <returns>
    /// Whatever the name map yields for the key; callers in this class treat
    /// null as "field not known".
    /// </returns>
    public FieldInfo FieldInfo(System.String fieldName)
    {
        return byName[fieldName];
    }

    /// <summary>Return the fieldName identified by its number.</summary>
    /// <param name="fieldNumber">The number of the field</param>
    /// <returns>
    /// the fieldName or an empty string when the field
    /// with the given number doesn't exist.
    /// </returns>
    public System.String FieldName(int fieldNumber)
    {
        FieldInfo fi = FieldInfo(fieldNumber);
        return (fi != null) ? fi.name : "";
    }

    /// <summary>Return the fieldinfo object referenced by the fieldNumber.</summary>
    /// <param name="fieldNumber">The number of the field</param>
    /// <returns>
    /// the FieldInfo object or null when the given fieldNumber
    /// doesn't exist.
    /// </returns>
    public FieldInfo FieldInfo(int fieldNumber)
    {
        // Guard both ends so a number past the last field yields null, as
        // documented, instead of an out-of-range exception from the list indexer.
        return (fieldNumber >= 0 && fieldNumber < byNumber.Count) ? byNumber[fieldNumber] : null;
    }

    /// <summary>Returns the number of fields currently registered.</summary>
    public int Size()
    {
        return byNumber.Count;
    }

    /// <summary>Returns true if at least one field stores term vectors.</summary>
    public bool HasVectors()
    {
        int count = Size();
        for (int i = 0; i < count; i++)
        {
            if (FieldInfo(i).storeTermVector)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>Writes the field infos to a newly created file in the directory.</summary>
    /// <param name="d">The directory to create the output in</param>
    /// <param name="name">The name of the file to create</param>
    public void Write(Directory d, System.String name)
    {
        IndexOutput output = d.CreateOutput(name);
        try
        {
            Write(output);
        }
        finally
        {
            output.Close();
        }
    }

    /// <summary>
    /// Writes the format header, the field count, and then each field's name
    /// followed by one byte of flag bits.
    /// </summary>
    /// <param name="output">The output to write to; it is not closed by this method</param>
    public void Write(IndexOutput output)
    {
        int count = Size();
        output.WriteVInt(CURRENT_FORMAT);
        output.WriteVInt(count);
        for (int i = 0; i < count; i++)
        {
            FieldInfo fi = FieldInfo(i);
            byte bits = 0x0;
            if (fi.isIndexed) bits |= IS_INDEXED;
            if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
            if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
            if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
            if (fi.omitNorms) bits |= OMIT_NORMS;
            if (fi.storePayloads) bits |= STORE_PAYLOADS;
            if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;

            output.WriteString(fi.name);
            output.WriteByte(bits);
        }
    }

    // Reads every field entry from input into this instance and records the
    // detected format. Throws CorruptIndexException for an unrecognized format
    // header or when bytes remain unread after the last entry.
    private void Read(IndexInput input, String fileName)
    {
        int firstInt = input.ReadVInt();

        // A negative first value is a real format header; otherwise the file
        // predates format headers and the first value is already the field count.
        format = (firstInt < 0) ? firstInt : FORMAT_PRE;

        if (format != FORMAT_PRE && format != FORMAT_START)
        {
            throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
        }

        int size = (format == FORMAT_PRE) ? firstInt : input.ReadVInt();

        for (int i = 0; i < size; i++)
        {
            String name = StringHelper.Intern(input.ReadString());
            byte bits = input.ReadByte();
            bool isIndexed = (bits & IS_INDEXED) != 0;
            bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
            bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
            bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            bool omitNorms = (bits & OMIT_NORMS) != 0;
            bool storePayloads = (bits & STORE_PAYLOADS) != 0;
            bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;

            AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector,
                        storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
        }

        if (input.FilePointer != input.Length())
        {
            throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length());
        }
    }
}
} // namespace Lucene.Net.Index