/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using IndexWriter = Lucene.Net.Index.IndexWriter;
using Parameter = Lucene.Net.Util.Parameter;
using TokenStream = Lucene.Net.Analysis.TokenStream;

namespace Lucene.Net.Documents
{
    /// <summary>
    /// A field is a section of a Document. Each field has two parts, a name and a
    /// value. Values may be free text, provided as a String or as a Reader, or they
    /// may be atomic keywords, which are not further processed. Such keywords may
    /// be used to represent dates, urls, etc. Fields are optionally stored in the
    /// index, so that they may be returned with hits on the document.
    /// </summary>
    [Serializable]
    public sealed class Field : AbstractField, Fieldable
    {
        /// <summary>Specifies whether and how a field should be stored.</summary>
        [Serializable]
        public sealed class Store : Parameter
        {
            internal Store(System.String name) : base(name)
            {
            }

            /// <summary>
            /// Store the original field value in the index in a compressed form. This is
            /// useful for long documents and for binary valued fields.
            /// </summary>
            public static readonly Store COMPRESS = new Store("COMPRESS");

            /// <summary>
            /// Store the original field value in the index. This is useful for short texts
            /// like a document's title which should be displayed with the results. The
            /// value is stored in its original form, i.e. no analyzer is used before it is
            /// stored.
            /// </summary>
            public static readonly Store YES = new Store("YES");

            /// <summary>Do not store the field value in the index.</summary>
            public static readonly Store NO = new Store("NO");
        }

        /// <summary>Specifies whether and how a field should be indexed.</summary>
        [Serializable]
        public sealed class Index : Parameter
        {
            internal Index(System.String name) : base(name)
            {
            }

            /// <summary>
            /// Do not index the field value. This field can thus not be searched,
            /// but one can still access its contents provided it is
            /// <see cref="Field.Store">stored</see>.
            /// </summary>
            public static readonly Index NO = new Index("NO");

            /// <summary>
            /// Index the tokens produced by running the field's value through
            /// an Analyzer. This is useful for common text.
            /// </summary>
            public static readonly Index ANALYZED = new Index("ANALYZED");

            /// <summary>
            /// Deprecated. This has been renamed to <see cref="ANALYZED"/>.
            /// </summary>
            public static readonly Index TOKENIZED = ANALYZED;

            /// <summary>
            /// Index the field's value without using an Analyzer, so it can be searched.
            /// As no analyzer is used the value will be stored as a single term. This is
            /// useful for unique Ids like product numbers.
            /// </summary>
            public static readonly Index NOT_ANALYZED = new Index("NOT_ANALYZED");

            /// <summary>
            /// Deprecated. This has been renamed to <see cref="NOT_ANALYZED"/>.
            /// </summary>
            public static readonly Index UN_TOKENIZED = NOT_ANALYZED;

            /// <summary>
            /// Expert: Index the field's value without an Analyzer, and also
            /// disable the storing of norms. Note that you can also separately
            /// enable/disable norms by calling <c>SetOmitNorms(bool)</c>. No norms means
            /// that index-time field and document boosting and field length normalization
            /// are disabled. The benefit is less memory usage as norms take up one byte
            /// of RAM per indexed field for every document in the index, during searching.
            /// Note that once you index a given field <i>with</i> norms enabled, disabling
            /// norms will have no effect. In other words, for this to have the above
            /// described effect on a field, all instances of that field must be indexed
            /// with NOT_ANALYZED_NO_NORMS from the beginning.
            /// </summary>
            public static readonly Index NOT_ANALYZED_NO_NORMS = new Index("NOT_ANALYZED_NO_NORMS");

            /// <summary>
            /// Deprecated. This has been renamed to <see cref="NOT_ANALYZED_NO_NORMS"/>.
            /// </summary>
            public static readonly Index NO_NORMS = NOT_ANALYZED_NO_NORMS;

            /// <summary>
            /// Expert: Index the tokens produced by running the field's value
            /// through an Analyzer, and also separately disable the storing of norms.
            /// See <see cref="NOT_ANALYZED_NO_NORMS"/> for what norms are and why you
            /// may want to disable them.
            /// </summary>
            public static readonly Index ANALYZED_NO_NORMS = new Index("ANALYZED_NO_NORMS");
        }

        /// <summary>Specifies whether and how a field should have term vectors.</summary>
        [Serializable]
        public sealed class TermVector : Parameter
        {
            internal TermVector(System.String name) : base(name)
            {
            }

            /// <summary>Do not store term vectors.</summary>
            public static readonly TermVector NO = new TermVector("NO");

            /// <summary>
            /// Store the term vectors of each document. A term vector is a list
            /// of the document's terms and their number of occurrences in that document.
            /// </summary>
            public static readonly TermVector YES = new TermVector("YES");

            /// <summary>Store the term vector + token position information.</summary>
            /// <seealso cref="YES"/>
            public static readonly TermVector WITH_POSITIONS = new TermVector("WITH_POSITIONS");

            /// <summary>Store the term vector + Token offset information.</summary>
            /// <seealso cref="YES"/>
            public static readonly TermVector WITH_OFFSETS = new TermVector("WITH_OFFSETS");

            /// <summary>Store the term vector + Token position and offset information.</summary>
            /// <seealso cref="YES"/>
            /// <seealso cref="WITH_POSITIONS"/>
            /// <seealso cref="WITH_OFFSETS"/>
            public static readonly TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");
        }

        /// <summary>
        /// The value of the field as a String, or null. If null, the Reader value,
        /// binary value, or TokenStream value is used. Exactly one of StringValue(),
        /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
        /// </summary>
        public override System.String StringValue()
        {
            return fieldsData is System.String ? (System.String) fieldsData : null;
        }

        /// <summary>
        /// The value of the field as a Reader, or null. If null, the String value,
        /// binary value, or TokenStream value is used. Exactly one of StringValue(),
        /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
        /// </summary>
        public override System.IO.TextReader ReaderValue()
        {
            return fieldsData is System.IO.TextReader ? (System.IO.TextReader) fieldsData : null;
        }

        /// <summary>
        /// The value of the field in Binary, or null. If null, the Reader value,
        /// String value, or TokenStream value is used. Exactly one of StringValue(),
        /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
        /// </summary>
        /// <returns>
        /// null when the field data is not a <c>byte[]</c>; the backing array itself when
        /// the value starts at offset 0 and spans the whole array; otherwise a newly
        /// allocated copy of exactly the value's bytes.
        /// </returns>
        [Obsolete("This method must allocate a new byte[] if the AbstractField.GetBinaryOffset() is non-zero or AbstractField.GetBinaryLength() is not the full length of the byte[]. Please use AbstractField.GetBinaryValue() instead, which simply returns the byte[].")]
        public override byte[] BinaryValue()
        {
            byte[] data = fieldsData as byte[];
            if (data == null)
                return null;

            // Fast path: the value covers the whole array, so no copy is needed.
            if (binaryOffset == 0 && binaryLength == data.Length)
                return data;

            // The value is only a slice of the backing array; hand back just that slice
            // so callers never see bytes outside [binaryOffset, binaryOffset + binaryLength).
            byte[] slice = new byte[binaryLength];
            Array.Copy(data, binaryOffset, slice, 0, binaryLength);
            return slice;
        }

        /// <summary>
        /// The value of the field as a TokenStream, or null. If null, the Reader value,
        /// String value, or binary value is used. Exactly one of StringValue(),
        /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
        /// </summary>
        public override TokenStream TokenStreamValue()
        {
            return fieldsData is TokenStream ? (TokenStream) fieldsData : null;
        }

        /// <summary>
        /// <para>Expert: change the value of this field. This can
        /// be used during indexing to re-use a single Field
        /// instance to improve indexing speed by avoiding GC cost
        /// of new'ing and reclaiming Field instances. Typically
        /// a single <see cref="Document"/> instance is re-used as
        /// well. This helps most on small documents.</para>
        ///
        /// <para>Note that you should only use this method after the
        /// Field has been consumed (ie, the <see cref="Document"/>
        /// containing this Field has been added to the index).
        /// Also, each Field instance should only be used once
        /// within a single <see cref="Document"/> instance. See ImproveIndexingSpeed
        /// for details.</para>
        /// </summary>
        public void SetValue(System.String value_Renamed)
        {
            fieldsData = value_Renamed;
        }

        /// <summary>Expert: change the value of this field. See SetValue(String).</summary>
        public void SetValue(System.IO.TextReader value_Renamed)
        {
            fieldsData = value_Renamed;
        }

        /// <summary>
        /// Expert: change the value of this field to the whole of the given array.
        /// See SetValue(String).
        /// </summary>
        /// <exception cref="System.NullReferenceException">if the array is null</exception>
        public void SetValue(byte[] value_Renamed)
        {
            // Reject null before touching any state, so a failed call cannot leave the
            // field with new data but a stale length. The exception type is the one the
            // unguarded dereference of value_Renamed.Length produced before.
            if (value_Renamed == null)
                throw new System.NullReferenceException("value cannot be null");

            fieldsData = value_Renamed;
            binaryOffset = 0;
            binaryLength = value_Renamed.Length;
        }

        /// <summary>
        /// Expert: change the value of this field to a slice of the given array.
        /// See SetValue(String).
        /// </summary>
        /// <param name="value_Renamed">The array holding the binary value</param>
        /// <param name="offset">Recorded as the binary offset of the value</param>
        /// <param name="length">Recorded as the binary length of the value</param>
        public void SetValue(byte[] value_Renamed, int offset, int length)
        {
            fieldsData = value_Renamed;
            binaryOffset = offset;
            binaryLength = length;
        }

        /// <summary>Expert: change the value of this field. See SetValue(String).</summary>
        public void SetValue(TokenStream value_Renamed)
        {
            fieldsData = value_Renamed;
        }

        /// <summary>
        /// Create a field by specifying its name, value and how it will
        /// be saved in the index. Term vectors will not be stored in the index.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="value_Renamed">The string to process</param>
        /// <param name="store">Whether <c>value</c> should be stored in the index</param>
        /// <param name="index">Whether the field should be indexed, and if so, if it should
        /// be tokenized before indexing</param>
        /// <exception cref="System.NullReferenceException">if name or value is null</exception>
        /// <exception cref="System.ArgumentException">if the field is neither stored nor indexed</exception>
        public Field(System.String name, System.String value_Renamed, Store store, Index index)
            : this(name, value_Renamed, store, index, TermVector.NO)
        {
        }

        /// <summary>
        /// Create a field by specifying its name, value and how it will
        /// be saved in the index.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="value_Renamed">The string to process</param>
        /// <param name="store">Whether <c>value</c> should be stored in the index</param>
        /// <param name="index">Whether the field should be indexed, and if so, if it should
        /// be tokenized before indexing</param>
        /// <param name="termVector">Whether term vector should be stored</param>
        /// <exception cref="System.NullReferenceException">if name or value is null</exception>
        /// <exception cref="System.ArgumentException">in any of the following situations:
        /// <list type="bullet">
        /// <item>the name and the value are both empty</item>
        /// <item>the field is neither stored nor indexed</item>
        /// <item>the field is not indexed but termVector is not <c>TermVector.NO</c></item>
        /// <item>store or index is not one of the known parameter values</item>
        /// </list>
        /// </exception>
        public Field(System.String name, System.String value_Renamed, Store store, Index index, TermVector termVector)
        {
            if (name == null)
                throw new System.NullReferenceException("name cannot be null");
            if (value_Renamed == null)
                throw new System.NullReferenceException("value cannot be null");
            if (name.Length == 0 && value_Renamed.Length == 0)
                throw new System.ArgumentException("name and value cannot both be empty");
            if (index == Index.NO && store == Store.NO)
                throw new System.ArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
            if (index == Index.NO && termVector != TermVector.NO)
                throw new System.ArgumentException("cannot store term vector information " + "for a field that is not indexed");

            this.name = String.Intern(name); // field names are interned
            this.fieldsData = value_Renamed;

            if (store == Store.YES)
            {
                this.isStored = true;
                this.isCompressed = false;
            }
            else if (store == Store.COMPRESS)
            {
                this.isStored = true;
                this.isCompressed = true;
            }
            else if (store == Store.NO)
            {
                this.isStored = false;
                this.isCompressed = false;
            }
            else
            {
                throw new System.ArgumentException("unknown store parameter " + store);
            }

            if (index == Index.NO)
            {
                this.isIndexed = false;
                this.isTokenized = false;
            }
            else if (index == Index.ANALYZED)
            {
                this.isIndexed = true;
                this.isTokenized = true;
            }
            else if (index == Index.NOT_ANALYZED)
            {
                this.isIndexed = true;
                this.isTokenized = false;
            }
            else if (index == Index.NOT_ANALYZED_NO_NORMS)
            {
                this.isIndexed = true;
                this.isTokenized = false;
                this.omitNorms = true;
            }
            else if (index == Index.ANALYZED_NO_NORMS)
            {
                this.isIndexed = true;
                this.isTokenized = true;
                this.omitNorms = true;
            }
            else
            {
                throw new System.ArgumentException("unknown index parameter " + index);
            }

            this.isBinary = false;

            SetStoreTermVector(termVector);
        }

        /// <summary>
        /// Create a tokenized and indexed field that is not stored. Term vectors will
        /// not be stored. The Reader is read only when the Document is added to the index,
        /// i.e. you may not close the Reader until <see cref="IndexWriter.AddDocument(Document)"/>
        /// has been called.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="reader">The reader with the content</param>
        /// <exception cref="System.NullReferenceException">if name or reader is null</exception>
        public Field(System.String name, System.IO.TextReader reader)
            : this(name, reader, TermVector.NO)
        {
        }

        /// <summary>
        /// Create a tokenized and indexed field that is not stored, optionally with
        /// storing term vectors. The Reader is read only when the Document is added to the index,
        /// i.e. you may not close the Reader until <see cref="IndexWriter.AddDocument(Document)"/>
        /// has been called.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="reader">The reader with the content</param>
        /// <param name="termVector">Whether term vector should be stored</param>
        /// <exception cref="System.NullReferenceException">if name or reader is null</exception>
        public Field(System.String name, System.IO.TextReader reader, TermVector termVector)
        {
            if (name == null)
                throw new System.NullReferenceException("name cannot be null");
            if (reader == null)
                throw new System.NullReferenceException("reader cannot be null");

            this.name = String.Intern(name); // field names are interned
            this.fieldsData = reader;

            this.isStored = false;
            this.isCompressed = false;

            this.isIndexed = true;
            this.isTokenized = true;

            this.isBinary = false;

            SetStoreTermVector(termVector);
        }

        /// <summary>
        /// Create a tokenized and indexed field that is not stored. Term vectors will
        /// not be stored. This is useful for pre-analyzed fields.
        /// The TokenStream is read only when the Document is added to the index,
        /// i.e. you may not close the TokenStream until <see cref="IndexWriter.AddDocument(Document)"/>
        /// has been called.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="tokenStream">The TokenStream with the content</param>
        /// <exception cref="System.NullReferenceException">if name or tokenStream is null</exception>
        public Field(System.String name, TokenStream tokenStream)
            : this(name, tokenStream, TermVector.NO)
        {
        }

        /// <summary>
        /// Create a tokenized and indexed field that is not stored, optionally with
        /// storing term vectors. This is useful for pre-analyzed fields.
        /// The TokenStream is read only when the Document is added to the index,
        /// i.e. you may not close the TokenStream until <see cref="IndexWriter.AddDocument(Document)"/>
        /// has been called.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="tokenStream">The TokenStream with the content</param>
        /// <param name="termVector">Whether term vector should be stored</param>
        /// <exception cref="System.NullReferenceException">if name or tokenStream is null</exception>
        public Field(System.String name, TokenStream tokenStream, TermVector termVector)
        {
            if (name == null)
                throw new System.NullReferenceException("name cannot be null");
            if (tokenStream == null)
                throw new System.NullReferenceException("tokenStream cannot be null");

            this.name = String.Intern(name); // field names are interned
            this.fieldsData = tokenStream;

            this.isStored = false;
            this.isCompressed = false;

            this.isIndexed = true;
            this.isTokenized = true;

            this.isBinary = false;

            SetStoreTermVector(termVector);
        }

        /// <summary>
        /// Create a stored field with binary value. Optionally the value may be compressed.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="value_Renamed">The binary value</param>
        /// <param name="store">How <c>value</c> should be stored (compressed or not)</param>
        /// <exception cref="System.ArgumentException">if name or value is null, or if store is <c>Store.NO</c></exception>
        public Field(System.String name, byte[] value_Renamed, Store store)
            // The initializer runs before any constructor body, so the length must be
            // computed null-safely here; the delegated constructor then reports a null
            // value with its own ArgumentException.
            : this(name, value_Renamed, 0, value_Renamed == null ? 0 : value_Renamed.Length, store)
        {
        }

        /// <summary>
        /// Create a stored field with binary value. Optionally the value may be compressed.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="value_Renamed">The binary value</param>
        /// <param name="offset">Recorded as the binary offset of the value within the array</param>
        /// <param name="length">Recorded as the binary length of the value</param>
        /// <param name="store">How <c>value</c> should be stored (compressed or not)</param>
        /// <exception cref="System.ArgumentException">if name or value is null, or if store is <c>Store.NO</c></exception>
        public Field(System.String name, byte[] value_Renamed, int offset, int length, Store store)
        {
            if (name == null)
                throw new System.ArgumentException("name cannot be null");
            if (value_Renamed == null)
                throw new System.ArgumentException("value cannot be null");

            this.name = String.Intern(name);
            fieldsData = value_Renamed;

            if (store == Store.YES)
            {
                isStored = true;
                isCompressed = false;
            }
            else if (store == Store.COMPRESS)
            {
                isStored = true;
                isCompressed = true;
            }
            else if (store == Store.NO)
                throw new System.ArgumentException("binary values can't be unstored");
            else
            {
                throw new System.ArgumentException("unknown store parameter " + store);
            }

            // Binary fields are stored only: never indexed, tokenized or given term vectors.
            isIndexed = false;
            isTokenized = false;

            isBinary = true;
            binaryLength = length;
            binaryOffset = offset;

            SetStoreTermVector(TermVector.NO);
        }
    }
}