/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using TokenStream = Lucene.Net.Analysis.TokenStream;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using Parameter = Lucene.Net.Util.Parameter;
using StringHelper = Lucene.Net.Util.StringHelper;

namespace Lucene.Net.Documents
{
    /// <summary>
    /// A field is a section of a Document. Each field has two parts, a name and a
    /// value. Values may be free text, provided as a String or as a Reader, or they
    /// may be atomic keywords, which are not further processed. Such keywords may
    /// be used to represent dates, urls, etc. Fields are optionally stored in the
    /// index, so that they may be returned with hits on the document.
    /// </summary>
    [Serializable]
    public sealed class Field:AbstractField, Fieldable
    {
        /// <summary>Specifies whether and how a field should be stored.</summary>
        [Serializable]
        public sealed class Store:Parameter
        {
            internal Store(System.String name):base(name)
            {
            }

            /// <summary>
            /// Store the original field value in the index in a compressed form. This is
            /// useful for long documents and for binary valued fields.
            /// <para>
            /// Please use CompressionTools instead.
            /// For string fields that were previously indexed and stored using compression,
            /// the new way to achieve this is: First add the field indexed-only (no store)
            /// and additionally using the same field name as a binary, stored field
            /// with CompressionTools.compressString.
            /// </para>
            /// </summary>
            public static readonly Store COMPRESS = new Store("COMPRESS");

            /// <summary>
            /// Store the original field value in the index. This is useful for short texts
            /// like a document's title which should be displayed with the results. The
            /// value is stored in its original form, i.e. no analyzer is used before it is
            /// stored.
            /// </summary>
            public static readonly Store YES = new Store("YES");

            /// <summary>Do not store the field value in the index.</summary>
            public static readonly Store NO = new Store("NO");
        }

        /// <summary>Specifies whether and how a field should be indexed.</summary>
        [Serializable]
        public sealed class Index:Parameter
        {
            internal Index(System.String name):base(name)
            {
            }

            /// <summary>
            /// Do not index the field value. This field can thus not be searched,
            /// but one can still access its contents provided it is
            /// stored (see <c>Field.Store</c>).
            /// </summary>
            public static readonly Index NO = new Index("NO");

            /// <summary>
            /// Index the tokens produced by running the field's
            /// value through an Analyzer. This is useful for
            /// common text.
            /// </summary>
            public static readonly Index ANALYZED = new Index("ANALYZED");

            /// <summary>Deprecated alias: this has been renamed to <c>ANALYZED</c>.</summary>
            [Obsolete("this has been renamed to ANALYZED")]
            public static readonly Index TOKENIZED;

            /// <summary>
            /// Index the field's value without using an Analyzer, so it can be searched.
            /// As no analyzer is used the value will be stored as a single term. This is
            /// useful for unique Ids like product numbers.
            /// </summary>
            public static readonly Index NOT_ANALYZED = new Index("NOT_ANALYZED");

            /// <summary>Deprecated alias: this has been renamed to <c>NOT_ANALYZED</c>.</summary>
            [Obsolete("This has been renamed to NOT_ANALYZED")]
            public static readonly Index UN_TOKENIZED;

            /// <summary>
            /// Expert: Index the field's value without an Analyzer,
            /// and also disable the storing of norms. Note that you
            /// can also separately enable/disable norms by calling
            /// Field.setOmitNorms. No norms means that
            /// index-time field and document boosting and field
            /// length normalization are disabled. The benefit is
            /// less memory usage as norms take up one byte of RAM
            /// per indexed field for every document in the index,
            /// during searching. Note that once you index a given
            /// field <i>with</i> norms enabled, disabling norms will
            /// have no effect. In other words, for this to have the
            /// above described effect on a field, all instances of
            /// that field must be indexed with NOT_ANALYZED_NO_NORMS
            /// from the beginning.
            /// </summary>
            public static readonly Index NOT_ANALYZED_NO_NORMS = new Index("NOT_ANALYZED_NO_NORMS");

            /// <summary>Deprecated alias: this has been renamed to <c>NOT_ANALYZED_NO_NORMS</c>.</summary>
            [Obsolete("This has been renamed to NOT_ANALYZED_NO_NORMS")]
            public static readonly Index NO_NORMS;

            /// <summary>
            /// Expert: Index the tokens produced by running the
            /// field's value through an Analyzer, and also
            /// separately disable the storing of norms. See
            /// <c>NOT_ANALYZED_NO_NORMS</c> for what norms are
            /// and why you may want to disable them.
            /// </summary>
            public static readonly Index ANALYZED_NO_NORMS = new Index("ANALYZED_NO_NORMS");

            // The deprecated names are bound to the very same instances as their
            // replacements, so reference comparisons against either name agree.
            static Index()
            {
                TOKENIZED = ANALYZED;
                UN_TOKENIZED = NOT_ANALYZED;
                NO_NORMS = NOT_ANALYZED_NO_NORMS;
            }
        }

        /// <summary>Specifies whether and how a field should have term vectors.</summary>
        [Serializable]
        public sealed class TermVector:Parameter
        {
            internal TermVector(System.String name):base(name)
            {
            }

            /// <summary>Do not store term vectors.</summary>
            public static readonly TermVector NO = new TermVector("NO");

            /// <summary>
            /// Store the term vectors of each document. A term vector is a list
            /// of the document's terms and their number of occurrences in that document.
            /// </summary>
            public static readonly TermVector YES = new TermVector("YES");

            /// <summary>Store the term vector + token position information.</summary>
            public static readonly TermVector WITH_POSITIONS = new TermVector("WITH_POSITIONS");

            /// <summary>Store the term vector + Token offset information.</summary>
            public static readonly TermVector WITH_OFFSETS = new TermVector("WITH_OFFSETS");

            /// <summary>Store the term vector + Token position and offset information.</summary>
            public static readonly TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");
        }

        /// <summary>
        /// The value of the field as a String, or null. If null, the Reader value or
        /// binary value is used. Exactly one of stringValue(),
        /// readerValue(), and getBinaryValue() must be set.
        /// </summary>
        /// <returns>The field data when it is a string; otherwise null.</returns>
        public override System.String StringValue()
        {
            return fieldsData as System.String;
        }

        /// <summary>
        /// The value of the field as a Reader, or null. If null, the String value or
        /// binary value is used. Exactly one of stringValue(),
        /// readerValue(), and getBinaryValue() must be set.
        /// </summary>
        /// <returns>The field data when it is a TextReader; otherwise null.</returns>
        public override System.IO.TextReader ReaderValue()
        {
            return fieldsData as System.IO.TextReader;
        }

        /// <summary>
        /// The value of the field in Binary, or null. If null, the Reader value,
        /// or String value is used. Exactly one of stringValue(),
        /// readerValue(), and getBinaryValue() must be set.
        /// <para>
        /// This method must allocate a new byte[] if
        /// the AbstractField.GetBinaryOffset() is non-zero
        /// or AbstractField.GetBinaryLength() is not the
        /// full length of the byte[]. Please use
        /// AbstractField.GetBinaryValue() instead, which simply
        /// returns the byte[].
        /// </para>
        /// </summary>
        /// <returns>
        /// null for a non-binary field; the backing array itself when offset and length
        /// cover it exactly; otherwise a fresh copy of the addressed slice.
        /// </returns>
        [Obsolete("This method must allocate a new byte[] if the AbstractField.GetBinaryOffset() is non-zero or AbstractField.GetBinaryLength() is not the full length of the byte[]. Please use AbstractField.GetBinaryValue() instead, which simply returns the byte[].")]
        public override byte[] BinaryValue()
        {
            if (!isBinary)
            {
                return null;
            }

            byte[] data = (byte[]) fieldsData;
            if (binaryOffset == 0 && data.Length == binaryLength)
            {
                // Optimization: the slice is the whole array, so no copy is needed.
                return data;
            }

            byte[] ret = new byte[binaryLength];
            Array.Copy(data, binaryOffset, ret, 0, binaryLength);
            return ret;
        }

        /// <summary>
        /// The TokenStream for this field to be used when indexing, or null. If null, the Reader value
        /// or String value is analyzed to produce the indexed tokens.
        /// </summary>
        public override TokenStream TokenStreamValue()
        {
            return tokenStream;
        }

        /// <summary>
        /// <para>
        /// Expert: change the value of this field. This can
        /// be used during indexing to re-use a single Field
        /// instance to improve indexing speed by avoiding GC cost
        /// of new'ing and reclaiming Field instances. Typically
        /// a single Document instance is re-used as
        /// well. This helps most on small documents.
        /// </para>
        /// <para>
        /// Each Field instance should only be used once
        /// within a single Document instance. See ImproveIndexingSpeed
        /// for details.
        /// </para>
        /// </summary>
        /// <param name="value_Renamed">The new string value.</param>
        /// <exception cref="System.ArgumentException">if this field is binary</exception>
        public void SetValue(System.String value_Renamed)
        {
            if (isBinary)
            {
                throw new System.ArgumentException("cannot set a String value on a binary field");
            }
            fieldsData = value_Renamed;
        }

        /// <summary>Expert: change the value of this field. See setValue(String).</summary>
        /// <param name="value_Renamed">The new reader value.</param>
        /// <exception cref="System.ArgumentException">if this field is binary or stored</exception>
        public void SetValue(System.IO.TextReader value_Renamed)
        {
            if (isBinary)
            {
                throw new System.ArgumentException("cannot set a Reader value on a binary field");
            }
            if (isStored)
            {
                throw new System.ArgumentException("cannot set a Reader value on a stored field");
            }
            fieldsData = value_Renamed;
        }

        /// <summary>Expert: change the value of this field. See setValue(String).</summary>
        /// <param name="value_Renamed">The new binary value; the whole array is used.</param>
        /// <exception cref="System.ArgumentException">if this field is not binary</exception>
        public void SetValue(byte[] value_Renamed)
        {
            if (!isBinary)
            {
                throw new System.ArgumentException("cannot set a byte[] value on a non-binary field");
            }

            // Read the length before touching any state: a null argument then fails
            // without leaving fieldsData out of step with binaryLength/binaryOffset.
            int newLength = value_Renamed.Length;
            fieldsData = value_Renamed;
            binaryLength = newLength;
            binaryOffset = 0;
        }

        /// <summary>Expert: change the value of this field. See setValue(String).</summary>
        /// <param name="value_Renamed">The array holding the new binary value.</param>
        /// <param name="offset">Starting offset in value where this Field's bytes are.</param>
        /// <param name="length">Number of bytes to use for this Field, starting at offset.</param>
        /// <exception cref="System.ArgumentException">if this field is not binary</exception>
        public void SetValue(byte[] value_Renamed, int offset, int length)
        {
            if (!isBinary)
            {
                throw new System.ArgumentException("cannot set a byte[] value on a non-binary field");
            }
            fieldsData = value_Renamed;
            binaryLength = length;
            binaryOffset = offset;
        }

        /// <summary>
        /// Expert: change the value of this field. See setValue(String).
        /// Deprecated: use SetTokenStream instead.
        /// </summary>
        /// <param name="value_Renamed">The token stream to use; the field data is cleared.</param>
        /// <exception cref="System.ArgumentException">if this field is binary or stored</exception>
        [Obsolete("use SetTokenStream ")]
        public void SetValue(TokenStream value_Renamed)
        {
            if (isBinary)
            {
                throw new System.ArgumentException("cannot set a TokenStream value on a binary field");
            }
            if (isStored)
            {
                throw new System.ArgumentException("cannot set a TokenStream value on a stored field");
            }
            fieldsData = null;
            tokenStream = value_Renamed;
        }

        /// <summary>
        /// Expert: sets the token stream to be used for indexing and marks this field
        /// as indexed and tokenized.
        /// May be combined with stored values from stringValue() or binaryValue().
        /// </summary>
        /// <param name="tokenStream">The token stream to use when indexing.</param>
        public void SetTokenStream(TokenStream tokenStream)
        {
            this.isIndexed = true;
            this.isTokenized = true;
            this.tokenStream = tokenStream;
        }

        /// <summary>
        /// Create a field by specifying its name, value and how it will
        /// be saved in the index. Term vectors will not be stored in the index.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="value_Renamed">The string to process</param>
        /// <param name="store">Whether <c>value</c> should be stored in the index</param>
        /// <param name="index">Whether the field should be indexed, and if so, if it should
        /// be tokenized before indexing</param>
        /// <exception cref="System.NullReferenceException">if name or value is null</exception>
        /// <exception cref="System.ArgumentException">if the field is neither stored nor indexed</exception>
        public Field(System.String name, System.String value_Renamed, Store store, Index index):this(name, value_Renamed, store, index, TermVector.NO)
        {
        }

        /// <summary>
        /// Create a field by specifying its name, value and how it will
        /// be saved in the index. The field name is interned.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="value_Renamed">The string to process</param>
        /// <param name="store">Whether <c>value</c> should be stored in the index</param>
        /// <param name="index">Whether the field should be indexed, and if so, if it should
        /// be tokenized before indexing</param>
        /// <param name="termVector">Whether term vector should be stored</param>
        /// <exception cref="System.NullReferenceException">if name or value is null</exception>
        /// <exception cref="System.ArgumentException">in any of the following situations:
        /// <list type="bullet">
        /// <item><description>name and value are both empty</description></item>
        /// <item><description>the field is neither stored nor indexed</description></item>
        /// <item><description>the field is not indexed but termVector is not <c>TermVector.NO</c></description></item>
        /// <item><description>store or index is not one of the known parameter values</description></item>
        /// </list>
        /// </exception>
        public Field(System.String name, System.String value_Renamed, Store store, Index index, TermVector termVector):this(name, true, value_Renamed, store, index, termVector)
        {
        }

        /// <summary>
        /// Create a field by specifying its name, value and how it will
        /// be saved in the index.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="internName">Whether to .intern() name or not</param>
        /// <param name="value_Renamed">The string to process</param>
        /// <param name="store">Whether <c>value</c> should be stored in the index</param>
        /// <param name="index">Whether the field should be indexed, and if so, if it should
        /// be tokenized before indexing</param>
        /// <param name="termVector">Whether term vector should be stored</param>
        /// <exception cref="System.NullReferenceException">if name or value is null</exception>
        /// <exception cref="System.ArgumentException">in any of the following situations:
        /// <list type="bullet">
        /// <item><description>name and value are both empty</description></item>
        /// <item><description>the field is neither stored nor indexed</description></item>
        /// <item><description>the field is not indexed but termVector is not <c>TermVector.NO</c></description></item>
        /// <item><description>store or index is not one of the known parameter values</description></item>
        /// </list>
        /// </exception>
        public Field(System.String name, bool internName, System.String value_Renamed, Store store, Index index, TermVector termVector)
        {
            // NullReferenceException is kept (rather than ArgumentNullException) because
            // the exception type is part of what existing callers may catch.
            if (name == null)
            {
                throw new System.NullReferenceException("name cannot be null");
            }
            if (value_Renamed == null)
            {
                throw new System.NullReferenceException("value cannot be null");
            }
            if (name.Length == 0 && value_Renamed.Length == 0)
            {
                throw new System.ArgumentException("name and value cannot both be empty");
            }
            if (index == Index.NO && store == Store.NO)
            {
                throw new System.ArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
            }
            if (index == Index.NO && termVector != TermVector.NO)
            {
                throw new System.ArgumentException("cannot store term vector information " + "for a field that is not indexed");
            }

            if (internName)
            {
                // field names are optionally interned
                name = StringHelper.Intern(name);
            }
            this.name = name;

            this.fieldsData = value_Renamed;

            if (store == Store.YES)
            {
                this.isStored = true;
                this.isCompressed = false;
            }
            else if (store == Store.COMPRESS)
            {
                this.isStored = true;
                this.isCompressed = true;
            }
            else if (store == Store.NO)
            {
                this.isStored = false;
                this.isCompressed = false;
            }
            else
            {
                throw new System.ArgumentException("unknown store parameter " + store);
            }

            if (index == Index.NO)
            {
                this.isIndexed = false;
                this.isTokenized = false;
                this.omitTermFreqAndPositions = false;
                this.omitNorms = true;
            }
            else if (index == Index.ANALYZED)
            {
                this.isIndexed = true;
                this.isTokenized = true;
            }
            else if (index == Index.NOT_ANALYZED)
            {
                this.isIndexed = true;
                this.isTokenized = false;
            }
            else if (index == Index.NOT_ANALYZED_NO_NORMS)
            {
                this.isIndexed = true;
                this.isTokenized = false;
                this.omitNorms = true;
            }
            else if (index == Index.ANALYZED_NO_NORMS)
            {
                this.isIndexed = true;
                this.isTokenized = true;
                this.omitNorms = true;
            }
            else
            {
                throw new System.ArgumentException("unknown index parameter " + index);
            }

            this.isBinary = false;

            SetStoreTermVector(termVector);
        }

        /// <summary>
        /// Create a tokenized and indexed field that is not stored. Term vectors will
        /// not be stored. The Reader is read only when the Document is added to the index,
        /// i.e. you may not close the Reader until IndexWriter.AddDocument(Document)
        /// has been called.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="reader">The reader with the content</param>
        /// <exception cref="System.NullReferenceException">if name or reader is null</exception>
        public Field(System.String name, System.IO.TextReader reader):this(name, reader, TermVector.NO)
        {
        }

        /// <summary>
        /// Create a tokenized and indexed field that is not stored, optionally with
        /// storing term vectors. The Reader is read only when the Document is added to the index,
        /// i.e. you may not close the Reader until IndexWriter.AddDocument(Document)
        /// has been called.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="reader">The reader with the content</param>
        /// <param name="termVector">Whether term vector should be stored</param>
        /// <exception cref="System.NullReferenceException">if name or reader is null</exception>
        public Field(System.String name, System.IO.TextReader reader, TermVector termVector)
        {
            if (name == null)
            {
                throw new System.NullReferenceException("name cannot be null");
            }
            if (reader == null)
            {
                throw new System.NullReferenceException("reader cannot be null");
            }

            this.name = StringHelper.Intern(name); // field names are interned
            this.fieldsData = reader;

            this.isStored = false;
            this.isCompressed = false;

            this.isIndexed = true;
            this.isTokenized = true;

            this.isBinary = false;

            SetStoreTermVector(termVector);
        }

        /// <summary>
        /// Create a tokenized and indexed field that is not stored. Term vectors will
        /// not be stored. This is useful for pre-analyzed fields.
        /// The TokenStream is read only when the Document is added to the index,
        /// i.e. you may not close the TokenStream until IndexWriter.AddDocument(Document)
        /// has been called.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="tokenStream">The TokenStream with the content</param>
        /// <exception cref="System.NullReferenceException">if name or tokenStream is null</exception>
        public Field(System.String name, TokenStream tokenStream):this(name, tokenStream, TermVector.NO)
        {
        }

        /// <summary>
        /// Create a tokenized and indexed field that is not stored, optionally with
        /// storing term vectors. This is useful for pre-analyzed fields.
        /// The TokenStream is read only when the Document is added to the index,
        /// i.e. you may not close the TokenStream until IndexWriter.AddDocument(Document)
        /// has been called.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="tokenStream">The TokenStream with the content</param>
        /// <param name="termVector">Whether term vector should be stored</param>
        /// <exception cref="System.NullReferenceException">if name or tokenStream is null</exception>
        public Field(System.String name, TokenStream tokenStream, TermVector termVector)
        {
            if (name == null)
            {
                throw new System.NullReferenceException("name cannot be null");
            }
            if (tokenStream == null)
            {
                throw new System.NullReferenceException("tokenStream cannot be null");
            }

            this.name = StringHelper.Intern(name); // field names are interned
            this.fieldsData = null;
            this.tokenStream = tokenStream;

            this.isStored = false;
            this.isCompressed = false;

            this.isIndexed = true;
            this.isTokenized = true;

            this.isBinary = false;

            SetStoreTermVector(termVector);
        }

        /// <summary>
        /// Create a stored field with binary value. Optionally the value may be compressed.
        /// The whole array is used as the field's value.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="value_Renamed">The binary value</param>
        /// <param name="store">How <c>value</c> should be stored (compressed or not)</param>
        /// <exception cref="System.ArgumentException">if name or value is null, if store is
        /// <c>Store.NO</c>, or if store is not a known parameter value</exception>
        // A null value is forwarded with length 0 so that the delegated constructor
        // reports it with its "value cannot be null" ArgumentException instead of this
        // initializer failing on value_Renamed.Length.
        public Field(System.String name, byte[] value_Renamed, Store store):this(name, value_Renamed, 0, value_Renamed == null ? 0 : value_Renamed.Length, store)
        {
        }

        /// <summary>
        /// Create a stored field with binary value. Optionally the value may be compressed.
        /// </summary>
        /// <param name="name">The name of the field</param>
        /// <param name="value_Renamed">The binary value</param>
        /// <param name="offset">Starting offset in value where this Field's bytes are</param>
        /// <param name="length">Number of bytes to use for this Field, starting at offset</param>
        /// <param name="store">How <c>value</c> should be stored (compressed or not)</param>
        /// <exception cref="System.ArgumentException">if name or value is null, if store is
        /// <c>Store.NO</c>, or if store is not a known parameter value</exception>
        public Field(System.String name, byte[] value_Renamed, int offset, int length, Store store)
        {
            if (name == null)
            {
                throw new System.ArgumentException("name cannot be null");
            }
            if (value_Renamed == null)
            {
                throw new System.ArgumentException("value cannot be null");
            }

            this.name = StringHelper.Intern(name); // field names are interned
            fieldsData = value_Renamed;

            if (store == Store.YES)
            {
                isStored = true;
                isCompressed = false;
            }
            else if (store == Store.COMPRESS)
            {
                isStored = true;
                isCompressed = true;
            }
            else if (store == Store.NO)
            {
                throw new System.ArgumentException("binary values can't be unstored");
            }
            else
            {
                throw new System.ArgumentException("unknown store parameter " + store);
            }

            // Binary fields are stored only: never indexed or tokenized.
            isIndexed = false;
            isTokenized = false;
            omitTermFreqAndPositions = false;
            omitNorms = true;

            isBinary = true;
            binaryLength = length;
            binaryOffset = offset;

            SetStoreTermVector(TermVector.NO);
        }
    }
}