/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;
using IndexReader = Lucene.Net.Index.IndexReader;
using Hits = Lucene.Net.Search.Hits;
using Similarity = Lucene.Net.Search.Similarity;

namespace Lucene.Net.Documents
{
    /// <summary>
    /// A Field is a section of a Document. Each Field has two parts, a name and a
    /// value. Values may be free text, provided as a String or as a Reader, or they
    /// may be atomic keywords, which are not further processed. Such keywords may
    /// be used to represent dates, urls, etc. Fields are optionally stored in the
    /// index, so that they may be returned with hits on the document.
    /// </summary>
    [Serializable]
    public sealed class Field
    {
        private System.String name = "body";
        private System.String stringValue = null;
        private bool storeTermVector = false;
        private System.IO.TextReader readerValue = null;
        private bool isStored = false;
        private bool isIndexed = true;
        private bool isTokenized = true;

        private float boost = 1.0f;

        /// <summary>
        /// Sets the boost factor for hits on this Field. This value will be
        /// multiplied into the score of all hits on this Field of this document.
        /// <para>
        /// The boost is multiplied by <see cref="Document.GetBoost()"/> of the document
        /// containing this Field. If a document has multiple fields with the same
        /// name, all such values are multiplied together. This product is then
        /// multiplied by the value <see cref="Similarity.LengthNorm(System.String,int)"/>, and
        /// rounded by <see cref="Similarity.EncodeNorm(float)"/> before it is stored in the
        /// index. One should attempt to ensure that this product does not overflow
        /// the range of that encoding.
        /// </para>
        /// </summary>
        /// <param name="boost">The new boost factor for this Field.</param>
        public void SetBoost(float boost)
        {
            this.boost = boost;
        }

        /// <summary>
        /// Returns the boost factor for hits on this Field.
        /// <para>The default value is 1.0.</para>
        /// <para>
        /// Note: this value is not stored directly with the document in the index.
        /// Documents returned from <see cref="IndexReader.Document(int)"/> and
        /// <see cref="Hits.Doc(int)"/> may thus not have the same value present as when
        /// this Field was indexed.
        /// </para>
        /// </summary>
        /// <returns>The boost factor currently set on this Field.</returns>
        public float GetBoost()
        {
            return boost;
        }

        /// <summary>
        /// Constructs a String-valued Field that is not tokenized, but is indexed
        /// and stored. Useful for non-text fields, e.g. date or url.
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="value_Renamed">The string value of the Field.</param>
        /// <returns>A stored, indexed, un-tokenized Field.</returns>
        public static Field Keyword(System.String name, System.String value_Renamed)
        {
            return new Field(name, value_Renamed, true, true, false);
        }

        /// <summary>
        /// Constructs a String-valued Field that is not tokenized nor indexed,
        /// but is stored in the index, for return with hits.
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="value_Renamed">The string value of the Field.</param>
        /// <returns>A stored, un-indexed, un-tokenized Field.</returns>
        public static Field UnIndexed(System.String name, System.String value_Renamed)
        {
            return new Field(name, value_Renamed, true, false, false);
        }

        /// <summary>
        /// Constructs a String-valued Field that is tokenized and indexed,
        /// and is stored in the index, for return with hits. Useful for short text
        /// fields, like "title" or "subject". Term vector will not be stored for this Field.
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="value_Renamed">The string value of the Field.</param>
        /// <returns>A stored, indexed, tokenized Field without a term vector.</returns>
        public static Field Text(System.String name, System.String value_Renamed)
        {
            return Text(name, value_Renamed, false);
        }

        /// <summary>
        /// Constructs a Date-valued Field that is not tokenized and is indexed,
        /// and stored in the index, for return with hits.
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="value_Renamed">The date, converted to a string with DateField.DateToString.</param>
        /// <returns>A stored, indexed, un-tokenized Field.</returns>
        public static Field Keyword(System.String name, System.DateTime value_Renamed)
        {
            return new Field(name, DateField.DateToString(value_Renamed), true, true, false);
        }

        /// <summary>
        /// Constructs a String-valued Field that is tokenized and indexed,
        /// and is stored in the index, for return with hits. Useful for short text
        /// fields, like "title" or "subject".
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="value_Renamed">The string value of the Field.</param>
        /// <param name="storeTermVector">true if the term vector should be stored.</param>
        /// <returns>A stored, indexed, tokenized Field.</returns>
        public static Field Text(System.String name, System.String value_Renamed, bool storeTermVector)
        {
            return new Field(name, value_Renamed, true, true, true, storeTermVector);
        }

        /// <summary>
        /// Constructs a String-valued Field that is tokenized and indexed,
        /// but that is not stored in the index. Term vector will not be stored for this Field.
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="value_Renamed">The string value of the Field.</param>
        /// <returns>An un-stored, indexed, tokenized Field without a term vector.</returns>
        public static Field UnStored(System.String name, System.String value_Renamed)
        {
            return UnStored(name, value_Renamed, false);
        }

        /// <summary>
        /// Constructs a String-valued Field that is tokenized and indexed,
        /// but that is not stored in the index.
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="value_Renamed">The string value of the Field.</param>
        /// <param name="storeTermVector">true if the term vector should be stored.</param>
        /// <returns>An un-stored, indexed, tokenized Field.</returns>
        public static Field UnStored(System.String name, System.String value_Renamed, bool storeTermVector)
        {
            return new Field(name, value_Renamed, false, true, true, storeTermVector);
        }

        /// <summary>
        /// Constructs a Reader-valued Field that is tokenized and indexed, but is
        /// not stored in the index verbatim. Useful for longer text fields, like
        /// "body". Term vector will not be stored for this Field.
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="value_Renamed">The reader supplying the Field's text.</param>
        /// <returns>An un-stored, indexed, tokenized Reader-valued Field.</returns>
        public static Field Text(System.String name, System.IO.TextReader value_Renamed)
        {
            return Text(name, value_Renamed, false);
        }

        /// <summary>
        /// Constructs a Reader-valued Field that is tokenized and indexed, but is
        /// not stored in the index verbatim. Useful for longer text fields, like
        /// "body".
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="value_Renamed">The reader supplying the Field's text.</param>
        /// <param name="storeTermVector">true if the term vector should be stored.</param>
        /// <returns>An un-stored, indexed, tokenized Reader-valued Field.</returns>
        public static Field Text(System.String name, System.IO.TextReader value_Renamed, bool storeTermVector)
        {
            // The reader constructor has no term-vector parameter, so the flag is applied afterwards.
            Field f = new Field(name, value_Renamed);
            f.storeTermVector = storeTermVector;
            return f;
        }

        /// <summary>
        /// The name of the Field (e.g., "date", "subject", "title", or "body")
        /// as an interned string.
        /// </summary>
        public System.String Name()
        {
            return name;
        }

        /// <summary>
        /// The value of the Field as a String, or null. If null, the Reader value
        /// is used. Exactly one of StringValue() and ReaderValue() must be set.
        /// </summary>
        public System.String StringValue()
        {
            return stringValue;
        }

        /// <summary>
        /// The value of the Field as a Reader, or null. If null, the String value
        /// is used. Exactly one of StringValue() and ReaderValue() must be set.
        /// </summary>
        public System.IO.TextReader ReaderValue()
        {
            return readerValue;
        }

        /// <summary>
        /// Create a Field by specifying all parameters except for storeTermVector,
        /// which is set to false.
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="string_Renamed">The string to process.</param>
        /// <param name="store">true if the Field should store the string.</param>
        /// <param name="index">true if the Field should be indexed.</param>
        /// <param name="token">true if the Field should be tokenized.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="name"/> or <paramref name="string_Renamed"/> is null.
        /// </exception>
        public Field(System.String name, System.String string_Renamed, bool store, bool index, bool token)
            : this(name, string_Renamed, store, index, token, false)
        {
        }

        /// <summary>
        /// Create a String-valued Field by specifying every attribute.
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="string_Renamed">The string to process.</param>
        /// <param name="store">true if the Field should store the string.</param>
        /// <param name="index">true if the Field should be indexed.</param>
        /// <param name="token">true if the Field should be tokenized.</param>
        /// <param name="storeTermVector">true if we should store the Term Vector info.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="name"/> or <paramref name="string_Renamed"/> is null.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="storeTermVector"/> is true while <paramref name="index"/> is false.
        /// </exception>
        public Field(System.String name, System.String string_Renamed, bool store, bool index, bool token, bool storeTermVector)
        {
            // ArgumentNullException derives from ArgumentException, so callers that
            // catch ArgumentException keep working while getting a more precise type.
            if (name == null)
            {
                throw new System.ArgumentNullException("name", "name cannot be null");
            }
            if (string_Renamed == null)
            {
                throw new System.ArgumentNullException("string_Renamed", "value cannot be null");
            }
            if (!index && storeTermVector)
            {
                throw new System.ArgumentException("cannot store a term vector for fields that are not indexed.");
            }

            this.name = String.Intern(name); // Field names are interned
            this.stringValue = string_Renamed;
            this.isStored = store;
            this.isIndexed = index;
            this.isTokenized = token;
            this.storeTermVector = storeTermVector;
        }

        /// <summary>
        /// Create a Reader-valued Field. The remaining attributes keep their field
        /// initializer values: not stored, indexed, tokenized, no term vector.
        /// </summary>
        /// <param name="name">The name of the Field.</param>
        /// <param name="reader">The reader supplying the Field's text.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="name"/> or <paramref name="reader"/> is null.
        /// </exception>
        internal Field(System.String name, System.IO.TextReader reader)
        {
            if (name == null)
            {
                throw new System.ArgumentNullException("name", "name cannot be null");
            }
            if (reader == null)
            {
                throw new System.ArgumentNullException("reader", "value cannot be null");
            }

            this.name = String.Intern(name); // Field names are interned
            this.readerValue = reader;
        }

        /// <summary>
        /// True iff the value of the Field is to be stored in the index for return
        /// with search hits. It is an error for this to be true if a Field is
        /// Reader-valued.
        /// </summary>
        public bool IsStored()
        {
            return isStored;
        }

        /// <summary>
        /// True iff the value of the Field is to be indexed, so that it may be
        /// searched on.
        /// </summary>
        public bool IsIndexed()
        {
            return isIndexed;
        }

        /// <summary>
        /// True iff the value of the Field should be tokenized as text prior to
        /// indexing. Un-tokenized fields are indexed as a single word and may not be
        /// Reader-valued.
        /// </summary>
        public bool IsTokenized()
        {
            return isTokenized;
        }

        /// <summary>
        /// True iff the term or terms used to index this Field are stored as a term
        /// vector, available from <see cref="IndexReader.GetTermFreqVector(int,System.String)"/>.
        /// These methods do not provide access to the original content of the Field,
        /// only to terms used to index it. If the original content must be
        /// preserved, use the stored attribute instead.
        /// </summary>
        public bool IsTermVectorStored()
        {
            return storeTermVector;
        }

        /// <summary>
        /// Prints a Field for human consumption.
        /// </summary>
        /// <returns>
        /// A label derived from the stored/indexed/tokenized flags followed by the
        /// name (and value, where applicable); falls back to the base implementation
        /// for flag combinations that match none of the labels.
        /// </returns>
        public override System.String ToString()
        {
            if (isStored && isIndexed && !isTokenized)
            {
                return "Keyword<" + name + ":" + stringValue + ">";
            }
            if (isStored && !isIndexed && !isTokenized)
            {
                return "Unindexed<" + name + ":" + stringValue + ">";
            }
            if (isStored && isIndexed && isTokenized && stringValue != null)
            {
                return "Text<" + name + ":" + stringValue + ">";
            }
            if (!isStored && isIndexed && isTokenized && readerValue != null)
            {
                return "Text<" + name + ":" + readerValue + ">";
            }
            if (!isStored && isIndexed && isTokenized)
            {
                return "UnStored<" + name + ">";
            }
            return base.ToString();
        }
    }
}