/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using TokenStream = Lucene.Net.Analysis.TokenStream;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using Parameter = Lucene.Net.Util.Parameter;
using StringHelper = Lucene.Net.Util.StringHelper;
namespace Lucene.Net.Documents
{
/// A field is a section of a Document. Each field has two parts, a name and a
/// value. Values may be free text, provided as a String or as a Reader, or they
/// may be atomic keywords, which are not further processed. Such keywords may
/// be used to represent dates, urls, etc. Fields are optionally stored in the
/// index, so that they may be returned with hits on the document.
///
[Serializable]
public sealed class Field:AbstractField, Fieldable
{
/// <summary>Specifies whether and how a field should be stored.</summary>
[Serializable]
public sealed class Store : Parameter
{
    /// <summary>Creates a store option; only this assembly can define new options.</summary>
    /// <param name="name">Name handed to the <c>Parameter</c> base constructor.</param>
    internal Store(string name) : base(name)
    {
    }

    /// <summary>
    /// Store the original field value in the index in a compressed form. This is
    /// useful for long documents and for binary valued fields.
    /// </summary>
    /// <remarks>
    /// Please use <c>CompressionTools</c> instead.
    /// For string fields that were previously indexed and stored using compression,
    /// the new way to achieve this is: first add the field indexed-only (no store)
    /// and additionally, using the same field name, add a binary stored field
    /// produced with <c>CompressionTools.compressString</c>.
    /// </remarks>
    public static readonly Store COMPRESS = new Store("COMPRESS");

    /// <summary>
    /// Store the original field value in the index. This is useful for short texts
    /// like a document's title which should be displayed with the results. The
    /// value is stored in its original form, i.e. no analyzer is used before it is
    /// stored.
    /// </summary>
    public static readonly Store YES = new Store("YES");

    /// <summary>Do not store the field value in the index.</summary>
    public static readonly Store NO = new Store("NO");
}
/// <summary>Specifies whether and how a field should be indexed.</summary>
[Serializable]
public sealed class Index : Parameter
{
    /// <summary>Creates an index option; only this assembly can define new options.</summary>
    /// <param name="name">Name handed to the <c>Parameter</c> base constructor.</param>
    internal Index(string name) : base(name)
    {
    }

    /// <summary>
    /// Do not index the field value. This field can thus not be searched,
    /// but one can still access its contents provided it is stored
    /// (see <c>Field.Store</c>).
    /// </summary>
    public static readonly Index NO = new Index("NO");

    /// <summary>
    /// Index the tokens produced by running the field's value through an
    /// Analyzer. This is useful for common text.
    /// </summary>
    public static readonly Index ANALYZED = new Index("ANALYZED");

    /// <summary>Legacy alias; this has been renamed to <c>ANALYZED</c>.</summary>
    [Obsolete("this has been renamed to ANALYZED")]
    public static readonly Index TOKENIZED;

    /// <summary>
    /// Index the field's value without using an Analyzer, so it can be searched.
    /// As no analyzer is used the value will be stored as a single term. This is
    /// useful for unique Ids like product numbers.
    /// </summary>
    public static readonly Index NOT_ANALYZED = new Index("NOT_ANALYZED");

    /// <summary>Legacy alias; this has been renamed to <c>NOT_ANALYZED</c>.</summary>
    [Obsolete("This has been renamed to NOT_ANALYZED")]
    public static readonly Index UN_TOKENIZED;

    /// <summary>
    /// Expert: Index the field's value without an Analyzer, and also disable
    /// the storing of norms.
    /// </summary>
    /// <remarks>
    /// Note that you can also separately enable/disable norms by calling
    /// <c>Field.setOmitNorms</c>. No norms means that index-time field and
    /// document boosting and field length normalization are disabled. The
    /// benefit is less memory usage, as norms take up one byte of RAM per
    /// indexed field for every document in the index during searching.
    /// Note that once you index a given field with norms enabled, disabling
    /// norms will have no effect. In other words, for this to have the above
    /// described effect on a field, all instances of that field must be
    /// indexed with NOT_ANALYZED_NO_NORMS from the beginning.
    /// </remarks>
    public static readonly Index NOT_ANALYZED_NO_NORMS = new Index("NOT_ANALYZED_NO_NORMS");

    /// <summary>Legacy alias; this has been renamed to <c>NOT_ANALYZED_NO_NORMS</c>.</summary>
    [Obsolete("This has been renamed to NOT_ANALYZED_NO_NORMS")]
    public static readonly Index NO_NORMS;

    /// <summary>
    /// Expert: Index the tokens produced by running the field's value through
    /// an Analyzer, and also separately disable the storing of norms. See
    /// <c>NOT_ANALYZED_NO_NORMS</c> for what norms are and why you may want
    /// to disable them.
    /// </summary>
    public static readonly Index ANALYZED_NO_NORMS = new Index("ANALYZED_NO_NORMS");

    /// <summary>
    /// Points each legacy alias at the very same instance as its replacement,
    /// so a reference comparison against either name gives the same answer.
    /// </summary>
    static Index()
    {
        TOKENIZED = ANALYZED;
        UN_TOKENIZED = NOT_ANALYZED;
        NO_NORMS = NOT_ANALYZED_NO_NORMS;
    }
}
/// <summary>Specifies whether and how a field should have term vectors.</summary>
[Serializable]
public sealed class TermVector : Parameter
{
    /// <summary>Creates a term vector option; only this assembly can define new options.</summary>
    /// <param name="name">Name handed to the <c>Parameter</c> base constructor.</param>
    internal TermVector(string name) : base(name)
    {
    }

    /// <summary>Do not store term vectors.</summary>
    public static readonly TermVector NO = new TermVector("NO");

    /// <summary>
    /// Store the term vectors of each document. A term vector is a list
    /// of the document's terms and their number of occurrences in that document.
    /// </summary>
    public static readonly TermVector YES = new TermVector("YES");

    /// <summary>Store the term vector + token position information.</summary>
    public static readonly TermVector WITH_POSITIONS = new TermVector("WITH_POSITIONS");

    /// <summary>Store the term vector + token offset information.</summary>
    public static readonly TermVector WITH_OFFSETS = new TermVector("WITH_OFFSETS");

    /// <summary>Store the term vector + token position and offset information.</summary>
    public static readonly TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");
}
/// <summary>
/// The value of the field as a String, or null when the field data is not a string.
/// </summary>
/// <remarks>
/// If null, the Reader value or binary value is used. Exactly one of
/// StringValue(), ReaderValue(), and GetBinaryValue() must be set.
/// </remarks>
/// <returns>The field data viewed as a string, or null.</returns>
public override string StringValue()
{
    // "as" yields null for any non-string payload, including a null payload.
    return fieldsData as string;
}
/// <summary>
/// The value of the field as a Reader, or null when the field data is not a TextReader.
/// </summary>
/// <remarks>
/// If null, the String value or binary value is used. Exactly one of
/// StringValue(), ReaderValue(), and GetBinaryValue() must be set.
/// </remarks>
/// <returns>The field data viewed as a <c>TextReader</c>, or null.</returns>
public override System.IO.TextReader ReaderValue()
{
    // "as" yields null for any non-reader payload, including a null payload.
    return fieldsData as System.IO.TextReader;
}
/// <summary>
/// The value of the field in Binary, or null when the field is not binary.
/// </summary>
/// <remarks>
/// If null, the Reader value or String value is used. Exactly one of
/// StringValue(), ReaderValue(), and GetBinaryValue() must be set.
/// This method must allocate a new byte[] if AbstractField.GetBinaryOffset()
/// is non-zero or AbstractField.GetBinaryLength() is not the full length of
/// the byte[]. Please use AbstractField.GetBinaryValue() instead, which
/// simply returns the byte[].
/// </remarks>
/// <returns>
/// The backing array itself when the field covers all of it; otherwise a
/// freshly allocated copy of the covered range; null for non-binary fields.
/// </returns>
[Obsolete("This method must allocate a new byte[] if the AbstractField.GetBinaryOffset() is non-zero or AbstractField.GetBinaryLength() is not the full length of the byte[]. Please use AbstractField.GetBinaryValue() instead, which simply returns the byte[].")]
public override byte[] BinaryValue()
{
    if (isBinary)
    {
        byte[] backing = (byte[]) fieldsData;

        // Optimization: hand back the array as-is when no slicing is needed.
        bool coversWholeArray = binaryOffset == 0 && backing.Length == binaryLength;
        if (coversWholeArray)
        {
            return backing;
        }

        byte[] slice = new byte[binaryLength];
        Array.Copy(backing, binaryOffset, slice, 0, binaryLength);
        return slice;
    }

    return null;
}
/// <summary>
/// The TokenStream for this field to be used when indexing, or null.
/// </summary>
/// <remarks>
/// If null, the Reader value or String value is analyzed to produce the
/// indexed tokens.
/// </remarks>
/// <returns>The token stream currently attached to this field; may be null.</returns>
public override TokenStream TokenStreamValue()
{
    return this.tokenStream;
}
/// <summary>
/// Expert: change the value of this field.
/// </summary>
/// <remarks>
/// This can be used during indexing to re-use a single Field instance to
/// improve indexing speed by avoiding GC cost of new'ing and reclaiming
/// Field instances. Typically a single Document instance is re-used as
/// well. This helps most on small documents.
/// Each Field instance should only be used once within a single Document
/// instance. See ImproveIndexingSpeed for details.
/// </remarks>
/// <param name="value_Renamed">The new string value.</param>
/// <exception cref="System.ArgumentException">If this field is binary.</exception>
public void SetValue(string value_Renamed)
{
    if (!isBinary)
    {
        fieldsData = value_Renamed;
        return;
    }

    throw new System.ArgumentException("cannot set a String value on a binary field");
}
/// <summary>
/// Expert: change the value of this field to a Reader. See SetValue(String)
/// for the re-use rules.
/// </summary>
/// <param name="value_Renamed">The new reader value.</param>
/// <exception cref="System.ArgumentException">
/// If this field is binary, or if it is stored.
/// </exception>
public void SetValue(System.IO.TextReader value_Renamed)
{
    // The binary check comes first so its message wins when both conditions hold.
    string problem = null;
    if (isBinary)
    {
        problem = "cannot set a Reader value on a binary field";
    }
    else if (isStored)
    {
        problem = "cannot set a Reader value on a stored field";
    }

    if (problem != null)
    {
        throw new System.ArgumentException(problem);
    }

    fieldsData = value_Renamed;
}
/// <summary>
/// Expert: change the value of this field to the whole of a byte array.
/// See SetValue(String) for the re-use rules.
/// </summary>
/// <remarks>
/// The binary offset is reset to 0 and the binary length becomes the
/// length of <paramref name="value_Renamed"/>.
/// </remarks>
/// <param name="value_Renamed">The new binary value.</param>
/// <exception cref="System.ArgumentException">If this field is not binary.</exception>
/// <exception cref="System.NullReferenceException">
/// If <paramref name="value_Renamed"/> is null; the field is left unmodified in that case.
/// </exception>
public void SetValue(byte[] value_Renamed)
{
    if (!isBinary)
    {
        throw new System.ArgumentException("cannot set a byte[] value on a non-binary field");
    }

    // Dereference the argument BEFORE mutating any state: a null argument then
    // fails without leaving the field holding null data with a stale length/offset.
    int newLength = value_Renamed.Length;

    fieldsData = value_Renamed;
    binaryLength = newLength;
    binaryOffset = 0;
}
/// <summary>
/// Expert: change the value of this field to a range of a byte array.
/// See SetValue(String) for the re-use rules.
/// </summary>
/// <remarks>
/// <paramref name="offset"/> and <paramref name="length"/> are recorded
/// as given; this method does not check them against the array bounds.
/// </remarks>
/// <param name="value_Renamed">The array holding the new binary value.</param>
/// <param name="offset">Starting offset of this field's bytes within the array.</param>
/// <param name="length">Number of bytes belonging to this field.</param>
/// <exception cref="System.ArgumentException">If this field is not binary.</exception>
public void SetValue(byte[] value_Renamed, int offset, int length)
{
    if (isBinary)
    {
        binaryOffset = offset;
        binaryLength = length;
        fieldsData = value_Renamed;
        return;
    }

    throw new System.ArgumentException("cannot set a byte[] value on a non-binary field");
}
/// <summary>
/// Expert: change the value of this field to a TokenStream. See
/// SetValue(String) for the re-use rules. Use SetTokenStream instead.
/// </summary>
/// <remarks>
/// Clears the field data (sets it to null) and attaches the given stream.
/// </remarks>
/// <param name="value_Renamed">The token stream to attach.</param>
/// <exception cref="System.ArgumentException">
/// If this field is binary, or if it is stored.
/// </exception>
[Obsolete("use SetTokenStream ")]
public void SetValue(TokenStream value_Renamed)
{
    // The binary check comes first so its message wins when both conditions hold.
    string problem = null;
    if (isBinary)
    {
        problem = "cannot set a TokenStream value on a binary field";
    }
    else if (isStored)
    {
        problem = "cannot set a TokenStream value on a stored field";
    }

    if (problem != null)
    {
        throw new System.ArgumentException(problem);
    }

    tokenStream = value_Renamed;
    fieldsData = null;
}
/// <summary>
/// Expert: sets the token stream to be used for indexing and flags the
/// field as both indexed and tokenized.
/// </summary>
/// <remarks>
/// May be combined with stored values from StringValue() or BinaryValue().
/// </remarks>
/// <param name="tokenStream">The token stream to attach to this field.</param>
public void SetTokenStream(TokenStream tokenStream)
{
    this.tokenStream = tokenStream;

    // Supplying a pre-built stream implies the field is indexed and tokenized.
    isTokenized = true;
    isIndexed = true;
}
/// <summary>
/// Create a field by specifying its name, value and how it will
/// be saved in the index. Term vectors will not be stored in the index.
/// </summary>
/// <remarks>
/// Delegates to the overload taking a term vector option, passing
/// <c>TermVector.NO</c>.
/// </remarks>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The string to process.</param>
/// <param name="store">Whether value should be stored in the index.</param>
/// <param name="index">
/// Whether the field should be indexed, and if so, if it should
/// be tokenized before indexing.
/// </param>
/// <exception cref="System.NullReferenceException">If name or value is null.</exception>
/// <exception cref="System.ArgumentException">If the field is neither stored nor indexed.</exception>
public Field(string name, string value_Renamed, Store store, Index index)
    : this(name, value_Renamed, store, index, TermVector.NO)
{
}
/// <summary>
/// Create a field by specifying its name, value and how it will
/// be saved in the index.
/// </summary>
/// <remarks>
/// Delegates to the overload taking an <c>internName</c> flag, passing
/// <c>true</c>.
/// </remarks>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The string to process.</param>
/// <param name="store">Whether value should be stored in the index.</param>
/// <param name="index">
/// Whether the field should be indexed, and if so, if it should
/// be tokenized before indexing.
/// </param>
/// <param name="termVector">Whether term vector should be stored.</param>
/// <exception cref="System.NullReferenceException">If name or value is null.</exception>
/// <exception cref="System.ArgumentException">
/// In any of the following situations:
/// the field is neither stored nor indexed;
/// the field is not indexed but termVector is not <c>TermVector.NO</c>.
/// </exception>
public Field(string name, string value_Renamed, Store store, Index index, TermVector termVector)
    : this(name, true, value_Renamed, store, index, termVector)
{
}
/// <summary>
/// Create a field by specifying its name, value and how it will
/// be saved in the index.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="internName">Whether to intern name or not.</param>
/// <param name="value_Renamed">The string to process.</param>
/// <param name="store">Whether value should be stored in the index.</param>
/// <param name="index">
/// Whether the field should be indexed, and if so, if it should
/// be tokenized before indexing.
/// </param>
/// <param name="termVector">Whether term vector should be stored.</param>
/// <exception cref="System.NullReferenceException">If name or value is null.</exception>
/// <exception cref="System.ArgumentException">
/// In any of the following situations:
/// name and value are both empty;
/// the field is neither stored nor indexed;
/// the field is not indexed but termVector is not <c>TermVector.NO</c>;
/// store or index is not one of the known options.
/// </exception>
public Field(string name, bool internName, string value_Renamed, Store store, Index index, TermVector termVector)
{
    // NOTE: NullReferenceException is kept (rather than ArgumentNullException)
    // because it is the exception type existing callers of this constructor see.
    if (name == null)
    {
        throw new System.NullReferenceException("name cannot be null");
    }
    if (value_Renamed == null)
    {
        throw new System.NullReferenceException("value cannot be null");
    }
    if (name.Length == 0 && value_Renamed.Length == 0)
    {
        throw new System.ArgumentException("name and value cannot both be empty");
    }
    if (index == Index.NO && store == Store.NO)
    {
        throw new System.ArgumentException("it doesn't make sense to have a field that is neither indexed nor stored");
    }
    if (index == Index.NO && termVector != TermVector.NO)
    {
        throw new System.ArgumentException("cannot store term vector information for a field that is not indexed");
    }

    // Field names are optionally interned.
    this.name = internName ? StringHelper.Intern(name) : name;
    this.fieldsData = value_Renamed;

    // Translate the store option into the stored/compressed flags.
    if (store != Store.YES && store != Store.COMPRESS && store != Store.NO)
    {
        throw new System.ArgumentException("unknown store parameter " + store);
    }
    this.isStored = store != Store.NO;
    this.isCompressed = store == Store.COMPRESS;

    // Translate the index option into the indexed/tokenized/norms flags.
    bool analyzed = index == Index.ANALYZED || index == Index.ANALYZED_NO_NORMS;
    bool notAnalyzed = index == Index.NOT_ANALYZED || index == Index.NOT_ANALYZED_NO_NORMS;
    if (index == Index.NO)
    {
        this.isIndexed = false;
        this.isTokenized = false;
        this.omitTermFreqAndPositions = false;
        this.omitNorms = true;
    }
    else if (analyzed || notAnalyzed)
    {
        this.isIndexed = true;
        this.isTokenized = analyzed;

        // omitNorms is only ever switched on here; the plain ANALYZED and
        // NOT_ANALYZED options leave it at whatever value it already has.
        if (index == Index.NOT_ANALYZED_NO_NORMS || index == Index.ANALYZED_NO_NORMS)
        {
            this.omitNorms = true;
        }
    }
    else
    {
        throw new System.ArgumentException("unknown index parameter " + index);
    }

    this.isBinary = false;
    SetStoreTermVector(termVector);
}
/// <summary>
/// Create a tokenized and indexed field that is not stored. Term vectors
/// will not be stored.
/// </summary>
/// <remarks>
/// The Reader is read only when the Document is added to the index, i.e.
/// you may not close the Reader until IndexWriter.AddDocument(Document)
/// has been called.
/// </remarks>
/// <param name="name">The name of the field.</param>
/// <param name="reader">The reader with the content.</param>
/// <exception cref="System.NullReferenceException">If name or reader is null.</exception>
public Field(string name, System.IO.TextReader reader)
    : this(name, reader, TermVector.NO)
{
}
/// <summary>
/// Create a tokenized and indexed field that is not stored, optionally
/// with storing term vectors.
/// </summary>
/// <remarks>
/// The Reader is read only when the Document is added to the index, i.e.
/// you may not close the Reader until IndexWriter.AddDocument(Document)
/// has been called.
/// </remarks>
/// <param name="name">The name of the field.</param>
/// <param name="reader">The reader with the content.</param>
/// <param name="termVector">Whether term vector should be stored.</param>
/// <exception cref="System.NullReferenceException">If name or reader is null.</exception>
public Field(string name, System.IO.TextReader reader, TermVector termVector)
{
    if (name == null)
    {
        throw new System.NullReferenceException("name cannot be null");
    }
    if (reader == null)
    {
        throw new System.NullReferenceException("reader cannot be null");
    }

    // Field names are interned.
    this.name = StringHelper.Intern(name);
    this.fieldsData = reader;

    // Reader-backed fields are always indexed and tokenized, never stored.
    this.isIndexed = true;
    this.isTokenized = true;
    this.isStored = false;
    this.isCompressed = false;
    this.isBinary = false;

    SetStoreTermVector(termVector);
}
/// <summary>
/// Create a tokenized and indexed field that is not stored. Term vectors
/// will not be stored. This is useful for pre-analyzed fields.
/// </summary>
/// <remarks>
/// The TokenStream is read only when the Document is added to the index,
/// i.e. you may not close the TokenStream until
/// IndexWriter.AddDocument(Document) has been called.
/// </remarks>
/// <param name="name">The name of the field.</param>
/// <param name="tokenStream">The TokenStream with the content.</param>
/// <exception cref="System.NullReferenceException">If name or tokenStream is null.</exception>
public Field(string name, TokenStream tokenStream)
    : this(name, tokenStream, TermVector.NO)
{
}
/// <summary>
/// Create a tokenized and indexed field that is not stored, optionally
/// with storing term vectors. This is useful for pre-analyzed fields.
/// </summary>
/// <remarks>
/// The TokenStream is read only when the Document is added to the index,
/// i.e. you may not close the TokenStream until
/// IndexWriter.AddDocument(Document) has been called.
/// </remarks>
/// <param name="name">The name of the field.</param>
/// <param name="tokenStream">The TokenStream with the content.</param>
/// <param name="termVector">Whether term vector should be stored.</param>
/// <exception cref="System.NullReferenceException">If name or tokenStream is null.</exception>
public Field(string name, TokenStream tokenStream, TermVector termVector)
{
    if (name == null)
    {
        throw new System.NullReferenceException("name cannot be null");
    }
    if (tokenStream == null)
    {
        throw new System.NullReferenceException("tokenStream cannot be null");
    }

    // Field names are interned.
    this.name = StringHelper.Intern(name);

    // The content comes solely from the stream, so there is no field data.
    this.tokenStream = tokenStream;
    this.fieldsData = null;

    // Stream-backed fields are always indexed and tokenized, never stored.
    this.isIndexed = true;
    this.isTokenized = true;
    this.isStored = false;
    this.isCompressed = false;
    this.isBinary = false;

    SetStoreTermVector(termVector);
}
/// <summary>
/// Create a stored field with binary value. Optionally the value may be
/// compressed.
/// </summary>
/// <remarks>
/// Delegates to the offset/length overload, covering the whole array.
/// </remarks>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The binary value.</param>
/// <param name="store">How value should be stored (compressed or not).</param>
/// <exception cref="System.ArgumentException">
/// If name or value is null, or if store is <c>Store.NO</c>.
/// </exception>
public Field(string name, byte[] value_Renamed, Store store)
    // Null-safe length: a null array must reach the delegated constructor so
    // that its "value cannot be null" check reports the problem, instead of
    // this initializer failing first with a NullReferenceException.
    : this(name, value_Renamed, 0, value_Renamed == null ? 0 : value_Renamed.Length, store)
{
}
/// <summary>
/// Create a stored field with binary value. Optionally the value may be
/// compressed.
/// </summary>
/// <remarks>
/// The resulting field is binary, not indexed, not tokenized, omits norms
/// and has term vectors set to <c>TermVector.NO</c>.
/// <paramref name="offset"/> and <paramref name="length"/> are recorded as
/// given; this constructor does not check them against the array bounds.
/// </remarks>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The binary value.</param>
/// <param name="offset">Starting offset in value where this Field's bytes are.</param>
/// <param name="length">Number of bytes to use for this Field, starting at offset.</param>
/// <param name="store">How value should be stored (compressed or not).</param>
/// <exception cref="System.ArgumentException">
/// If name or value is null, if store is <c>Store.NO</c>, or if store is
/// not one of the known options.
/// </exception>
public Field(string name, byte[] value_Renamed, int offset, int length, Store store)
{
    if (name == null)
    {
        throw new System.ArgumentException("name cannot be null");
    }
    if (value_Renamed == null)
    {
        throw new System.ArgumentException("value cannot be null");
    }

    // Field names are interned.
    this.name = StringHelper.Intern(name);
    fieldsData = value_Renamed;

    // A binary field only makes sense when it is stored.
    if (store == Store.NO)
    {
        throw new System.ArgumentException("binary values can't be unstored");
    }
    if (store != Store.YES && store != Store.COMPRESS)
    {
        throw new System.ArgumentException("unknown store parameter " + store);
    }
    isStored = true;
    isCompressed = store == Store.COMPRESS;

    // Binary values are never indexed.
    isIndexed = false;
    isTokenized = false;
    omitTermFreqAndPositions = false;
    omitNorms = true;

    isBinary = true;
    binaryLength = length;
    binaryOffset = offset;

    SetStoreTermVector(TermVector.NO);
}
}
}