/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using FlagsAttribute = Lucene.Net.Analysis.Tokenattributes.FlagsAttribute;
using OffsetAttribute = Lucene.Net.Analysis.Tokenattributes.OffsetAttribute;
using PayloadAttribute = Lucene.Net.Analysis.Tokenattributes.PayloadAttribute;
using PositionIncrementAttribute = Lucene.Net.Analysis.Tokenattributes.PositionIncrementAttribute;
using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute;
using TypeAttribute = Lucene.Net.Analysis.Tokenattributes.TypeAttribute;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using Attribute = Lucene.Net.Util.IAttribute;
using AttributeImpl = Lucene.Net.Util.AttributeImpl;
using AttributeSource = Lucene.Net.Util.AttributeSource;
using Lucene.Net.Util;
namespace Lucene.Net.Analysis
{
// JAVA: src/java/org/apache/lucene/analysis/TokenStream.java
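/// <summary>
/// A <see cref="TokenStream"/> enumerates the sequence of tokens, either from
/// <see cref="Field"/>s of a <see cref="Document"/>
/// or from query text.
/// </summary>
/// <remarks>
/// <para>
/// A new <see cref="TokenStream"/> API has been introduced with Lucene 2.9. This API
/// has moved from being <see cref="Token"/> based to <see cref="IAttribute"/> based. While
/// <see cref="Token"/> still exists in 2.9 as a convenience class, the preferred way
/// to store the information of a <see cref="Token"/> is to use <see cref="AttributeImpl"/>s.
/// </para>
/// <para>
/// TokenStream now extends <see cref="AttributeSource"/>, which provides
/// access to all of the token <see cref="IAttribute"/>s for the TokenStream.
/// Note that only one instance per <see cref="AttributeImpl"/> is created and reused
/// for every token. This approach reduces object creation and allows local
/// caching of references to the <see cref="AttributeImpl"/>s. See
/// <see cref="IncrementToken()"/> for further details.
/// </para>
/// <para>
/// The workflow of the new TokenStream API is as follows:
/// </para>
/// <list type="number">
/// <item><description>
/// Instantiation of <see cref="TokenStream"/> / <see cref="TokenFilter"/>s which add/get
/// attributes to/from the <see cref="AttributeSource"/>.
/// </description></item>
/// <item><description>
/// The consumer calls <see cref="Reset()"/>.
/// </description></item>
/// <item><description>
/// The consumer retrieves attributes from the stream and stores local
/// references to all attributes it wants to access.
/// </description></item>
/// <item><description>
/// The consumer calls <see cref="IncrementToken()"/> until it returns false and
/// consumes the attributes after each call.
/// </description></item>
/// <item><description>
/// The consumer calls <see cref="End()"/> so that any end-of-stream operations
/// can be performed.
/// </description></item>
/// <item><description>
/// The consumer calls <see cref="Close()"/> to release any resource when finished
/// using the TokenStream.
/// </description></item>
/// </list>
/// </remarks>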
public abstract class TokenStream : AttributeSource
{
// REMOVE: in 3.0
// Factory handed to the base class by the parameterless constructor while the old
// Token API is still enabled: the default factory wrapped in a TokenWrapperAttributeFactory.
[Obsolete("Remove this when old API is removed! ")]
private static readonly AttributeFactory DEFAULT_TOKEN_WRAPPER_ATTRIBUTE_FACTORY = new TokenWrapperAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
// REMOVE: in 3.0
// Wrapper obtained from InitTokenWrapper in every constructor; null when
// onlyUseNewAPI is set. IncrementToken(), Next(Token) and Next() swap its delegate token.
[Obsolete("Remove this when old API is removed! ")]
private TokenWrapper tokenWrapper;
// REMOVE: in 3.0
// Process-wide switch read by the constructors, InitTokenWrapper and Check;
// changed through SetOnlyUseNewAPI. Defaults to false (old API fallback available).
[Obsolete("Remove this when old API is removed! ")]
private static bool onlyUseNewAPI = false;
// REMOVE: in 3.0
// Records which of IncrementToken(), Next(Token) and Next() the concrete subclass
// overrides; assigned in InitBlock.
[Obsolete("Remove this when old API is removed! ")]
private MethodSupport supportedMethods;
/// <summary>
/// Determines which token-producing methods the concrete runtime type overrides
/// and stores the result in <c>supportedMethods</c>. Every constructor calls this
/// before doing anything else.
/// </summary>
private void InitBlock()
{
    // REMOVE: in 3.0
    System.Type concreteType = GetType();
#pragma warning disable 618
    supportedMethods = GetSupportedMethods(concreteType);
#pragma warning restore 618
}
// REMOVE: in 3.0
/// <summary>
/// Records, for one concrete stream type, which of the three token-producing
/// methods (<c>IncrementToken()</c>, <c>Next(Token)</c>, <c>Next()</c>) are declared
/// by a type other than <see cref="TokenStream"/> itself.
/// </summary>
[Obsolete("Remove this when old API is removed! ")]
private sealed class MethodSupport
{
    // Parameter-type signatures used for the reflection lookups below.
    private static readonly System.Type[] METHOD_NO_PARAMS = new System.Type[0];
    private static readonly System.Type[] METHOD_TOKEN_PARAM = new System.Type[]{typeof(Token)};

    internal bool hasIncrementToken;
    internal bool hasReusableNext;
    internal bool hasNext;

    /// <summary>Inspects <paramref name="clazz"/> and fills in the three flags.</summary>
    /// <param name="clazz">The concrete stream type to inspect.</param>
    internal MethodSupport(System.Type clazz)
    {
        hasIncrementToken = IsMethodOverridden(clazz, "IncrementToken", METHOD_NO_PARAMS);
        hasReusableNext = IsMethodOverridden(clazz, "Next", METHOD_TOKEN_PARAM);
        hasNext = IsMethodOverridden(clazz, "Next", METHOD_NO_PARAMS);
    }

    /// <summary>
    /// Returns true when the method resolved by reflection on <paramref name="type"/>
    /// is declared by a type other than <see cref="TokenStream"/>.
    /// </summary>
    private static bool IsMethodOverridden(System.Type type, System.String name, System.Type[] parameterTypes)
    {
        try
        {
            System.Reflection.MethodInfo resolved = type.GetMethod(name, parameterTypes);
            System.Type declaringType = resolved.DeclaringType;
            return declaringType != typeof(TokenStream);
        }
        catch (System.MethodAccessException e)
        {
            // should not happen
            throw new System.SystemException(e.Message, e);
        }
    }
}
// REMOVE: in 3.0
// Cache of MethodSupport results keyed by concrete stream type. GetSupportedMethods
// reads and fills it while holding a lock on this same object.
[Obsolete("Remove this when old API is removed! ")]
private static readonly Support.Dictionary knownMethodSupport = new Support.Dictionary();
// {{Aroush-2.9 Port issue: the Java original uses an IdentityHashMap here, which this port needs to mimic.
/*
* From the Java docs:
* This class implements the Map interface with a hash table, using
* reference-equality in place of object-equality when comparing keys
* (and values). In other words, in an IdentityHashMap, two keys k1 and k2
* are considered equal if and only if (k1==k2). (In normal Map
* implementations (like HashMap) two keys k1 and k2 are considered
* equal if and only if (k1==null ? k2==null : k1.equals(k2)).)
*/
// Aroush-2.9}}
// REMOVE: in 3.0
/// <summary>
/// Returns the <c>MethodSupport</c> for <paramref name="clazz"/>, creating it and
/// adding it to <c>knownMethodSupport</c> the first time the type is requested.
/// The lookup and the insertion happen under a lock on the cache object.
/// </summary>
/// <param name="clazz">The concrete stream type to look up.</param>
/// <returns>The cached or newly created <c>MethodSupport</c>.</returns>
[Obsolete("Remove this when old API is removed! ")]
private static MethodSupport GetSupportedMethods(System.Type clazz)
{
    lock (knownMethodSupport)
    {
        // A null lookup result is treated as "not cached yet".
        MethodSupport cached = knownMethodSupport[clazz];
        if (cached != null)
        {
            return cached;
        }

        MethodSupport created = new MethodSupport(clazz);
        knownMethodSupport.Add(clazz, created);
        return created;
    }
}
// REMOVE: in 3.0
/// <summary>
/// Attribute factory that hands out a <c>TokenWrapper</c> for every attribute type
/// a <c>TokenWrapper</c> can be assigned to, and forwards all other requests to the
/// wrapped factory.
/// </summary>
[Obsolete("Remove this when old API is removed! ")]
private sealed class TokenWrapperAttributeFactory:AttributeFactory
{
    private AttributeFactory wrappedFactory;

    /// <summary>Wraps the given factory.</summary>
    /// <param name="delegate_Renamed">Factory used for attribute types not served by <c>TokenWrapper</c>.</param>
    internal TokenWrapperAttributeFactory(AttributeFactory delegate_Renamed)
    {
        wrappedFactory = delegate_Renamed;
    }

    /// <summary>
    /// Creates a new <c>TokenWrapper</c> when it is assignable to <paramref name="attClass"/>;
    /// otherwise asks the wrapped factory.
    /// </summary>
    public override AttributeImpl CreateAttributeInstance(System.Type attClass)
    {
        if (attClass.IsAssignableFrom(typeof(TokenWrapper)))
        {
            return new TokenWrapper();
        }
        return wrappedFactory.CreateAttributeInstance(attClass);
    }

    // this is needed for TeeSinkTokenStream's check for compatibility of AttributeSource,
    // so two TokenStreams using old API have the same AttributeFactory wrapped by this one.
    /// <summary>
    /// Two wrappers are equal when they are the same instance or when their wrapped
    /// factories are equal.
    /// </summary>
    public override bool Equals(System.Object other)
    {
        if (System.Object.ReferenceEquals(this, other))
        {
            return true;
        }
        if (!(other is TokenWrapperAttributeFactory))
        {
            return false;
        }
        TokenWrapperAttributeFactory that = (TokenWrapperAttributeFactory) other;
        return wrappedFactory.Equals(that.wrappedFactory);
    }

    /// <summary>Hash code derived from the wrapped factory's hash code.</summary>
    public override int GetHashCode()
    {
        int wrappedHash = wrappedFactory.GetHashCode();
        return wrappedHash ^ 0x0a45ff31;
    }
}
#pragma warning disable 618
/// <summary>
/// A <see cref="TokenStream"/> using the default attribute factory. While the old
/// API is still enabled, the default factory is the one wrapped for
/// <c>TokenWrapper</c> support.
/// </summary>
protected internal TokenStream()
    : base(onlyUseNewAPI
        ? AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY
        : DEFAULT_TOKEN_WRAPPER_ATTRIBUTE_FACTORY)
{
    InitBlock();
    // REMOVE: in 3.0
    tokenWrapper = InitTokenWrapper(null);
    Check();
}
#pragma warning restore 618
#pragma warning disable 618
/// <summary>
/// A <see cref="TokenStream"/> that uses the same attributes as the supplied one.
/// </summary>
/// <param name="input">The attribute source whose attributes are shared.</param>
protected internal TokenStream(AttributeSource input)
    : base(input)
{
    InitBlock();
    // REMOVE: in 3.0
    tokenWrapper = InitTokenWrapper(input);
    Check();
}
#pragma warning restore 618
#pragma warning disable 618
/// <summary>
/// A <see cref="TokenStream"/> using the supplied AttributeFactory for creating
/// new <see cref="IAttribute"/> instances. While the old API is still enabled the
/// factory is wrapped in a <c>TokenWrapperAttributeFactory</c>.
/// </summary>
/// <param name="factory">The factory used to create attribute instances.</param>
protected internal TokenStream(AttributeFactory factory)
    : base(onlyUseNewAPI ? factory : new TokenWrapperAttributeFactory(factory))
{
    InitBlock();
    // REMOVE: in 3.0
    tokenWrapper = InitTokenWrapper(null);
    Check();
}
#pragma warning restore 618
/// <summary>
/// Finds the <c>TokenWrapper</c> this stream should use for the old Token API.
/// </summary>
/// <param name="input">
/// The attribute source this stream was built on, or null. When it is a
/// <see cref="TokenStream"/> that already has a wrapper, that wrapper is reused.
/// </param>
/// <returns>
/// null when <c>onlyUseNewAPI</c> is set; otherwise the wrapper shared with
/// <paramref name="input"/> or the single wrapper instance backing all basic attributes.
/// </returns>
/// <exception cref="System.NotSupportedException">
/// The basic attributes are not all implemented by one and the same <c>TokenWrapper</c> instance.
/// </exception>
[Obsolete("Remove this when old API is removed! ")]
private TokenWrapper InitTokenWrapper(AttributeSource input)
{
    // no wrapper needed
    if (onlyUseNewAPI)
    {
        return null;
    }

    // if possible get the wrapper from the filter's input stream
    TokenStream upstream = input as TokenStream;
    if (upstream != null && upstream.tokenWrapper != null)
    {
        return upstream.tokenWrapper;
    }

    // check that all attributes are implemented by the same TokenWrapper instance
    IAttribute att = AddAttribute(typeof(TermAttribute));
    bool singleWrapper = att is TokenWrapper
        && AddAttribute(typeof(TypeAttribute)) == att
        && AddAttribute(typeof(PositionIncrementAttribute)) == att
        && AddAttribute(typeof(FlagsAttribute)) == att
        && AddAttribute(typeof(OffsetAttribute)) == att
        && AddAttribute(typeof(PayloadAttribute)) == att;
    if (!singleWrapper)
    {
        throw new System.NotSupportedException("If onlyUseNewAPI is disabled, all basic Attributes must be implemented by the internal class " + "TokenWrapper. Please make sure, that all TokenStreams/TokenFilters in this chain have been " + "instantiated with this flag disabled and do not add any custom instances for the basic Attributes!");
    }
    return (TokenWrapper) att;
}
/// <summary>
/// Verifies that the concrete subclass overrides the token-producing methods it needs.
/// </summary>
/// <exception cref="System.NotSupportedException">
/// <c>onlyUseNewAPI</c> is set but <c>IncrementToken()</c> is not overridden, or none
/// of <c>IncrementToken()</c>, <c>Next(Token)</c> and <c>Next()</c> is overridden.
/// </exception>
[Obsolete("Remove this when old API is removed! ")]
private void Check()
{
    MethodSupport methods = supportedMethods;

    bool newApiMissing = onlyUseNewAPI && !methods.hasIncrementToken;
    if (newApiMissing)
    {
        throw new System.NotSupportedException(GetType().FullName + " does not implement incrementToken() which is needed for onlyUseNewAPI.");
    }

    // a TokenStream subclass must at least implement one of the methods!
    bool implementsAny = methods.hasIncrementToken || methods.hasNext || methods.hasReusableNext;
    if (!implementsAny)
    {
        throw new System.NotSupportedException(GetType().FullName + " does not implement any of incrementToken(), next(Token), next().");
    }
}
/// <summary>
/// Globally enables or disables exclusive use of the new
/// <see cref="IncrementToken()"/> API based on <see cref="IAttribute"/>s.
/// </summary>
/// <remarks>
/// <para>
/// Enabling this gives a small, in most cases negligible, performance increase,
/// but it only works if all TokenStreams use the new API and
/// implement <see cref="IncrementToken()"/>. This setting can only be enabled
/// globally.
/// </para>
/// <para>
/// This setting only affects <see cref="TokenStream"/>s instantiated after this
/// call. All TokenStreams already created use the other setting.
/// </para>
/// <para>
/// All core <see cref="Analyzer"/>s are compatible with this setting; if you have
/// your own TokenStreams that are also compatible, you should enable
/// this.
/// </para>
/// <para>
/// When enabled, tokenization may throw <see cref="System.NotSupportedException"/>s
/// if the whole tokenizer chain is not compatible, e.g. one of the
/// TokenStreams does not implement the new TokenStream API.
/// </para>
/// <para>
/// The default is false, so there is the fallback to the old API
/// available.
/// </para>
/// </remarks>
/// <param name="onlyUseNewAPI">true to use only the new API, false to keep the old-API fallback.</param>
[Obsolete("This setting will no longer be needed in Lucene 3.0 as the old API will be removed.")]
public static void SetOnlyUseNewAPI(bool onlyUseNewAPI)
{
    // The parameter hides the static field, so the field is qualified with the type name.
#pragma warning disable 618
    TokenStream.onlyUseNewAPI = onlyUseNewAPI;
#pragma warning restore 618
}
/// <summary>
/// Reports the current value of the global setting changed by <c>SetOnlyUseNewAPI</c>.
/// </summary>
/// <returns>true if only the new API is used, otherwise false.</returns>
[Obsolete("This setting will no longer be needed in Lucene 3.0 as the old API will be removed.")]
public static bool GetOnlyUseNewAPI()
{
#pragma warning disable 618
    bool current = onlyUseNewAPI;
#pragma warning restore 618
    return current;
}
/// <summary>
/// Advances the stream to the next token.
/// </summary>
/// <remarks>
/// <para>
/// Consumers, like <see cref="IndexWriter"/>, use this
/// method to advance the stream to the next token. Implementing classes must
/// override this method and update the appropriate <see cref="AttributeImpl"/>s
/// with the attributes of the next token.
/// </para>
/// <para>
/// The producer must make no assumptions about the attributes after the
/// method has returned: the caller may arbitrarily change them. If the
/// producer needs to preserve the state for subsequent calls, it can use
/// the state-capturing facility of <see cref="AttributeSource"/> to create a copy of the
/// current attribute state.
/// </para>
/// <para>
/// This method is called for every token of a document, so an efficient
/// implementation is crucial for good performance. To avoid repeated attribute
/// lookups (such as <c>AddAttribute</c>) or downcasts,
/// references to all <see cref="AttributeImpl"/>s that this stream uses should be
/// retrieved during instantiation.
/// </para>
/// <para>
/// To ensure that filters and consumers know which attributes are available,
/// the attributes must be added during instantiation. Filters and consumers
/// are not required to check for availability of attributes in
/// <see cref="IncrementToken()"/>.
/// </para>
/// <para>
/// This base implementation bridges to the old API: it calls <c>Next(Token)</c>
/// when the subclass overrides it, otherwise <c>Next()</c>, and installs the
/// returned token as the wrapper's delegate.
/// </para>
/// </remarks>
/// <returns>true if the stream has not reached its end, otherwise false.</returns>
public virtual bool IncrementToken()
{
    // CHANGE: IncrementToken becomes an empty abstract method in 3.0
#pragma warning disable 618
    System.Diagnostics.Debug.Assert(tokenWrapper != null);

    Token produced;
    if (!supportedMethods.hasReusableNext)
    {
        System.Diagnostics.Debug.Assert(supportedMethods.hasNext);
        produced = Next();
    }
    else
    {
        produced = Next(tokenWrapper.delegate_Renamed);
    }

    bool exhausted = produced == null;
    if (!exhausted)
    {
        tokenWrapper.delegate_Renamed = produced;
    }
    return !exhausted;
#pragma warning restore 618
}
/// <summary>
/// This method is called by the consumer after the last token has been
/// consumed, after <see cref="IncrementToken()"/> returned false
/// (using the new TokenStream API). Streams implementing the old API
/// should upgrade to use this feature.
/// </summary>
/// <remarks>
/// <para>
/// This method can be used to perform any end-of-stream operations, like
/// setting the final offset of a stream. The final offset of a stream might
/// differ from the offset of the last token, e.g. in case one or more whitespaces
/// followed after the last token and a <see cref="WhitespaceTokenizer"/> was used.
/// </para>
/// <para>
/// The base implementation is empty.
/// </para>
/// </remarks>
public virtual void End()
{
// do nothing by default
}
/// <summary>
/// Returns the next token in the stream, or null at end-of-stream.
/// </summary>
/// <remarks>
/// <para>
/// The input Token should be used as the Token that is returned when possible, which will
/// give the fastest tokenization performance. However, this is not required; a new Token may be
/// returned. Callers may re-use a single Token instance for successive calls
/// to this method.
/// </para>
/// <para>
/// This implicitly defines a "contract" between consumers, the callers of this
/// method, and producers, the implementations of this method that are the source
/// for tokens:
/// </para>
/// <list type="bullet">
/// <item><description>
/// A consumer must fully consume the previously returned <see cref="Token"/>
/// before calling this method again.
/// </description></item>
/// <item><description>
/// A producer must clear the <see cref="Token"/> before setting the fields in
/// it and returning it.
/// </description></item>
/// </list>
/// <para>
/// Also, the producer must make no assumptions about a <see cref="Token"/> after it
/// has been returned: the caller may arbitrarily change it. If the producer
/// needs to hold onto the <see cref="Token"/> for subsequent calls, it must clone
/// it before storing it. Note that a <see cref="TokenFilter"/> is considered a
/// consumer.
/// </para>
/// </remarks>
/// <param name="reusableToken">
/// A <see cref="Token"/> that may or may not be used to return;
/// this parameter should never be null. The callee is not required to
/// check for null before using it, but it is a good idea to assert that
/// it is not null.
/// </param>
/// <returns>
/// The next <see cref="Token"/> in the stream or null if the end-of-stream was hit.
/// </returns>
/// <exception cref="System.NotSupportedException">
/// This stream was created without a token wrapper (new-API-only mode).
/// </exception>
[Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
public virtual Token Next(Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);

    if (tokenWrapper == null)
    {
        throw new System.NotSupportedException("This TokenStream only supports the new Attributes API.");
    }

    if (!supportedMethods.hasIncrementToken)
    {
        // Only the parameterless Next() is left to delegate to.
        System.Diagnostics.Debug.Assert(supportedMethods.hasNext);
        return Next();
    }

    // Let IncrementToken() fill the caller's token through the wrapper.
    tokenWrapper.delegate_Renamed = reusableToken;
    if (IncrementToken())
    {
        return tokenWrapper.delegate_Renamed;
    }
    return null;
}
/// <summary>
/// Returns the next <see cref="Token"/> in the stream, or null at EOS.
/// </summary>
/// <remarks>
/// <para>
/// The returned Token is a "full private copy" (not re-used across
/// calls to <see cref="Next()"/>) but will be slower than calling
/// <see cref="Next(Token)"/> or using the new <see cref="IncrementToken()"/>
/// method with the new <see cref="AttributeSource"/> API.
/// </para>
/// <para>
/// When the subclass overrides <see cref="IncrementToken()"/>, the wrapper's delegate
/// is temporarily replaced by a fresh Token for the duration of the call and is
/// restored afterwards, also when <see cref="IncrementToken()"/> throws.
/// </para>
/// </remarks>
/// <returns>The next token with its payload (if any) cloned, or null at end-of-stream.</returns>
/// <exception cref="System.NotSupportedException">
/// This stream was created without a token wrapper (new-API-only mode).
/// </exception>
[Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling {@link #Next(Token)} or using the new IncrementToken() method with the new AttributeSource API.")]
public virtual Token Next()
{
#pragma warning disable 618
    if (tokenWrapper == null)
    {
        throw new System.NotSupportedException("This TokenStream only supports the new Attributes API.");
    }

    Token nextToken;
    if (supportedMethods.hasIncrementToken)
    {
        Token savedDelegate = tokenWrapper.delegate_Renamed;
        tokenWrapper.delegate_Renamed = new Token();
        try
        {
            nextToken = IncrementToken() ? tokenWrapper.delegate_Renamed : null;
        }
        finally
        {
            // Put the original delegate back even if IncrementToken() throws, so the
            // wrapper is not left pointing at the temporary Token.
            tokenWrapper.delegate_Renamed = savedDelegate;
        }
    }
    else
    {
        System.Diagnostics.Debug.Assert(supportedMethods.hasReusableNext);
        nextToken = Next(new Token());
    }

    if (nextToken != null)
    {
        // Clone the payload so the returned token does not share it with the producer.
        Lucene.Net.Index.Payload p = nextToken.GetPayload();
        if (p != null)
        {
            nextToken.SetPayload((Lucene.Net.Index.Payload) p.Clone());
        }
    }
    return nextToken;
#pragma warning restore 618
}
/// <summary>
/// Resets this stream to the beginning. This is an optional operation, so
/// subclasses may or may not implement this method. <see cref="Reset()"/> is not needed for
/// the standard indexing process.
/// </summary>
/// <remarks>
/// <para>
/// However, if the tokens of a TokenStream are intended to be
/// consumed more than once, it is necessary to implement <see cref="Reset()"/>.
/// Note that if your TokenStream caches tokens and feeds them back again
/// after a reset, it is imperative that you clone the tokens when you
/// store them away on the first pass as well as when you return
/// them on future passes after <see cref="Reset()"/>.
/// </para>
/// <para>
/// The base implementation is empty.
/// </para>
/// </remarks>
public virtual void Reset()
{
}
/// <summary>
/// Releases resources associated with this stream. The base implementation is empty.
/// </summary>
public virtual void Close()
{
}
}
}