/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using Lucene.Net.Analysis.Tokenattributes; using Lucene.Net.Index; namespace Lucene.Net.Analysis.Miscellaneous { /// /// Joins two token streams and leaves the last token of the first stream available /// to be used when updating the token values in the second stream based on that token. /// /// The default implementation adds last prefix token end offset to the suffix token start and end offsets. ///

/// NOTE: This filter might not behave correctly if used with custom attributes, i.e. attributes other than
/// the ones located in Lucene.Net.Analysis.Tokenattributes.

public class PrefixAwareTokenFilter : TokenStream
{
    // Attributes registered on this filter; the constructor passes the suffix stream to the
    // base constructor, and GetNextSuffixInputToken reads suffix tokens through these fields.
    private readonly IFlagsAttribute _flagsAtt;
    private readonly IOffsetAttribute _offsetAtt;
    private readonly IPayloadAttribute _payloadAtt;
    private readonly IPositionIncrementAttribute _posIncrAtt;
    private readonly ITermAttribute _termAtt;
    private readonly ITypeAttribute _typeAtt;

    // Attributes registered on the prefix stream that was passed to the constructor.
    private readonly IFlagsAttribute _pFlagsAtt;
    private readonly IOffsetAttribute _pOffsetAtt;
    private readonly IPayloadAttribute _pPayloadAtt;
    private readonly IPositionIncrementAttribute _pPosIncrAtt;
    private readonly ITermAttribute _pTermAtt;
    private readonly ITypeAttribute _pTypeAtt;

    // Copy of the most recent prefix token; handed to UpdateSuffixToken for every suffix token.
    private readonly Token _previousPrefixToken = new Token();

    // Scratch token used to carry attribute values between the input streams and this filter.
    private readonly Token _reusableToken = new Token();

    // True once the prefix stream has reported that it has no more tokens.
    private bool _prefixExhausted;

    /// <summary>
    /// Creates a filter that first emits every token of <paramref name="prefix"/> and then
    /// every token of <paramref name="suffix"/>, passing each suffix token through
    /// <see cref="UpdateSuffixToken"/> before it is emitted.
    /// </summary>
    /// <param name="prefix">the stream whose tokens are emitted first</param>
    /// <param name="suffix">the stream whose tokens are emitted after the prefix is exhausted</param>
    public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix)
        : base(suffix)
    {
        Suffix = suffix;
        Prefix = prefix;
        _prefixExhausted = false;

        // ReSharper disable DoNotCallOverridableMethodsInConstructor
        _termAtt = AddAttribute<ITermAttribute>();
        _posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
        _payloadAtt = AddAttribute<IPayloadAttribute>();
        _offsetAtt = AddAttribute<IOffsetAttribute>();
        _typeAtt = AddAttribute<ITypeAttribute>();
        _flagsAtt = AddAttribute<IFlagsAttribute>();
        // ReSharper restore DoNotCallOverridableMethodsInConstructor

        _pTermAtt = prefix.AddAttribute<ITermAttribute>();
        _pPosIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
        _pPayloadAtt = prefix.AddAttribute<IPayloadAttribute>();
        _pOffsetAtt = prefix.AddAttribute<IOffsetAttribute>();
        _pTypeAtt = prefix.AddAttribute<ITypeAttribute>();
        _pFlagsAtt = prefix.AddAttribute<IFlagsAttribute>();
    }

    /// <summary>
    /// The stream consumed first. NOTE(review): the prefix attribute fields are bound to the
    /// stream given to the constructor and are not re-bound when this property is assigned.
    /// </summary>
    public TokenStream Prefix { get; set; }

    /// <summary>
    /// The stream consumed after <see cref="Prefix"/> is exhausted.
    /// </summary>
    public TokenStream Suffix { get; set; }

    /// <summary>
    /// Advances to the next token: prefix tokens are emitted unchanged until the prefix stream
    /// is exhausted, then suffix tokens are emitted after being passed through
    /// <see cref="UpdateSuffixToken"/> together with the last prefix token.
    /// </summary>
    /// <returns><c>true</c> if a token was produced; <c>false</c> when both streams are exhausted</returns>
    public override sealed bool IncrementToken()
    {
        if (!_prefixExhausted)
        {
            Token nextToken = GetNextPrefixInputToken(_reusableToken);
            if (nextToken == null)
            {
                _prefixExhausted = true;
            }
            else
            {
                _previousPrefixToken.Reinit(nextToken);

                // Make it a deep copy: give the remembered token its own Payload instance.
                Payload p = _previousPrefixToken.Payload;
                if (p != null)
                {
                    _previousPrefixToken.Payload = (Payload) p.Clone();
                }

                SetCurrentToken(nextToken);
                return true;
            }
        }

        Token nextSuffixToken = GetNextSuffixInputToken(_reusableToken);
        if (nextSuffixToken == null)
        {
            return false;
        }

        nextSuffixToken = UpdateSuffixToken(nextSuffixToken, _previousPrefixToken);
        SetCurrentToken(nextSuffixToken);
        return true;
    }

    /// <summary>
    /// Clears this filter's attributes and copies the values of <paramref name="token"/> into them.
    /// Does nothing when <paramref name="token"/> is <c>null</c>.
    /// </summary>
    private void SetCurrentToken(Token token)
    {
        if (token == null)
        {
            return;
        }

        ClearAttributes();
        _termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
        _posIncrAtt.PositionIncrement = token.PositionIncrement;
        _flagsAtt.Flags = token.Flags;
        _offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
        _typeAtt.Type = token.Type;
        _payloadAtt.Payload = token.Payload;
    }

    /// <summary>
    /// Advances the prefix stream and copies its current attribute values into <paramref name="token"/>.
    /// </summary>
    /// <returns><paramref name="token"/>, or <c>null</c> when the prefix stream has no more tokens</returns>
    private Token GetNextPrefixInputToken(Token token)
    {
        if (!Prefix.IncrementToken())
        {
            return null;
        }

        token.SetTermBuffer(_pTermAtt.TermBuffer(), 0, _pTermAtt.TermLength());
        token.PositionIncrement = _pPosIncrAtt.PositionIncrement;
        token.Flags = _pFlagsAtt.Flags;
        token.SetOffset(_pOffsetAtt.StartOffset, _pOffsetAtt.EndOffset);
        token.Type = _pTypeAtt.Type;
        token.Payload = _pPayloadAtt.Payload;
        return token;
    }

    /// <summary>
    /// Advances the suffix stream and copies the attribute values held by this filter's own
    /// attribute fields into <paramref name="token"/>.
    /// </summary>
    /// <returns><paramref name="token"/>, or <c>null</c> when the suffix stream has no more tokens</returns>
    private Token GetNextSuffixInputToken(Token token)
    {
        if (!Suffix.IncrementToken())
        {
            return null;
        }

        token.SetTermBuffer(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
        token.PositionIncrement = _posIncrAtt.PositionIncrement;
        token.Flags = _flagsAtt.Flags;
        token.SetOffset(_offsetAtt.StartOffset, _offsetAtt.EndOffset);
        token.Type = _typeAtt.Type;
        token.Payload = _payloadAtt.Payload;
        return token;
    }

    /// <summary>
    /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
    /// </summary>
    /// <param name="suffixToken">a token from the suffix stream</param>
    /// <param name="lastPrefixToken">the last token from the prefix stream</param>
    /// <returns>consumer token</returns>
    public virtual Token UpdateSuffixToken(Token suffixToken, Token lastPrefixToken)
    {
        suffixToken.StartOffset = lastPrefixToken.EndOffset + suffixToken.StartOffset;
        suffixToken.EndOffset = lastPrefixToken.EndOffset + suffixToken.EndOffset;
        return suffixToken;
    }

    /// <summary>
    /// Disposes the prefix and suffix streams.
    /// </summary>
    /// <param name="disposing">
    /// <c>true</c> when called from an explicit dispose; the wrapped streams are managed objects
    /// and are only released in that case.
    /// </param>
    protected override void Dispose(bool disposing)
    {
        if (!disposing)
        {
            return;
        }

        // Prefix and Suffix have public setters, so either may be null (Reset guards the same way).
        // try/finally ensures the suffix is still released if disposing the prefix throws.
        try
        {
            if (Prefix != null)
            {
                Prefix.Dispose();
            }
        }
        finally
        {
            if (Suffix != null)
            {
                Suffix.Dispose();
            }
        }
    }

    /// <summary>
    /// Resets this filter and both wrapped streams so that the prefix is consumed again first.
    /// </summary>
    public override void Reset()
    {
        base.Reset();

        if (Prefix != null)
        {
            _prefixExhausted = false;
            Prefix.Reset();
        }

        if (Suffix != null)
        {
            Suffix.Reset();
        }
    }
}
} // namespace Lucene.Net.Analysis.Miscellaneous