/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using System.Linq; using IndexReader = Lucene.Net.Index.IndexReader; using Term = Lucene.Net.Index.Term; using BooleanClause = Lucene.Net.Search.BooleanClause; using BooleanQuery = Lucene.Net.Search.BooleanQuery; using DisjunctionMaxQuery = Lucene.Net.Search.DisjunctionMaxQuery; using FilteredQuery = Lucene.Net.Search.FilteredQuery; using MultiPhraseQuery = Lucene.Net.Search.MultiPhraseQuery; using PhraseQuery = Lucene.Net.Search.PhraseQuery; using Query = Lucene.Net.Search.Query; using TermQuery = Lucene.Net.Search.TermQuery; using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery; using SpanOrQuery = Lucene.Net.Search.Spans.SpanOrQuery; using SpanQuery = Lucene.Net.Search.Spans.SpanQuery; using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery; namespace Lucene.Net.Search.Payloads { /// Experimental class to get set of payloads for most standard Lucene queries. /// Operates like Highlighter - IndexReader should only contain doc of interest, /// best to use MemoryIndex. /// ///

/// /// WARNING: The status of the Payloads feature is experimental. /// The APIs introduced here might change in the future and will not be /// supported anymore in such a case. /// ///

public class PayloadSpanUtil { private IndexReader reader; /// that contains doc with payloads to extract /// public PayloadSpanUtil(IndexReader reader) { this.reader = reader; } /// Query should be rewritten for wild/fuzzy support. /// /// /// /// /// payloads Collection /// /// IOException public virtual ICollection GetPayloadsForQuery(Query query) { ICollection payloads = new List(); QueryToSpanQuery(query, payloads); return payloads; } private void QueryToSpanQuery(Query query, ICollection payloads) { if (query is BooleanQuery) { BooleanClause[] queryClauses = ((BooleanQuery) query).GetClauses(); for (int i = 0; i < queryClauses.Length; i++) { if (!queryClauses[i].IsProhibited) { QueryToSpanQuery(queryClauses[i].Query, payloads); } } } else if (query is PhraseQuery) { Term[] phraseQueryTerms = ((PhraseQuery) query).GetTerms(); SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length]; for (int i = 0; i < phraseQueryTerms.Length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = ((PhraseQuery) query).Slop; bool inorder = false; if (slop == 0) { inorder = true; } SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); sp.Boost = query.Boost; GetPayloads(payloads, sp); } else if (query is TermQuery) { SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).Term); stq.Boost = query.Boost; GetPayloads(payloads, stq); } else if (query is SpanQuery) { GetPayloads(payloads, (SpanQuery) query); } else if (query is FilteredQuery) { QueryToSpanQuery(((FilteredQuery) query).Query, payloads); } else if (query is DisjunctionMaxQuery) { for (IEnumerator iterator = ((DisjunctionMaxQuery)query).GetEnumerator(); iterator.MoveNext(); ) { QueryToSpanQuery(iterator.Current, payloads); } } else if (query is MultiPhraseQuery) { MultiPhraseQuery mpq = (MultiPhraseQuery) query; System.Collections.Generic.IList termArrays = mpq.GetTermArrays(); int[] positions = mpq.GetPositions(); if (positions.Length > 0) { int maxPosition = positions[positions.Length - 1]; for (int i = 0; i < positions.Length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } IList[] disjunctLists = new IList[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < termArrays.Count; ++i) { Term[] termArray = termArrays[i]; IList disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new List(termArray.Length)); ++distinctPositions; } foreach(Term term in termArray) { disjuncts.Add(new SpanTermQuery(term)); } } int positionGaps = 0; int position = 0; SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.Length; ++i) { IList disjuncts = disjunctLists[i]; if (disjuncts != null) { clauses[position++] = new SpanOrQuery((SpanQuery[]) (disjuncts.ToArray())); } else { ++positionGaps; } } int slop = mpq.Slop; bool inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); sp.Boost = query.Boost; GetPayloads(payloads, sp); } } } private void GetPayloads(ICollection payloads, SpanQuery query) { Spans.Spans spans = query.GetSpans(reader); while (spans.Next() == true) { if (spans.IsPayloadAvailable()) { ICollection payload = spans.GetPayload(); foreach (byte[] bytes in payload) { payloads.Add(bytes); } } } } } }