/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using Lucene.Net.Support; namespace Lucene.Net.Index { /// For each Field, store position by position information. It ignores frequency information ///

/// This class is not thread-safe.

/// <summary>
/// Term vector mapper that organizes the terms of a field by position: for every
/// position reported to <see cref="Map"/> it records the terms (and, when offsets are
/// stored, the offsets) found at that position. The frequency passed to
/// <see cref="Map"/> is not used. Instances keep per-field mutable state.
/// </summary>
public class PositionBasedTermVectorMapper : TermVectorMapper
{
    // Field name -> (position -> info for that position). Rebuilt by SetExpectations.
    private IDictionary<string, IDictionary<int, TVPositionInfo>> fieldToTerms;

    // Name of the field most recently announced through SetExpectations.
    private string currentField;

    /// <summary>Maps a position to the <see cref="TVPositionInfo"/> collected for it in the current field.</summary>
    private IDictionary<int, TVPositionInfo> currentPositions;

    // Whether the current field stores offsets, as announced through SetExpectations.
    private bool storeOffsets;

    /// <summary>
    /// Creates a mapper, passing <c>false</c> for both arguments of the base constructor.
    /// </summary>
    public PositionBasedTermVectorMapper() : base(false, false)
    {
    }

    /// <summary>
    /// Creates a mapper, passing <c>false</c> as the first base-constructor argument and
    /// <paramref name="ignoringOffsets"/> as the second.
    /// </summary>
    /// <param name="ignoringOffsets">Forwarded unchanged to the base constructor.</param>
    public PositionBasedTermVectorMapper(bool ignoringOffsets) : base(false, ignoringOffsets)
    {
    }

    /// <summary>
    /// Never ignores positions. This mapper doesn't make much sense unless there are positions.
    /// </summary>
    /// <value>Always <c>false</c>.</value>
    public override bool IsIgnoringPositions
    {
        get { return false; }
    }

    /// <summary>
    /// Callback for the term vector reader: registers <paramref name="term"/> at each of
    /// its positions in the current field.
    /// </summary>
    /// <param name="term">The term to record.</param>
    /// <param name="frequency">Not used by this mapper.</param>
    /// <param name="offsets">
    /// Offsets parallel to <paramref name="positions"/>, or <c>null</c>; when <c>null</c>,
    /// <c>TermVectorOffsetInfo.Null</c> is passed along for every position instead.
    /// </param>
    /// <param name="positions">The positions at which the term occurs.</param>
    public override void Map(string term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
    {
        for (int i = 0; i < positions.Length; i++)
        {
            int position = positions[i];

            // TryGetValue avoids relying on the indexer returning null for a missing key;
            // a standard IDictionary indexer throws KeyNotFoundException in that case.
            TVPositionInfo info;
            if (!currentPositions.TryGetValue(position, out info) || info == null)
            {
                info = new TVPositionInfo(position, storeOffsets);
                currentPositions[position] = info;
            }

            info.addTerm(term, offsets != null ? offsets[i] : TermVectorOffsetInfo.Null);
        }
    }

    /// <summary>
    /// Callback mechanism used by the term vector reader; prepares the mapper for a new field.
    /// </summary>
    /// <param name="field">The field being read.</param>
    /// <param name="numTerms">The number of terms in the vector; used as the size argument of the new field map.</param>
    /// <param name="storeOffsets">Whether offsets are available.</param>
    /// <param name="storePositions">Whether positions are available; must be <c>true</c>.</param>
    /// <exception cref="System.InvalidOperationException">
    /// <paramref name="storePositions"/> is <c>false</c>.
    /// </exception>
    public override void SetExpectations(string field, int numTerms, bool storeOffsets, bool storePositions)
    {
        if (!storePositions)
        {
            // InvalidOperationException derives from SystemException, so existing
            // catch (SystemException) handlers still match.
            throw new System.InvalidOperationException("You must store positions in order to use this Mapper");
        }

        // NOTE(review): the field map is recreated on every call, so FieldToTerms only
        // exposes the most recently announced field. Kept as-is because callers may
        // depend on the reset - confirm whether accumulating fields is the intent.
        fieldToTerms = new HashMap<string, IDictionary<int, TVPositionInfo>>(numTerms);
        this.storeOffsets = storeOffsets;
        currentField = field;
        currentPositions = new HashMap<int, TVPositionInfo>();
        fieldToTerms[currentField] = currentPositions;
    }

    /// <summary>
    /// Gets the mapping between fields and terms.
    /// </summary>
    /// <value>
    /// A map between field names and a map whose key is the position and whose value is
    /// the <see cref="TVPositionInfo"/> for that position. <c>null</c> until
    /// <see cref="SetExpectations"/> has been called.
    /// </value>
    public virtual IDictionary<string, IDictionary<int, TVPositionInfo>> FieldToTerms
    {
        get { return fieldToTerms; }
    }

    /// <summary>Container for the terms found at a single position.</summary>
    public class TVPositionInfo
    {
        private readonly int position;

        // Terms recorded at this position, in the order they were added.
        private readonly IList<string> terms;

        // Offsets parallel to terms; stays null when offsets are not stored.
        private readonly IList<TermVectorOffsetInfo> offsets;

        /// <summary>Creates an empty container for <paramref name="position"/>.</summary>
        /// <param name="position">The position this container describes.</param>
        /// <param name="storeOffsets">When <c>true</c>, an offsets list is allocated alongside the terms list.</param>
        public TVPositionInfo(int position, bool storeOffsets)
        {
            this.position = position;
            terms = new List<string>();
            if (storeOffsets)
            {
                offsets = new List<TermVectorOffsetInfo>();
            }
        }

        /// <summary>Gets the position of the terms.</summary>
        public virtual int Position
        {
            get { return position; }
        }

        /// <summary>
        /// Gets the terms at this position. Note, there may be multiple terms at the same position.
        /// </summary>
        public virtual IList<string> Terms
        {
            get { return terms; }
        }

        /// <summary>
        /// Gets the list of offsets parallel to <see cref="Terms"/>. There may be multiple
        /// entries since there may be multiple terms at a position.
        /// </summary>
        /// <value>The offsets list, or <c>null</c> if this container was created without offset storage.</value>
        public virtual IList<TermVectorOffsetInfo> Offsets
        {
            get { return offsets; }
        }

        /// <summary>
        /// Appends <paramref name="term"/> and, when offsets are stored, its offset info.
        /// </summary>
        /// <param name="term">The term to append.</param>
        /// <param name="info">The offset info appended in parallel; ignored when offsets are not stored.</param>
        internal virtual void addTerm(string term, TermVectorOffsetInfo info)
        {
            terms.Add(term);
            if (offsets != null)
            {
                offsets.Add(info);
            }
        }
    }
}
} // namespace Lucene.Net.Index