/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; namespace Lucene.Net.Index { /// For each Field, store position by position information. It ignores frequency information ///

/// This class is not thread-safe.

public class PositionBasedTermVectorMapper : TermVectorMapper
{
    /// <summary>
    /// Map from field name to that field's position table (see <c>currentPositions</c>).
    /// Stays null until <c>SetExpectations</c> has been called.
    /// </summary>
    private System.Collections.IDictionary fieldToTerms;

    /// <summary>Name of the field most recently handed to <c>SetExpectations</c>.</summary>
    private System.String currentField;

    /// <summary>A map of Integer (the position) to <see cref="TVPositionInfo"/> for the current field.</summary>
    private System.Collections.IDictionary currentPositions;

    /// <summary>The <c>storeOffsets</c> flag most recently handed to <c>SetExpectations</c>.</summary>
    private bool storeOffsets;

    /// <summary>
    /// Creates a mapper, passing <c>false</c> for both flags of the base
    /// <c>TermVectorMapper</c> constructor.
    /// </summary>
    public PositionBasedTermVectorMapper() : base(false, false)
    {
    }

    /// <summary>
    /// Creates a mapper, passing <c>false</c> as the first base-constructor flag and
    /// <paramref name="ignoringOffsets"/> as the second.
    /// </summary>
    /// <param name="ignoringOffsets">Forwarded unchanged to the base <c>TermVectorMapper</c> constructor.</param>
    public PositionBasedTermVectorMapper(bool ignoringOffsets) : base(false, ignoringOffsets)
    {
    }

    /// <summary>
    /// Never ignores positions. This mapper doesn't make much sense unless there are positions.
    /// </summary>
    /// <returns>Always <c>false</c>.</returns>
    public override bool IsIgnoringPositions()
    {
        return false;
    }

    /// <summary>
    /// Callback for the TermVectorReader. Records <paramref name="term"/> under every position
    /// listed in <paramref name="positions"/>, creating the <see cref="TVPositionInfo"/> for a
    /// position the first time that position is seen for the current field.
    /// </summary>
    /// <param name="term">The term to record.</param>
    /// <param name="frequency">Not used by this mapper.</param>
    /// <param name="offsets">
    /// Offsets indexed in parallel with <paramref name="positions"/>, or null. When null, a null
    /// offset is passed along for every position.
    /// </param>
    /// <param name="positions">The positions at which the term occurs. Dereferenced without a null check.</param>
    public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
    {
        for (int i = 0; i < positions.Length; i++)
        {
            // Boxed once so the same key object is used for both the lookup and the insert.
            System.Int32 posVal = positions[i];
            TVPositionInfo pos = (TVPositionInfo) currentPositions[posVal];
            if (pos == null)
            {
                pos = new TVPositionInfo(positions[i], storeOffsets);
                currentPositions[posVal] = pos;
            }

            pos.addTerm(term, offsets != null ? offsets[i] : null);
        }
    }

    /// <summary>
    /// Callback mechanism used by the TermVectorReader. Starts a fresh position table for
    /// <paramref name="field"/> and makes it the target of subsequent <c>Map</c> calls.
    /// </summary>
    /// <param name="field">The field being read.</param>
    /// <param name="numTerms">The number of terms in the vector; used as the initial capacity of the field map.</param>
    /// <param name="storeOffsets">Whether offsets are available.</param>
    /// <param name="storePositions">Whether positions are available.</param>
    /// <exception cref="System.InvalidOperationException">
    /// Thrown when <paramref name="storePositions"/> is false; this mapper requires positions.
    /// </exception>
    public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
    {
        if (!storePositions)
        {
            // InvalidOperationException derives from SystemException, so callers that
            // catch the previously thrown SystemException continue to work.
            throw new System.InvalidOperationException("You must store positions in order to use this Mapper");
        }

        // NOTE(review): the field map is recreated on every call, so after several calls
        // GetFieldToTerms() only exposes the most recent field. Confirm whether
        // accumulating across fields was intended before changing this.
        fieldToTerms = new System.Collections.Hashtable(numTerms);
        this.storeOffsets = storeOffsets;
        currentField = field;
        currentPositions = new System.Collections.Hashtable();
        fieldToTerms[currentField] = currentPositions;
    }

    /// <summary>Get the mapping between fields and terms.</summary>
    /// <returns>
    /// A map between field names and a Map. The sub-Map key is the position as the integer,
    /// the value is <see cref="TVPositionInfo"/>. Null if <c>SetExpectations</c> has not been called yet.
    /// </returns>
    public virtual System.Collections.IDictionary GetFieldToTerms()
    {
        return fieldToTerms;
    }

    /// <summary>Container for a term at a position.</summary>
    public class TVPositionInfo
    {
        /// <summary>The position of the term.</summary>
        public virtual int Position
        {
            get { return position; }
        }

        /// <summary>Note, there may be multiple terms at the same position.</summary>
        /// <value>A List of Strings.</value>
        public virtual System.Collections.IList Terms
        {
            get { return terms; }
        }

        /// <summary>
        /// Parallel list (to <see cref="Terms"/>) of TermVectorOffsetInfo objects. There may be
        /// multiple entries since there may be multiple terms at a position.
        /// </summary>
        /// <value>
        /// A List of TermVectorOffsetInfo objects if offsets are stored; null when this instance
        /// was constructed with <c>storeOffsets</c> set to false.
        /// </value>
        public virtual System.Collections.IList Offsets
        {
            get { return offsets; }
        }

        private readonly int position;

        // A list of Strings.
        private readonly System.Collections.IList terms;

        // A list of TermVectorOffsetInfo; left null when offsets are not stored.
        private readonly System.Collections.IList offsets;

        /// <summary>Creates an empty container for the given position.</summary>
        /// <param name="position">The position this container represents.</param>
        /// <param name="storeOffsets">When true, an offsets list is allocated alongside the terms list.</param>
        public TVPositionInfo(int position, bool storeOffsets)
        {
            this.position = position;
            terms = new System.Collections.ArrayList();
            if (storeOffsets)
            {
                offsets = new System.Collections.ArrayList();
            }
        }

        /// <summary>
        /// Appends a term and, when an offsets list exists, its offset entry at the same index.
        /// </summary>
        /// <param name="term">The term occurring at this position.</param>
        /// <param name="info">The term's offset info; may be null, in which case null is appended to the offsets list.</param>
        internal virtual void addTerm(System.String term, TermVectorOffsetInfo info)
        {
            terms.Add(term);
            if (offsets != null)
            {
                offsets.Add(info);
            }
        }
    }
}
} // namespace Lucene.Net.Index