/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;
using Lucene.Net.Support;

namespace Lucene.Net.Index
{

/// <summary>Store a sorted collection of <see cref="TermVectorEntry" />s. Collects all term information
/// into a single SortedSet.
/// <para/>
/// NOTE: This mapper ignores all field information for the document. This means that if you are using offsets/positions you will not
/// know which fields they correlate with.
/// <para/>
/// This class is not thread-safe.
/// </summary>
public class SortedTermVectorMapper : TermVectorMapper
{
    // All entries seen so far, ordered by the comparator supplied to the constructor.
    private readonly SortedSet<TermVectorEntry> currentSet;

    // Maps a term's text to the single combined entry kept for that term.
    private readonly IDictionary<string, TermVectorEntry> termToTVE = new HashMap<string, TermVectorEntry>();

    // Set by SetExpectations; control whether Map keeps offsets / positions.
    private bool storeOffsets;
    private bool storePositions;

    /// <summary>Stand-in name for the field in <see cref="TermVectorEntry" />.</summary>
    public const System.String ALL = "_ALL_";

    /// <summary>
    /// Creates a mapper that passes <c>false</c> for both the ignoringPositions and
    /// ignoringOffsets arguments of the three-argument constructor.
    /// </summary>
    /// <param name="comparator">A comparator for sorting <see cref="TermVectorEntry" />s.</param>
    public SortedTermVectorMapper(IComparer<TermVectorEntry> comparator)
        : this(false, false, comparator)
    {
    }

    /// <summary>
    /// Creates a mapper whose entry set is ordered by <paramref name="comparator" />.
    /// </summary>
    /// <param name="ignoringPositions">Forwarded unchanged to the <see cref="TermVectorMapper" /> base constructor.</param>
    /// <param name="ignoringOffsets">Forwarded unchanged to the <see cref="TermVectorMapper" /> base constructor.</param>
    /// <param name="comparator">A comparator for sorting <see cref="TermVectorEntry" />s.</param>
    public SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, IComparer<TermVectorEntry> comparator)
        : base(ignoringPositions, ignoringOffsets)
    {
        currentSet = new SortedSet<TermVectorEntry>(comparator);
    }

    /// <summary>
    /// Records one occurrence list of a term. The first time a term is seen a new entry is
    /// created under the <see cref="ALL" /> field name; later calls for the same term are
    /// combined into that entry (frequencies are summed, offsets and positions are appended).
    /// </summary>
    /// <param name="term">The term to map.</param>
    /// <param name="frequency">The frequency of the term.</param>
    /// <param name="offsets">Offset information, may be null.</param>
    /// <param name="positions">Position information, may be null.</param>
    public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
    {
        TermVectorEntry entry;
        if (!termToTVE.TryGetValue(term, out entry) || entry == null)
        {
            // First mention of this term: keep offsets/positions only if they were requested.
            entry = new TermVectorEntry(ALL, term, frequency, storeOffsets ? offsets : null, storePositions ? positions : null);
            termToTVE[term] = entry;
            currentSet.Add(entry);
            return;
        }

        // We need to combine this mention with the previous ones. The comparator may order
        // entries by the very values changed below, and a SortedSet does not support changing
        // the sort values of an item it contains. So the entry is taken out of the set, updated,
        // and put back so that it lands at its correct position.
        // NOTE(review): assumes the comparator never reports entries for two different terms
        // as equal; otherwise Remove/Add could affect a different entry - confirm for custom comparators.
        bool wasInSet = currentSet.Remove(entry);

        entry.Frequency = entry.Frequency + frequency;

        // Null or empty incoming data leaves the stored arrays untouched.
        if (storeOffsets && offsets != null && offsets.Length > 0)
        {
            entry.SetOffsets(Append(entry.GetOffsets(), offsets));
        }

        if (storePositions && positions != null && positions.Length > 0)
        {
            entry.SetPositions(Append(entry.GetPositions(), positions));
        }

        if (wasInSet)
        {
            currentSet.Add(entry);
        }
    }

    /// <summary>
    /// Returns <paramref name="additional" /> itself when nothing is stored yet, otherwise a
    /// new array holding the existing elements followed by the additional ones.
    /// </summary>
    private static T[] Append<T>(T[] existing, T[] additional)
    {
        if (existing == null)
        {
            return additional;
        }

        T[] combined = new T[existing.Length + additional.Length];
        Array.Copy(existing, 0, combined, 0, existing.Length);
        Array.Copy(additional, 0, combined, existing.Length, additional.Length);
        return combined;
    }

    /// <summary>
    /// Remembers whether offsets and positions should be kept by subsequent calls to
    /// <see cref="Map" />. The <paramref name="field" /> and <paramref name="numTerms" />
    /// arguments are not used by this mapper.
    /// </summary>
    public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
    {
        this.storeOffsets = storeOffsets;
        this.storePositions = storePositions;
    }

    /// <summary>
    /// The TermVectorEntrySet. A SortedSet of <see cref="TermVectorEntry" /> objects. Sort is by the
    /// comparator passed into the constructor.
    /// <para/>
    /// This set will be empty until after the mapping process takes place.
    /// </summary>
    /// <value>The SortedSet of <see cref="TermVectorEntry" />.</value>
    public virtual SortedSet<TermVectorEntry> TermVectorEntrySet
    {
        get { return currentSet; }
    }
}
}