/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using Lucene.Net.Support;
namespace Lucene.Net.Index
{
/// <summary>
/// Stores a sorted collection of <see cref="TermVectorEntry" />s. Collects all term information
/// into a single <see cref="SortedSet{T}" />.
/// <para>
/// NOTE: This mapper ignores all field information for the document: every entry is recorded
/// under the stand-in field name <see cref="ALL" />. This means that if you are using
/// offsets/positions you will not know what fields they correlate with.
/// </para>
/// <para>This class is not thread-safe.</para>
/// </summary>
public class SortedTermVectorMapper : TermVectorMapper
{
    // Entries ordered by the comparator supplied to the constructor.
    private readonly SortedSet<TermVectorEntry> currentSet;

    // Term text -> the single merged entry for that term (field is ignored).
    private readonly IDictionary<string, TermVectorEntry> termToTVE = new HashMap<string, TermVectorEntry>();

    // Set by SetExpectations; control whether Map keeps offsets / positions.
    private bool storeOffsets;
    private bool storePositions;

    /// <summary>Stand-in name for the field in <see cref="TermVectorEntry" />.</summary>
    public const System.String ALL = "_ALL_";

    /// <summary>
    /// Creates a mapper that passes <c>false</c> for both <c>ignoringPositions</c> and
    /// <c>ignoringOffsets</c>.
    /// </summary>
    /// <param name="comparator">A comparator for sorting <see cref="TermVectorEntry" />s.</param>
    public SortedTermVectorMapper(IComparer<TermVectorEntry> comparator)
        : this(false, false, comparator)
    {
    }

    /// <summary>
    /// Creates a mapper, forwarding the two "ignoring" flags to <see cref="TermVectorMapper" />.
    /// </summary>
    /// <param name="ignoringPositions">Forwarded unchanged to the base class constructor.</param>
    /// <param name="ignoringOffsets">Forwarded unchanged to the base class constructor.</param>
    /// <param name="comparator">A comparator for sorting <see cref="TermVectorEntry" />s.</param>
    public SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, IComparer<TermVectorEntry> comparator)
        : base(ignoringPositions, ignoringOffsets)
    {
        currentSet = new SortedSet<TermVectorEntry>(comparator);
    }

    /// <summary>
    /// Records one occurrence set of <paramref name="term" />. If the term has been mapped before,
    /// the frequency is added to the existing entry and any offsets/positions are appended to it.
    /// </summary>
    /// <param name="term">The term to map.</param>
    /// <param name="frequency">The frequency of the term.</param>
    /// <param name="offsets">Offset information, may be null.</param>
    /// <param name="positions">Position information, may be null.</param>
    public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
    {
        TermVectorEntry entry;
        // TryGetValue: one lookup, and no reliance on the indexer returning null for a missing key.
        if (!termToTVE.TryGetValue(term, out entry) || entry == null)
        {
            entry = new TermVectorEntry(ALL, term, frequency, storeOffsets ? offsets : null, storePositions ? positions : null);
            termToTVE[term] = entry;
            currentSet.Add(entry);
            return;
        }

        // We need to combine any previous mentions of the term.
        // The comparator may order entries by frequency/offsets/positions, and a SortedSet does not
        // support changing the sort value of an item it contains. Take the entry out while it is
        // still in its correct position, mutate it, then put it back so it is re-sorted.
        // (Assumes the comparator distinguishes entries for different terms.)
        bool wasInSet = currentSet.Remove(entry);

        entry.Frequency = entry.Frequency + frequency;

        // Nothing to merge when the incoming array is null or empty: leave the entry alone.
        if (storeOffsets && offsets != null && offsets.Length > 0)
        {
            entry.SetOffsets(Append(entry.GetOffsets(), offsets));
        }
        if (storePositions && positions != null && positions.Length > 0)
        {
            entry.SetPositions(Append(entry.GetPositions(), positions));
        }

        // Only restore membership the entry already had; Add is a no-op if the comparator now
        // considers it equal to another member.
        if (wasInSet)
        {
            currentSet.Add(entry);
        }
    }

    /// <summary>
    /// Returns <paramref name="additional" /> itself when <paramref name="existing" /> is null,
    /// otherwise a new array holding <paramref name="existing" /> followed by
    /// <paramref name="additional" />. <paramref name="additional" /> must not be null.
    /// </summary>
    private static T[] Append<T>(T[] existing, T[] additional)
    {
        if (existing == null)
        {
            return additional;
        }

        T[] combined = new T[existing.Length + additional.Length];
        Array.Copy(existing, 0, combined, 0, existing.Length);
        Array.Copy(additional, 0, combined, existing.Length, additional.Length);
        return combined;
    }

    /// <summary>
    /// Remembers whether subsequent <see cref="Map" /> calls should keep offsets and positions.
    /// </summary>
    /// <param name="field">Not used by this mapper.</param>
    /// <param name="numTerms">Not used by this mapper.</param>
    /// <param name="storeOffsets">When true, <see cref="Map" /> keeps offset information.</param>
    /// <param name="storePositions">When true, <see cref="Map" /> keeps position information.</param>
    public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
    {
        this.storeOffsets = storeOffsets;
        this.storePositions = storePositions;
    }

    /// <summary>
    /// The TermVectorEntrySet. A SortedSet of <see cref="TermVectorEntry" /> objects. Sort is by the
    /// comparator passed into the constructor.
    /// <para>This set will be empty until after the mapping process takes place.</para>
    /// </summary>
    /// <value>The SortedSet of <see cref="TermVectorEntry" />.</value>
    public virtual SortedSet<TermVectorEntry> TermVectorEntrySet
    {
        get { return currentSet; }
    }
}
}