/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using Lucene.Net.Support;
namespace Lucene.Net.Index
{
/// <summary>
/// For each field, stores position-by-position term information. Frequency information is ignored.
/// <para>This class is not thread-safe.</para>
/// </summary>
public class PositionBasedTermVectorMapper : TermVectorMapper
{
    // Field name -> (position -> terms/offsets seen at that position).
    // Created lazily by the first SetExpectations call, so it is null until a field has been announced.
    private IDictionary<string, IDictionary<int, TVPositionInfo>> fieldToTerms;

    // Name of the field announced by the most recent SetExpectations call.
    private string currentField;

    /// <summary>A map of position (int) to <see cref="TVPositionInfo"/> for the current field.</summary>
    private IDictionary<int, TVPositionInfo> currentPositions;

    // Whether the current field reported stored offsets; decides if new TVPositionInfo entries track offsets.
    private bool storeOffsets;

    /// <summary>Creates a mapper, passing <c>false, false</c> to the base constructor.</summary>
    public PositionBasedTermVectorMapper() : base(false, false)
    {
    }

    /// <summary>Creates a mapper, passing <c>false</c> and <paramref name="ignoringOffsets"/> to the base constructor.</summary>
    /// <param name="ignoringOffsets">Forwarded unchanged as the second base-constructor argument.</param>
    public PositionBasedTermVectorMapper(bool ignoringOffsets) : base(false, ignoringOffsets)
    {
    }

    /// <summary>Never ignores positions. This mapper doesn't make much sense unless there are positions.</summary>
    /// <value>Always <c>false</c>.</value>
    public override bool IsIgnoringPositions
    {
        get { return false; }
    }

    /// <summary>
    /// Callback for the TermVectorReader. Records <paramref name="term"/> under every position it occurs at
    /// in the current field, creating the per-position entry on first use.
    /// </summary>
    /// <param name="term">The term being mapped.</param>
    /// <param name="frequency">The term frequency; not used by this mapper.</param>
    /// <param name="offsets">
    /// Offsets parallel to <paramref name="positions"/>, or <c>null</c>. When <c>null</c>,
    /// <c>TermVectorOffsetInfo.Null</c> is recorded in place of each offset.
    /// </param>
    /// <param name="positions">The positions at which the term occurs.</param>
    public override void Map(string term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
    {
        for (int i = 0; i < positions.Length; i++)
        {
            int position = positions[i];

            // TryGetValue instead of the indexer: the field is typed as IDictionary, and a plain
            // dictionary indexer throws on a missing key rather than yielding null.
            TVPositionInfo info;
            if (!currentPositions.TryGetValue(position, out info) || info == null)
            {
                info = new TVPositionInfo(position, storeOffsets);
                currentPositions[position] = info;
            }

            info.addTerm(term, offsets != null ? offsets[i] : TermVectorOffsetInfo.Null);
        }
    }

    /// <summary>
    /// Callback mechanism used by the TermVectorReader. Starts a fresh position map for
    /// <paramref name="field"/> and registers it in <see cref="FieldToTerms"/>.
    /// </summary>
    /// <remarks>
    /// The field map is created once and kept across calls, so every field announced to this mapper
    /// stays available through <see cref="FieldToTerms"/>. Announcing the same field name again
    /// replaces that field's position map.
    /// </remarks>
    /// <param name="field">The field being read.</param>
    /// <param name="numTerms">The number of terms in the vector; not used by this mapper.</param>
    /// <param name="storeOffsets">Whether offsets are available.</param>
    /// <param name="storePositions">Whether positions are available; must be <c>true</c>.</param>
    /// <exception cref="System.SystemException">Thrown when <paramref name="storePositions"/> is <c>false</c>.</exception>
    public override void SetExpectations(string field, int numTerms, bool storeOffsets, bool storePositions)
    {
        if (!storePositions)
        {
            throw new System.SystemException("You must store positions in order to use this Mapper");
        }

        // Allocate the field map only once; re-creating it per call would drop previously mapped fields.
        if (fieldToTerms == null)
        {
            fieldToTerms = new HashMap<string, IDictionary<int, TVPositionInfo>>();
        }

        this.storeOffsets = storeOffsets;
        currentField = field;
        currentPositions = new HashMap<int, TVPositionInfo>();
        fieldToTerms[currentField] = currentPositions;
    }

    /// <summary>Gets the mapping between fields and the terms found at each position.</summary>
    /// <value>
    /// A map between field names and a map whose key is the position as an integer and whose value is a
    /// <see cref="Lucene.Net.Index.PositionBasedTermVectorMapper.TVPositionInfo"/>.
    /// <c>null</c> until <see cref="SetExpectations"/> has been called.
    /// </value>
    public virtual IDictionary<string, IDictionary<int, TVPositionInfo>> FieldToTerms
    {
        get { return fieldToTerms; }
    }

    /// <summary>Container for the term(s) at a position.</summary>
    public class TVPositionInfo
    {
        private readonly int position;

        // The terms recorded at this position, in insertion order.
        private readonly IList<string> terms;

        // Offsets parallel to 'terms'; stays null when offsets are not being stored.
        private readonly IList<TermVectorOffsetInfo> offsets;

        /// <summary>Creates an empty container for the given position.</summary>
        /// <param name="position">The position this container describes.</param>
        /// <param name="storeOffsets">When <c>true</c>, an offsets list is kept alongside the terms.</param>
        public TVPositionInfo(int position, bool storeOffsets)
        {
            this.position = position;
            terms = new List<string>();
            if (storeOffsets)
            {
                offsets = new List<TermVectorOffsetInfo>();
            }
        }

        /// <summary>Gets the position of the term.</summary>
        public virtual int Position
        {
            get { return position; }
        }

        /// <summary>Gets the terms at this position. Note, there may be multiple terms at the same position.</summary>
        /// <value>A list of strings.</value>
        public virtual IList<string> Terms
        {
            get { return terms; }
        }

        /// <summary>
        /// Gets the list of TermVectorOffsetInfo objects parallel to <see cref="Terms"/>.
        /// There may be multiple entries since there may be multiple terms at a position.
        /// </summary>
        /// <value>A list of TermVectorOffsetInfo objects if offsets are stored; otherwise <c>null</c>.</value>
        public virtual IList<TermVectorOffsetInfo> Offsets
        {
            get { return offsets; }
        }

        /// <summary>Appends a term, and its offset when offsets are being stored.</summary>
        /// <param name="term">The term to add.</param>
        /// <param name="info">The offset for the term; ignored when no offsets list exists.</param>
        internal virtual void addTerm(string term, TermVectorOffsetInfo info)
        {
            terms.Add(term);
            if (offsets != null)
            {
                offsets.Add(info);
            }
        }
    }
}
}