/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
namespace Lucene.Net.Index
{
/// <summary>
/// For each field, stores position-by-position information about the terms
/// that occur there. Frequency information is ignored.
/// <para>This class is not thread-safe.</para>
/// </summary>
public class PositionBasedTermVectorMapper : TermVectorMapper
{
    /// <summary>
    /// Field name (string) -> position table of that field. Created on the first
    /// <see cref="SetExpectations"/> call; null until then.
    /// </summary>
    private System.Collections.IDictionary fieldToTerms;

    /// <summary>Field name passed to the most recent <see cref="SetExpectations"/> call.</summary>
    private string currentField;

    /// <summary>
    /// Position table of the current field: boxed int position -> <see cref="TVPositionInfo"/>.
    /// </summary>
    private System.Collections.IDictionary currentPositions;

    /// <summary>Value of the storeOffsets flag passed to the most recent <see cref="SetExpectations"/> call.</summary>
    private bool storeOffsets;

    /// <summary>
    /// Creates a mapper, passing <c>false</c> for both flags of the base constructor.
    /// </summary>
    public PositionBasedTermVectorMapper() : base(false, false)
    {
    }

    /// <summary>
    /// Creates a mapper, passing <c>false</c> as the first base-constructor flag and
    /// <paramref name="ignoringOffsets"/> as the second.
    /// </summary>
    /// <param name="ignoringOffsets">Forwarded unchanged to the base constructor.</param>
    public PositionBasedTermVectorMapper(bool ignoringOffsets) : base(false, ignoringOffsets)
    {
    }

    /// <summary>
    /// Never ignores positions. This mapper doesn't make much sense unless there are positions.
    /// </summary>
    /// <returns>Always <c>false</c>.</returns>
    public override bool IsIgnoringPositions()
    {
        return false;
    }

    /// <summary>
    /// Callback for the term vector reader. Records <paramref name="term"/> under every
    /// position listed in <paramref name="positions"/> for the current field.
    /// </summary>
    /// <param name="term">The term to record.</param>
    /// <param name="frequency">Not used by this mapper.</param>
    /// <param name="offsets">
    /// Offsets parallel to <paramref name="positions"/>, or null. When null, a null offset
    /// is recorded for each position (if the current field keeps an offsets list at all).
    /// </param>
    /// <param name="positions">The positions at which the term occurs. Must not be null.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="positions"/> is null.</exception>
    /// <exception cref="System.InvalidOperationException">
    /// <see cref="SetExpectations"/> has not been called yet, so there is no current field.
    /// </exception>
    public override void Map(string term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
    {
        if (positions == null)
        {
            throw new System.ArgumentNullException("positions", "Positions are required by this Mapper");
        }
        if (currentPositions == null)
        {
            throw new System.InvalidOperationException("SetExpectations must be called before Map");
        }

        for (int i = 0; i < positions.Length; i++)
        {
            // Box the position once so the lookup and the insert share one key object.
            object key = positions[i];
            TVPositionInfo info = (TVPositionInfo) currentPositions[key];
            if (info == null)
            {
                // First term seen at this position: create its container.
                info = new TVPositionInfo(positions[i], storeOffsets);
                currentPositions[key] = info;
            }
            info.addTerm(term, offsets != null ? offsets[i] : null);
        }
    }

    /// <summary>
    /// Callback mechanism used by the term vector reader. Starts a fresh position table for
    /// <paramref name="field"/> and makes it the target of subsequent <see cref="Map"/> calls.
    /// Tables of fields announced by earlier calls are kept; announcing the same field name
    /// again replaces that field's table.
    /// </summary>
    /// <param name="field">The field being read.</param>
    /// <param name="numTerms">The number of terms in the vector. Not used by this mapper.</param>
    /// <param name="storeOffsets">Whether offsets are available.</param>
    /// <param name="storePositions">Whether positions are available. Must be <c>true</c>.</param>
    /// <exception cref="System.InvalidOperationException">
    /// <paramref name="storePositions"/> is <c>false</c>.
    /// </exception>
    public override void SetExpectations(string field, int numTerms, bool storeOffsets, bool storePositions)
    {
        if (!storePositions)
        {
            // InvalidOperationException derives from SystemException, so callers that
            // catch SystemException keep working.
            throw new System.InvalidOperationException("You must store positions in order to use this Mapper");
        }

        // Create the field table only once so that fields announced by earlier calls
        // remain available from GetFieldToTerms.
        if (fieldToTerms == null)
        {
            fieldToTerms = new System.Collections.Hashtable();
        }

        this.storeOffsets = storeOffsets;
        currentField = field;
        currentPositions = new System.Collections.Hashtable();
        fieldToTerms[currentField] = currentPositions;
    }

    /// <summary>
    /// Gets the mapping between fields and their position tables. The tables are hash tables
    /// and therefore carry no ordering.
    /// </summary>
    /// <returns>
    /// A map between field names and a map. The sub-map key is the position as an integer,
    /// the value is a <see cref="TVPositionInfo"/>. Returns null if
    /// <see cref="SetExpectations"/> has never been called.
    /// </returns>
    public virtual System.Collections.IDictionary GetFieldToTerms()
    {
        return fieldToTerms;
    }

    /// <summary>Container for the terms (and optionally their offsets) at one position.</summary>
    public class TVPositionInfo
    {
        private readonly int position;

        // A list of strings.
        private readonly System.Collections.IList terms;

        // A list of TermVectorOffsetInfo; null when offsets are not stored.
        private readonly System.Collections.IList offsets;

        /// <summary>Creates an empty container for the given position.</summary>
        /// <param name="position">The position this container describes.</param>
        /// <param name="storeOffsets">
        /// When <c>true</c> an offsets list is created; otherwise <see cref="Offsets"/> stays null.
        /// </param>
        public TVPositionInfo(int position, bool storeOffsets)
        {
            this.position = position;
            terms = new System.Collections.ArrayList();
            if (storeOffsets)
            {
                offsets = new System.Collections.ArrayList();
            }
        }

        /// <summary>The position of the term(s).</summary>
        public virtual int Position
        {
            get
            {
                return position;
            }
        }

        /// <summary>
        /// A list of strings. Note, there may be multiple terms at the same position.
        /// </summary>
        public virtual System.Collections.IList Terms
        {
            get
            {
                return terms;
            }
        }

        /// <summary>
        /// Parallel list (to <see cref="Terms"/>) of TermVectorOffsetInfo objects. There may be
        /// multiple entries since there may be multiple terms at a position. Null if this
        /// instance was created with storeOffsets set to <c>false</c>.
        /// </summary>
        public virtual System.Collections.IList Offsets
        {
            get
            {
                return offsets;
            }
        }

        /// <summary>
        /// Appends a term and, when an offsets list exists, its offset info (which may be null).
        /// </summary>
        /// <param name="term">The term occurring at this position.</param>
        /// <param name="info">The offset info for the term; appended as-is, even when null.</param>
        internal virtual void addTerm(string term, TermVectorOffsetInfo info)
        {
            terms.Add(term);
            if (offsets != null)
            {
                offsets.Add(info);
            }
        }
    }
}
}