/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Document = Lucene.Net.Documents.Document;
using FieldSelector = Lucene.Net.Documents.FieldSelector;
using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using BitVector = Lucene.Net.Util.BitVector;
namespace Lucene.Net.Index
{
/// <summary>An <c>IndexReader</c> that reads a single index segment.</summary>
/// $Id: SegmentReader.java 496851 2007-01-16 20:24:52Z mikemccand $
///
public class SegmentReader : IndexReader
{
// Core per-segment state.  The readers/streams below are opened in
// Initialize() and released in DoClose().
private System.String segment;                  // name of the segment being read
private SegmentInfo si;                         // metadata record for this segment
internal FieldInfos fieldInfos;                 // field name/number/flags table (.fnm)
private FieldsReader fieldsReader;              // stored-fields reader
internal TermInfosReader tis;                   // term-dictionary reader
internal TermVectorsReader termVectorsReaderOrig = null; // prototype; cloned per thread in GetTermVectorsReader()
// Thread-local slot holding each thread's private TermVectorsReader clone.
internal System.LocalDataStoreSlot termVectorsLocal = System.Threading.Thread.AllocateDataSlot();
internal BitVector deletedDocs = null;          // null when the segment has no deletions
private bool deletedDocsDirty = false;          // deletions changed since last commit
private bool normsDirty = false;                // some field's norms changed since last commit
private bool undeleteAll = false;               // DoUndeleteAll() called since last commit
// Snapshots of the three dirty flags, taken by StartCommit() and restored by RollbackCommit().
private bool rollbackDeletedDocsDirty = false;
private bool rollbackNormsDirty = false;
private bool rollbackUndeleteAll = false;
internal IndexInput freqStream;                 // postings frequency stream (.frq)
internal IndexInput proxStream;                 // postings positions stream (.prx)
// Compound File Reader when based on a compound file segment
internal CompoundFileReader cfsReader = null;
/// <summary>Exposes this segment's field metadata table.</summary>
public FieldInfos FieldInfos
{
get { return fieldInfos; }
}
/// <summary>The positions stream (.prx); settable so callers can substitute the stream.</summary>
public IndexInput ProxStream
{
get { return proxStream; }
set { proxStream = value; }
}
/// <summary>Per-field norms state: the input stream (and seek offset) for the
/// field's norms, a lazily-read byte cache, and commit/rollback bookkeeping.</summary>
private class Norm
{
private void InitBlock(SegmentReader enclosingInstance)
{
this.enclosingInstance = enclosingInstance;
}
private SegmentReader enclosingInstance;
// Back-reference to the owning reader (ported Java inner-class idiom).
public SegmentReader Enclosing_Instance
{
get
{
return enclosingInstance;
}
}
public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek)
{
InitBlock(enclosingInstance);
this.in_Renamed = in_Renamed;
this.number = number;
this.normSeek = normSeek;
}
internal IndexInput in_Renamed;   // open input over the norms file
internal byte[] bytes;            // cached norm bytes; null until first read
internal bool dirty;              // true after DoSetNorm() modified 'bytes'
internal int number;              // field number these norms belong to
internal long normSeek;           // offset of this field's norms within the stream
internal bool rollbackDirty;      // snapshot of 'dirty' taken by StartCommit()
/// <summary>Writes the modified norm bytes to a new per-generation file
/// (advancing the norm generation in <paramref name="si"/>) and clears
/// the dirty flag.</summary>
internal void ReWrite(SegmentInfo si)
{
// NOTE: norms are re-written in regular directory, not cfs
System.String oldFileName = si.GetNormFileName(this.number);
if (oldFileName != null && !oldFileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
{
// Mark this file for deletion. Note that we don't
// actually try to delete it until the new segments files is
// successfully written:
Enclosing_Instance.deleter.AddPendingFile(oldFileName);
}
si.AdvanceNormGen(this.number);
IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(si.GetNormFileName(this.number));
try
{
out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
}
finally
{
// Always close the output, even when the write fails.
out_Renamed.Close();
}
this.dirty = false;
}
}
// Field name -> Norm.  Synchronized Hashtable; CloseNorms() additionally locks
// SyncRoot while enumerating.
private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
/// The class which implements SegmentReader.
private static System.Type IMPL;
// Parameterless constructor required by Activator.CreateInstance in Get().
public SegmentReader() : base(null)
{
}
/// <summary>Opens a reader for <paramref name="si"/> using the segment's own directory.</summary>
public static SegmentReader Get(SegmentInfo si)
{
return Get(si.dir, si, null, false, false);
}
/// <summary>Opens a reader for <paramref name="si"/> that participates in <paramref name="sis"/>.</summary>
public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
{
return Get(si.dir, si, sis, closeDir, true);
}
/// <summary>Factory entry point: instantiates the configured SegmentReader
/// implementation (the IMPL type resolved in the static constructor),
/// initializes it against the given directory and segment, and returns it.</summary>
/// <exception cref="System.SystemException">when the implementation type cannot be instantiated</exception>
public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
{
SegmentReader reader;
try
{
reader = (SegmentReader) System.Activator.CreateInstance(IMPL);
}
catch (System.Exception e)
{
throw new System.SystemException("cannot load SegmentReader class: " + e, e);
}
reader.Init(dir, sis, closeDir, ownDir);
reader.Initialize(si);
return reader;
}
/// <summary>Opens all per-segment files (field infos, stored fields, term
/// dictionary, deletions, postings, norms, term vectors) for
/// <paramref name="si"/>.  If anything fails, the partially opened resources
/// are released via DoClose() before the exception propagates.</summary>
private void Initialize(SegmentInfo si)
{
segment = si.name;
this.si = si;
bool success = false;
try
{
// Use compound file directory for some files, if it exists
Directory cfsDir = Directory();
if (si.GetUseCompoundFile())
{
cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
cfsDir = cfsReader;
}
// No compound file exists - use the multi-file format
fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
// Verify two sources of "maxDoc" agree:
if (fieldsReader.Size() != si.docCount)
{
throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
}
tis = new TermInfosReader(cfsDir, segment, fieldInfos);
// NOTE: the bitvector is stored using the regular directory, not cfs
if (HasDeletions(si))
{
deletedDocs = new BitVector(Directory(), si.GetDelFileName());
// Verify # deletes does not exceed maxDoc for this segment:
if (deletedDocs.Count() > MaxDoc())
{
throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
}
}
// make sure that all index files have been read or are kept open
// so that if an index update removes them we'll still have them
freqStream = cfsDir.OpenInput(segment + ".frq");
proxStream = cfsDir.OpenInput(segment + ".prx");
OpenNorms(cfsDir);
if (fieldInfos.HasVectors())
{
// open term vector files only as needed
termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
}
success = true;
}
finally
{
// With lock-less commits, it's entirely possible (and
// fine) to hit a FileNotFound exception above. In
// this case, we want to explicitly close any subset
// of things that were opened so that we don't have to
// wait for a GC to do so.
if (!success)
{
DoClose();
}
}
}
/// <summary>Flushes pending changes to the directory: re-writes the deletions
/// file (advancing its generation), clears it on undelete-all, and re-writes
/// any dirty norms.  All dirty flags are reset on success.</summary>
protected internal override void DoCommit()
{
if (deletedDocsDirty)
{
// re-write deleted
System.String oldDelFileName = si.GetDelFileName();
if (oldDelFileName != null)
{
// Mark this file for deletion. Note that we don't
// actually try to delete it until the new segments files is
// successfully written:
deleter.AddPendingFile(oldDelFileName);
}
si.AdvanceDelGen();
// We can write directly to the actual name (vs to a
// .tmp & renaming it) because the file is not live
// until segments file is written:
deletedDocs.Write(Directory(), si.GetDelFileName());
}
if (undeleteAll && si.HasDeletions())
{
System.String oldDelFileName = si.GetDelFileName();
if (oldDelFileName != null)
{
// Mark this file for deletion. Note that we don't
// actually try to delete it until the new segments files is
// successfully written:
deleter.AddPendingFile(oldDelFileName);
}
si.ClearDelGen();
}
if (normsDirty)
{
// re-write norms
si.SetNumFields(fieldInfos.Size());
System.Collections.IEnumerator values = norms.Values.GetEnumerator();
while (values.MoveNext())
{
Norm norm = (Norm) values.Current;
if (norm.dirty)
{
norm.ReWrite(si);
}
}
}
deletedDocsDirty = false;
normsDirty = false;
undeleteAll = false;
}
/// <summary>Releases every open reader/stream.  Each field is null-checked, so
/// this is safe to call on a partially-initialized reader (see the failure
/// path in Initialize()).</summary>
protected internal override void DoClose()
{
if (fieldsReader != null)
{
fieldsReader.Close();
}
if (tis != null)
{
tis.Close();
}
if (freqStream != null)
freqStream.Close();
if (proxStream != null)
proxStream.Close();
CloseNorms();
if (termVectorsReaderOrig != null)
termVectorsReaderOrig.Close();
if (cfsReader != null)
cfsReader.Close();
}
/// <summary>True when the given segment records deletions on disk.</summary>
internal static bool HasDeletions(SegmentInfo si)
{
return si.HasDeletions();
}
/// <summary>True when this reader has a deletions bit vector loaded.</summary>
public override bool HasDeletions()
{
return deletedDocs != null;
}
/// <summary>True when the given segment is stored as a compound (.cfs) file.</summary>
internal static bool UsesCompoundFile(SegmentInfo si)
{
return si.GetUseCompoundFile();
}
/// <summary>True when the given segment keeps norms in separate per-generation files.</summary>
internal static bool HasSeparateNorms(SegmentInfo si)
{
return si.HasSeparateNorms();
}
/// <summary>Marks one document deleted, allocating the deletions bit vector on
/// first use; cancels any pending undelete-all and marks deletions dirty.</summary>
protected internal override void DoDelete(int docNum)
{
if (deletedDocs == null)
deletedDocs = new BitVector(MaxDoc());
deletedDocsDirty = true;
undeleteAll = false;
deletedDocs.Set(docNum);
}
/// <summary>Drops all in-memory deletions; the on-disk state is cleared when
/// DoCommit() runs (see the undeleteAll branch there).</summary>
protected internal override void DoUndeleteAll()
{
deletedDocs = null;
deletedDocsDirty = false;
undeleteAll = true;
}
/// <summary>Lists the files in the directory that belong to this segment: the
/// .cfs file (or the per-extension files), the deletions file if any, and the
/// norm files — the shared norms file is added at most once.</summary>
internal virtual System.Collections.ArrayList Files()
{
System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
if (si.GetUseCompoundFile())
{
System.String name = segment + ".cfs";
if (Directory().FileExists(name))
{
files.Add(name);
}
}
else
{
// Multi-file segment: probe every known index extension.
for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++)
{
System.String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
if (Directory().FileExists(name))
files.Add(name);
}
}
if (si.HasDeletions())
{
files.Add(si.GetDelFileName());
}
bool addedNrm = false;
for (int i = 0; i < fieldInfos.Size(); i++)
{
System.String name = si.GetNormFileName(i);
if (name != null && Directory().FileExists(name))
{
if (name.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
{
if (addedNrm)
continue; // add .nrm just once
addedNrm = true;
}
files.Add(name);
}
}
return files;
}
/// <summary>Returns an enumerator over the segment's term dictionary.</summary>
public override TermEnum Terms()
{
return tis.Terms();
}
/// <summary>Returns a term enumerator starting at the given term
/// (delegates to the term dictionary reader).</summary>
public override TermEnum Terms(Term t)
{
return tis.Terms(t);
}
/// <summary>Returns the stored fields of document <paramref name="n"/>,
/// filtered through <paramref name="fieldSelector"/>.</summary>
/// <exception cref="System.ArgumentException">when the document has been deleted</exception>
public override Document Document(int n, FieldSelector fieldSelector)
{
// NOTE(review): lock(this) mirrors the Java port's synchronized methods and
// is kept for compatibility with external code that may share this monitor.
lock (this)
{
if (IsDeleted(n))
throw new System.ArgumentException("attempt to access a deleted document");
return fieldsReader.Doc(n, fieldSelector);
}
}
/// <summary>True when document <paramref name="n"/> is marked deleted.</summary>
public override bool IsDeleted(int n)
{
lock (this)
{
return (deletedDocs != null && deletedDocs.Get(n));
}
}
/// <summary>Returns a new TermDocs iterator over this segment.</summary>
public override TermDocs TermDocs()
{
return new SegmentTermDocs(this);
}
/// <summary>Returns a new TermPositions iterator over this segment.</summary>
public override TermPositions TermPositions()
{
return new SegmentTermPositions(this);
}
/// <summary>Number of documents containing term <paramref name="t"/>;
/// zero when the term does not occur in this segment.</summary>
public override int DocFreq(Term t)
{
// Absent terms have no TermInfo entry and therefore zero frequency.
TermInfo ti = tis.Get(t);
return ti == null ? 0 : ti.docFreq;
}
/// <summary>Number of live (non-deleted) documents in this segment.</summary>
public override int NumDocs()
{
int n = MaxDoc();
if (deletedDocs != null)
n -= deletedDocs.Count();
return n;
}
/// <summary>Total document count including deleted documents.</summary>
public override int MaxDoc()
{
return si.docCount;
}
/// <summary>Returns the set of field names matching <paramref name="fieldOption"/>
/// (all, indexed/unindexed, various term-vector combinations).  The Hashtable
/// is used as a set: each matching name is stored as both key and value.</summary>
public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption)
{
System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
for (int i = 0; i < fieldInfos.Size(); i++)
{
FieldInfo fi = fieldInfos.FieldInfo(i);
if (fieldOption == IndexReader.FieldOption.ALL)
{
fieldSet.Add(fi.name, fi.name);
}
else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
{
fieldSet.Add(fi.name, fi.name);
}
else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED)
{
fieldSet.Add(fi.name, fi.name);
}
else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
{
fieldSet.Add(fi.name, fi.name);
}
else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR)
{
fieldSet.Add(fi.name, fi.name);
}
else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
{
fieldSet.Add(fi.name, fi.name);
}
else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
{
fieldSet.Add(fi.name, fi.name);
}
else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
{
fieldSet.Add(fi.name, fi.name);
}
else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET)
{
fieldSet.Add(fi.name, fi.name);
}
}
return fieldSet;
}
/// <summary>True when this segment has stored norms for <paramref name="field"/>.</summary>
public override bool HasNorms(System.String field)
{
lock (this)
{
return norms.ContainsKey(field);
}
}
/// <summary>Builds a norms array of the given size where every entry is the
/// encoded norm for boost 1.0 (used when a field stores no real norms).</summary>
internal static byte[] CreateFakeNorms(int size)
{
byte val = DefaultSimilarity.EncodeNorm(1.0f);
byte[] ones = new byte[size];
for (int i = ones.Length - 1; i >= 0; i--)
{
ones[i] = val;
}
return ones;
}
// Lazily-built all-1.0 norms array, shared by every field without stored norms.
private byte[] ones;
/// <summary>Returns the fake norms array (size MaxDoc()), building it on first use.</summary>
private byte[] FakeNorms()
{
if (ones == null)
ones = CreateFakeNorms(MaxDoc());
return ones;
}
// can return null if norms aren't stored
/// <summary>Returns the norms for <paramref name="field"/>, reading them from
/// disk and caching them on first access; null when the field is not indexed
/// or stores no norms.</summary>
protected internal virtual byte[] GetNorms(System.String field)
{
lock (this)
{
Norm norm = (Norm) norms[field];
if (norm == null)
return null; // not indexed, or norms not stored
if (norm.bytes == null)
{
// value not yet read
byte[] bytes = new byte[MaxDoc()];
Norms(field, bytes, 0);
norm.bytes = bytes; // cache it
}
return norm.bytes;
}
}
// returns fake norms if norms aren't available
/// <summary>Like GetNorms(), but substitutes the all-1.0 fake norms array when
/// the field stores none, so callers never receive null.</summary>
public override byte[] Norms(System.String field)
{
lock (this)
{
byte[] bytes = GetNorms(field);
if (bytes == null)
bytes = FakeNorms();
return bytes;
}
}
/// <summary>Stores a new norm byte for one document of <paramref name="field"/>
/// and marks both the field's norms and this reader dirty so DoCommit()
/// re-writes them.  Silently ignored for fields without norms.</summary>
// NOTE(review): not locked, unlike the norm readers above — presumably the
// base-class SetNorm path synchronizes; confirm against IndexReader.
protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed)
{
Norm norm = (Norm) norms[field];
if (norm == null)
// not an indexed field
return ;
norm.dirty = true; // mark it dirty
normsDirty = true;
// Norms(field) forces the bytes to be read and cached before mutation.
Norms(field)[doc] = value_Renamed; // set the value
}
/// Read norms into a pre-allocated array.
/// <summary>Copies MaxDoc() norm bytes for <paramref name="field"/> into
/// <paramref name="bytes"/> starting at <paramref name="offset"/>: fake norms
/// when the field has none, the in-memory cache when present, otherwise a
/// fresh read from disk.</summary>
public override void Norms(System.String field, byte[] bytes, int offset)
{
lock (this)
{
Norm norm = (Norm) norms[field];
if (norm == null)
{
Array.Copy(FakeNorms(), 0, bytes, offset, MaxDoc());
return ;
}
if (norm.bytes != null)
{
// can copy from cache
Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc());
return ;
}
// Clone the shared input so seeking here does not disturb other users
// of norm.in_Renamed; the clone is closed below.
IndexInput normStream = (IndexInput) norm.in_Renamed.Clone();
try
{
// read from disk
normStream.Seek(norm.normSeek);
normStream.ReadBytes(bytes, offset, MaxDoc());
}
finally
{
normStream.Close();
}
}
}
/// <summary>Creates a Norm entry for every indexed field that stores norms.
/// Separate-generation norm files are opened from the main directory;
/// otherwise the (possibly compound) <paramref name="cfsDir"/> is used.</summary>
private void OpenNorms(Directory cfsDir)
{
long nextNormSeek = SegmentMerger.NORMS_HEADER.Length; //skip header (header unused for now)
int maxDoc = MaxDoc();
for (int i = 0; i < fieldInfos.Size(); i++)
{
FieldInfo fi = fieldInfos.FieldInfo(i);
if (fi.isIndexed && !fi.omitNorms)
{
Directory d = Directory();
System.String fileName = si.GetNormFileName(fi.number);
if (!si.HasSeparateNorms(fi.number))
{
d = cfsDir;
}
// Fields packed in the shared norms file are addressed by running
// offset; a separate per-field norm file starts at 0.
long normSeek = (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION)?nextNormSeek:0);
norms[fi.name] = new Norm(this, d.OpenInput(fileName), fi.number, normSeek);
nextNormSeek += maxDoc; // increment also if some norms are separate
}
}
}
/// <summary>Closes the underlying input stream of every open Norm entry.</summary>
private void CloseNorms()
{
// Lock the synchronized Hashtable's root while enumerating its values.
lock (norms.SyncRoot)
{
System.Collections.IEnumerator enumerator = norms.Values.GetEnumerator();
while (enumerator.MoveNext())
{
Norm norm = (Norm) enumerator.Current;
norm.in_Renamed.Close();
}
}
}
/// <summary>Returns this thread's private TermVectorsReader, cloning the
/// original reader on first use and stashing the clone in the thread-local
/// slot so each thread reads vectors through its own stream.</summary>
/// <returns>the calling thread's TermVectorsReader clone</returns>
private TermVectorsReader GetTermVectorsReader()
{
TermVectorsReader tvReader = (TermVectorsReader) System.Threading.Thread.GetData(termVectorsLocal);
if (tvReader == null)
{
tvReader = (TermVectorsReader) termVectorsReaderOrig.Clone();
System.Threading.Thread.SetData(termVectorsLocal, tvReader);
}
return tvReader;
}
/// Return a term frequency vector for the specified document and field. The
/// vector returned contains term numbers and frequencies for all terms in
/// the specified field of this document, if the field had storeTermVector
/// flag set. If the flag was not set, the method returns null.
///
/// <exception cref="System.IO.IOException">on index read errors</exception>
public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
{
// Check if this field is invalid or has no stored term vector
FieldInfo fi = fieldInfos.FieldInfo(field);
if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
return null;
TermVectorsReader termVectorsReader = GetTermVectorsReader();
if (termVectorsReader == null)
return null;
return termVectorsReader.Get(docNumber, field);
}
/// Return an array of term frequency vectors for the specified document.
/// The array contains a vector for each vectorized field in the document.
/// Each vector contains term numbers and frequencies for all terms
/// in a given vectorized field.
/// If no such fields existed, the method returns null.
///
/// <exception cref="System.IO.IOException">on index read errors</exception>
public override TermFreqVector[] GetTermFreqVectors(int docNumber)
{
if (termVectorsReaderOrig == null)
return null;
TermVectorsReader termVectorsReader = GetTermVectorsReader();
if (termVectorsReader == null)
return null;
return termVectorsReader.Get(docNumber);
}
/// Return the name of the segment this reader is reading.
internal virtual System.String GetSegmentName()
{
return segment;
}
/// <summary>Replaces this reader's SegmentInfo record.</summary>
internal virtual void SetSegmentInfo(SegmentInfo info)
{
si = info;
}
/// <summary>Snapshots every dirty flag (reader-level and per-Norm) so a failed
/// commit can be undone by RollbackCommit().</summary>
internal override void StartCommit()
{
base.StartCommit();
rollbackDeletedDocsDirty = deletedDocsDirty;
rollbackNormsDirty = normsDirty;
rollbackUndeleteAll = undeleteAll;
System.Collections.IEnumerator values = norms.Values.GetEnumerator();
while (values.MoveNext())
{
Norm norm = (Norm) values.Current;
norm.rollbackDirty = norm.dirty;
}
}
/// <summary>Restores the dirty state captured by StartCommit().</summary>
internal override void RollbackCommit()
{
base.RollbackCommit();
deletedDocsDirty = rollbackDeletedDocsDirty;
normsDirty = rollbackNormsDirty;
undeleteAll = rollbackUndeleteAll;
System.Collections.IEnumerator values = norms.Values.GetEnumerator();
while (values.MoveNext())
{
Norm norm = (Norm) values.Current;
norm.dirty = norm.rollbackDirty;
}
}
/// <summary>Resolves the concrete SegmentReader implementation type (IMPL).
/// The type name can be overridden via the "Lucene.Net.SegmentReader.class"
/// app setting; it defaults to this class.  If reading configuration is not
/// permitted, falls back to this class.</summary>
static SegmentReader()
{
try
{
System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName);
IMPL = System.Type.GetType(name);
}
catch (System.Security.SecurityException) // FIX: was 'se', an unused local (compiler warning CS0168)
{
// Configuration is unreadable under the current permissions; use the default.
try
{
IMPL = System.Type.GetType(typeof(SegmentReader).FullName);
}
catch (System.Exception e)
{
throw new System.SystemException("cannot load default SegmentReader class: " + e, e);
}
}
catch (System.Exception e)
{
throw new System.SystemException("cannot load SegmentReader class: " + e, e);
}
if (IMPL == null)
{
// FIX: Type.GetType returns null (it does not throw) when the name cannot
// be resolved; fail fast here instead of with an opaque failure at the
// first Get() call.
throw new System.SystemException("cannot load SegmentReader class: type could not be resolved");
}
}
}
}