/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Document = Lucene.Net.Documents.Document;
using Directory = Lucene.Net.Store.Directory;
using InputStream = Lucene.Net.Store.InputStream;
using OutputStream = Lucene.Net.Store.OutputStream;
using BitVector = Lucene.Net.Util.BitVector;

namespace Lucene.Net.Index
{
    /// <summary>FIXME: Describe class SegmentReader here.</summary>
    /// <version>$Id: SegmentReader.java,v 1.23 2004/07/10 06:19:01 otis Exp $</version>
    sealed public class SegmentReader : IndexReader
    {
        private System.String segment;

        internal FieldInfos fieldInfos;
        private FieldsReader fieldsReader;

        internal TermInfosReader tis;
        internal TermVectorsReader termVectorsReader;

        internal BitVector deletedDocs = null;
        private bool deletedDocsDirty = false;
        private bool normsDirty = false;
        private bool undeleteAll = false;

        internal InputStream freqStream;
        internal InputStream proxStream;

        // Compound File Reader when based on a compound file segment
        internal CompoundFileReader cfsReader;

        private class Norm
        {
            private void InitBlock(SegmentReader enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private SegmentReader enclosingInstance;
            public SegmentReader Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

            public Norm(SegmentReader enclosingInstance, InputStream in_Renamed, int number)
            {
                InitBlock(enclosingInstance);
                this.in_Renamed = in_Renamed;
                this.number = number;
            }

            public InputStream in_Renamed; // private -> public
            public byte[] bytes;           // private -> public
            public bool dirty;             // private -> public
            public int number;             // private -> public

            public void ReWrite() // private -> public
            {
                // NOTE: norms are re-written in regular directory, not cfs
                OutputStream out_Renamed = Enclosing_Instance.Directory().CreateFile(Enclosing_Instance.segment + ".tmp");
                try
                {
                    out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
                }
                finally
                {
                    out_Renamed.Close();
                }
                System.String fileName = Enclosing_Instance.segment + ".f" + number;
                Enclosing_Instance.Directory().RenameFile(Enclosing_Instance.segment + ".tmp", fileName);
                this.dirty = false;
            }
        }

        private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());

        public /*internal*/ SegmentReader(SegmentInfos sis, SegmentInfo si, bool closeDir) : base(si.dir, sis, closeDir)
        {
            Initialize(si);
        }

        public /*internal*/ SegmentReader(SegmentInfo si) : base(si.dir)
        {
            Initialize(si);
        }

        private void Initialize(SegmentInfo si)
        {
            segment = si.name;

            // Use compound file directory for some files, if it exists
            Directory cfsDir = Directory();
            if (Directory().FileExists(segment + ".cfs"))
            {
                cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
                cfsDir = cfsReader;
            }

            // These files are read through cfsDir: the compound file when one
            // exists, otherwise the plain multi-file directory.
            fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
            fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

            tis = new TermInfosReader(cfsDir, segment, fieldInfos);

            // NOTE: the bitvector is stored using the regular directory, not cfs
            if (HasDeletions(si))
                deletedDocs = new BitVector(Directory(), segment + ".del");

            // make sure that all index files have been read or are kept open
            // so that if an index update removes them we'll still have them
            freqStream = cfsDir.OpenFile(segment + ".frq");
            proxStream = cfsDir.OpenFile(segment + ".prx");
            OpenNorms(cfsDir);

            if (fieldInfos.HasVectors())
            {
                // open term vector files only as needed
                termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
            }
        }
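
        // Summary of the per-segment files this reader handles (derived from
        // the code in this class): a segment "s" is stored either as one
        // compound file "s.cfs" or as individual files -- "s.fnm" (field
        // infos), "s.fdx"/"s.fdt" (stored fields), "s.tii"/"s.tis" (term
        // dictionary), "s.frq"/"s.prx" (postings), "s.fN" (norms for field
        // number N) and "s.tvx"/"s.tvd"/"s.tvf" (term vectors). The deletions
        // file "s.del" and re-written norms always live in the regular
        // directory, never inside the compound file.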

        protected internal override void DoCommit()
        {
            if (deletedDocsDirty)
            {
                // re-write deleted
                deletedDocs.Write(Directory(), segment + ".tmp");
                Directory().RenameFile(segment + ".tmp", segment + ".del");
            }
            if (undeleteAll && Directory().FileExists(segment + ".del"))
            {
                Directory().DeleteFile(segment + ".del");
            }
            if (normsDirty)
            {
                // re-write norms
                System.Collections.IEnumerator values = norms.Values.GetEnumerator();
                while (values.MoveNext())
                {
                    Norm norm = (Norm) values.Current;
                    if (norm.dirty)
                    {
                        norm.ReWrite();
                    }
                }
            }
            deletedDocsDirty = false;
            normsDirty = false;
            undeleteAll = false;
        }

        protected internal override void DoClose()
        {
            fieldsReader.Close();
            tis.Close();

            if (freqStream != null)
                freqStream.Close();
            if (proxStream != null)
                proxStream.Close();

            CloseNorms();

            if (termVectorsReader != null)
                termVectorsReader.Close();

            if (cfsReader != null)
                cfsReader.Close();
        }

        internal static bool HasDeletions(SegmentInfo si)
        {
            return si.dir.FileExists(si.name + ".del");
        }

        public override bool HasDeletions()
        {
            return deletedDocs != null;
        }

        internal static bool UsesCompoundFile(SegmentInfo si)
        {
            return si.dir.FileExists(si.name + ".cfs");
        }

        internal static bool HasSeparateNorms(SegmentInfo si)
        {
            System.String[] result = si.dir.List();
            System.String pattern = si.name + ".f";
            int patternLength = pattern.Length;
            for (int i = 0; i < result.Length; i++)
            {
                if (result[i].StartsWith(pattern) && System.Char.IsDigit(result[i][patternLength]))
                    return true;
            }
            return false;
        }

        protected internal override void DoDelete(int docNum)
        {
            if (deletedDocs == null)
                deletedDocs = new BitVector(MaxDoc());
            deletedDocsDirty = true;
            undeleteAll = false;
            deletedDocs.Set(docNum);
        }

        protected internal override void DoUndeleteAll()
        {
            deletedDocs = null;
            deletedDocsDirty = false;
            undeleteAll = true;
        }

        internal System.Collections.ArrayList Files()
        {
            System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
            System.String[] ext = new System.String[]{"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"};

            for (int i = 0; i < ext.Length; i++)
            {
                System.String name = segment + "." + ext[i];
                if (Directory().FileExists(name))
                    files.Add(name);
            }

            for (int i = 0; i < fieldInfos.Size(); i++)
            {
                FieldInfo fi = fieldInfos.FieldInfo(i);
                if (fi.isIndexed)
                    files.Add(segment + ".f" + i);
            }
            return files;
        }
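
        // Example (a sketch using only the public IndexReader API; "dir" is a
        // hypothetical Directory holding a one-segment index): Delete() drives
        // DoDelete above, and Close() commits via DoCommit, which writes
        // "<segment>.tmp" and renames it to "<segment>.del" so a partially
        // written deletions file is never visible to other readers.
        //
        //     IndexReader reader = IndexReader.Open(dir);
        //     reader.Delete(0);  // marks doc 0 in the in-memory BitVector
        //     reader.Close();    // flushes the BitVector to "<segment>.del"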

        public override TermEnum Terms()
        {
            return tis.Terms();
        }

        public override TermEnum Terms(Term t)
        {
            return tis.Terms(t);
        }

        public override Document Document(int n)
        {
            lock (this)
            {
                if (IsDeleted(n))
                    throw new System.ArgumentException("attempt to access a deleted document");
                return fieldsReader.Doc(n);
            }
        }

        public override bool IsDeleted(int n)
        {
            lock (this)
            {
                return (deletedDocs != null && deletedDocs.Get(n));
            }
        }

        public override TermDocs TermDocs()
        {
            return new SegmentTermDocs(this);
        }

        public override TermPositions TermPositions()
        {
            return new SegmentTermPositions(this);
        }

        public override int DocFreq(Term t)
        {
            TermInfo ti = tis.Get(t);
            if (ti != null)
                return ti.docFreq;
            else
                return 0;
        }

        public override int NumDocs()
        {
            int n = MaxDoc();
            if (deletedDocs != null)
                n -= deletedDocs.Count();
            return n;
        }

        public override int MaxDoc()
        {
            return fieldsReader.Size();
        }

        /// <seealso cref="IndexReader.GetFieldNames()"/>
        public override System.Collections.ICollection GetFieldNames()
        {
            // maintain a unique set of Field names
            System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
            for (int i = 0; i < fieldInfos.Size(); i++)
            {
                FieldInfo fi = fieldInfos.FieldInfo(i);
                fieldSet.Add(fi.name, fi.name);
            }
            return fieldSet;
        }

        /// <seealso cref="IndexReader.GetFieldNames(bool)"/>
        public override System.Collections.ICollection GetFieldNames(bool indexed)
        {
            // maintain a unique set of Field names
            System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
            for (int i = 0; i < fieldInfos.Size(); i++)
            {
                FieldInfo fi = fieldInfos.FieldInfo(i);
                if (fi.isIndexed == indexed)
                    fieldSet.Add(fi.name, fi.name);
            }
            return fieldSet;
        }

        /// <param name="storedTermVector">if true, returns only indexed fields that have term vector info,
        /// else only indexed fields without term vector info
        /// </param>
        /// <returns>Collection of Strings indicating the names of the fields</returns>
        public override System.Collections.ICollection GetIndexedFieldNames(bool storedTermVector)
        {
            // maintain a unique set of Field names
            System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
            for (int ii = 0; ii < fieldInfos.Size(); ii++)
            {
                FieldInfo fi = fieldInfos.FieldInfo(ii);
                if (fi.isIndexed && fi.storeTermVector == storedTermVector)
                {
                    fieldSet.Add(fi.name, fi.name);
                }
            }
            return fieldSet;
        }

        public override byte[] Norms(System.String field)
        {
            lock (this)
            {
                Norm norm = (Norm) norms[field];
                if (norm == null)
                    // not an indexed Field
                    return null;
                if (norm.bytes == null)
                {
                    // value not yet read
                    byte[] bytes = new byte[MaxDoc()];
                    Norms(field, bytes, 0);
                    norm.bytes = bytes; // cache it
                }
                return norm.bytes;
            }
        }

        protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed)
        {
            Norm norm = (Norm) norms[field];
            if (norm == null)
                // not an indexed Field
                return ;
            norm.dirty = true; // mark it dirty
            normsDirty = true;

            Norms(field)[doc] = value_Renamed; // set the value
        }
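
        // Note on the norms design (as implemented above and below): each Norm
        // holds an open InputStream but defers reading its byte-per-document
        // array until first use; once read, the array is cached in Norm.bytes,
        // and DoSetNorm mutates that cached copy so ReWrite() can later flush
        // it to a fresh ".fN" file at commit time.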

        /// <summary>Read norms into a pre-allocated array.</summary>
        public override void Norms(System.String field, byte[] bytes, int offset)
        {
            lock (this)
            {
                Norm norm = (Norm) norms[field];
                if (norm == null)
                    return ; // use zeros in array

                if (norm.bytes != null)
                {
                    // can copy from cache
                    Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc());
                    return ;
                }

                InputStream normStream = (InputStream) norm.in_Renamed.Clone();
                try
                {
                    // read from disk
                    normStream.Seek(0);
                    normStream.ReadBytes(bytes, offset, MaxDoc());
                }
                finally
                {
                    normStream.Close();
                }
            }
        }

        private void OpenNorms(Directory cfsDir)
        {
            for (int i = 0; i < fieldInfos.Size(); i++)
            {
                FieldInfo fi = fieldInfos.FieldInfo(i);
                if (fi.isIndexed)
                {
                    System.String fileName = segment + ".f" + fi.number;
                    // look first for re-written file, then in compound format
                    Directory d = Directory().FileExists(fileName) ? Directory() : cfsDir;
                    norms[fi.name] = new Norm(this, d.OpenFile(fileName), fi.number);
                }
            }
        }

        private void CloseNorms()
        {
            lock (norms.SyncRoot)
            {
                System.Collections.IEnumerator enumerator = norms.Values.GetEnumerator();
                while (enumerator.MoveNext())
                {
                    Norm norm = (Norm) enumerator.Current;
                    norm.in_Renamed.Close();
                }
            }
        }

        /// <summary>Return a term frequency vector for the specified document and Field. The
        /// vector returned contains term numbers and frequencies for all terms in
        /// the specified Field of this document, if the Field had the storeTermVector
        /// flag set. If the flag was not set, the method returns null.
        /// </summary>
        public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
        {
            // Check if this Field is invalid or has no stored term vector
            FieldInfo fi = fieldInfos.FieldInfo(field);
            if (fi == null || !fi.storeTermVector)
                return null;

            return termVectorsReader.Get(docNumber, field);
        }

        /// <summary>Return an array of term frequency vectors for the specified document.
        /// The array contains a vector for each vectorized Field in the document.
        /// Each vector contains term numbers and frequencies for all terms
        /// in a given vectorized Field.
        /// If no such fields existed, the method returns null.
        /// </summary>
        public override TermFreqVector[] GetTermFreqVectors(int docNumber)
        {
            if (termVectorsReader == null)
                return null;

            return termVectorsReader.Get(docNumber);
        }
    }
}
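
/* Example usage (a minimal sketch; assumes an existing index under the
   hypothetical path "index" and the standard Lucene.Net 1.4 entry points
   FSDirectory.GetDirectory and IndexReader.Open, which yields a SegmentReader
   when the index holds a single segment):

       Lucene.Net.Store.Directory dir =
           Lucene.Net.Store.FSDirectory.GetDirectory("index", false);
       Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(dir);
       try
       {
           // walk the postings for one term through the public API
           Lucene.Net.Index.TermDocs termDocs =
               reader.TermDocs(new Lucene.Net.Index.Term("contents", "lucene"));
           while (termDocs.Next())
               System.Console.WriteLine("doc=" + termDocs.Doc() + " freq=" + termDocs.Freq());
           termDocs.Close();
       }
       finally
       {
           reader.Close();
       }
*/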