/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;
using System.Linq;
using Lucene.Net.Support;
using Document = Lucene.Net.Documents.Document;
using FieldSelector = Lucene.Net.Documents.FieldSelector;
using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult;

namespace Lucene.Net.Index
{
    /// <summary>
    /// An IndexReader which reads multiple, parallel indexes.  Each index added
    /// must have the same number of documents, but typically each contains
    /// different fields.  Each document contains the union of the fields of all
    /// documents with the same document number.  When searching, matches for a
    /// query term are from the first index added that has the field.
    ///
    /// <para>This is useful, e.g., with collections that have large fields which
    /// change rarely and small fields that change more frequently.  The smaller
    /// fields may be re-indexed in a new index and both indexes may be searched
    /// together.</para>
    ///
    /// <para><b>Warning:</b> It is up to you to make sure all indexes
    /// are created and modified the same way. For example, if you add
    /// documents to one index, you need to add the same documents in the
    /// same order to the other indexes. <i>Failure to do so will result in
    /// undefined behavior</i>.</para>
    /// </summary>
    public class ParallelReader : IndexReader, System.ICloneable
    {
        private List<IndexReader> readers = new List<IndexReader>();
        private List<bool> decrefOnClose = new List<bool>(); // remember which subreaders to decRef on close
        internal bool incRefReaders = false;
        // Maps a field name to the first reader added that contains that field.
        // NOTE(review): SortedDictionary's indexer throws KeyNotFoundException for a
        // missing key (unlike Java's TreeMap.get, which returns null); the
        // "reader == null" checks below only cover an explicitly stored null value.
        private SortedDictionary<string, IndexReader> fieldToReader = new SortedDictionary<string, IndexReader>();
        private IDictionary<IndexReader, ICollection<string>> readerToFields = new HashMap<IndexReader, ICollection<string>>();
        private List<IndexReader> storedFieldReaders = new List<IndexReader>();

        private int maxDoc;     // shared maxDoc of all subreaders (set by first Add)
        private int numDocs;    // shared numDocs of all subreaders (set by first Add)
        private bool hasDeletions;

        /// <summary>Construct a ParallelReader.
        /// <para>Note that all subreaders are closed if this ParallelReader is closed.</para>
        /// </summary>
        public ParallelReader() : this(true)
        {
        }

        /// <summary>Construct a ParallelReader.</summary>
        /// <param name="closeSubReaders">indicates whether the subreaders should be closed
        /// when this ParallelReader is closed
        /// </param>
        public ParallelReader(bool closeSubReaders) : base()
        {
            this.incRefReaders = !closeSubReaders;
        }

        /// <summary>Add an IndexReader.</summary>
        /// <throws>IOException if there is a low-level IO error</throws>
        public virtual void Add(IndexReader reader)
        {
            EnsureOpen();
            Add(reader, false);
        }

        /// <summary>Add an IndexReader whose stored fields will not be returned.  This can
        /// accellerate search when stored fields are only needed from a subset of
        /// the IndexReaders.
        /// </summary>
        /// <throws>IllegalArgumentException if not all indexes contain the same number
        /// of documents
        /// </throws>
        /// <throws>IllegalArgumentException if not all indexes have the same value
        /// of <see cref="IndexReader.MaxDoc" />
        /// </throws>
        /// <throws>IOException if there is a low-level IO error</throws>
        public virtual void Add(IndexReader reader, bool ignoreStoredFields)
        {
            EnsureOpen();
            if (readers.Count == 0)
            {
                // first reader added defines the document counts all others must match
                this.maxDoc = reader.MaxDoc;
                this.numDocs = reader.NumDocs();
                this.hasDeletions = reader.HasDeletions;
            }

            if (reader.MaxDoc != maxDoc)
                // check compatibility
                throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc);
            if (reader.NumDocs() != numDocs)
                throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());

            ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            readerToFields[reader] = fields;
            foreach (var field in fields)
            {
                // update fieldToReader map
                // Do a containskey firt to mimic java behavior
                if (!fieldToReader.ContainsKey(field) || fieldToReader[field] == null)
                    fieldToReader[field] = reader;
            }

            if (!ignoreStoredFields)
                storedFieldReaders.Add(reader); // add to storedFieldReaders
            readers.Add(reader);

            if (incRefReaders)
            {
                reader.IncRef();
            }
            decrefOnClose.Add(incRefReaders);
        }

        /// <summary>Clones this reader by reopening with <c>doClone == true</c>.</summary>
        public override System.Object Clone()
        {
            try
            {
                return DoReopen(true);
            }
            catch (System.Exception ex)
            {
                throw new System.SystemException(ex.Message, ex);
            }
        }

        /// <summary>
        /// Tries to reopen the subreaders.
        /// <para>
        /// If one or more subreaders could be re-opened (i. e. subReader.reopen()
        /// returned a new instance != subReader), then a new ParallelReader instance
        /// is returned, otherwise this instance is returned.</para>
        /// <para>
        /// A re-opened instance might share one or more subreaders with the old
        /// instance. Index modification operations result in undefined behavior
        /// when performed before the old instance is closed.
        /// (see <see cref="IndexReader.Reopen()" />).</para>
        /// <para>
        /// If subreaders are shared, then the reference count of those
        /// readers is increased to ensure that the subreaders remain open
        /// until the last referring reader is closed.</para>
        /// </summary>
        /// <throws>CorruptIndexException if the index is corrupt</throws>
        /// <throws>IOException if there is a low-level IO error</throws>
        public override IndexReader Reopen()
        {
            lock (this)
            {
                return DoReopen(false);
            }
        }

        /// <summary>Reopens (or clones) every subreader; returns a new ParallelReader
        /// only when at least one subreader actually changed, else <c>this</c>.
        /// </summary>
        /// <param name="doClone">if true, subreaders are cloned instead of reopened</param>
        protected internal virtual IndexReader DoReopen(bool doClone)
        {
            EnsureOpen();

            bool reopened = false;
            IList<IndexReader> newReaders = new List<IndexReader>();

            bool success = false;

            try
            {
                foreach (var oldReader in readers)
                {
                    IndexReader newReader = null;
                    if (doClone)
                    {
                        newReader = (IndexReader) oldReader.Clone();
                    }
                    else
                    {
                        newReader = oldReader.Reopen();
                    }
                    newReaders.Add(newReader);
                    // if at least one of the subreaders was updated we remember that
                    // and return a new ParallelReader
                    if (newReader != oldReader)
                    {
                        reopened = true;
                    }
                }
                success = true;
            }
            finally
            {
                if (!success && reopened)
                {
                    // roll back: close only the readers that were freshly created above
                    for (int i = 0; i < newReaders.Count; i++)
                    {
                        IndexReader r = newReaders[i];
                        if (r != readers[i])
                        {
                            try
                            {
                                r.Close();
                            }
                            catch (System.IO.IOException)
                            {
                                // keep going - we want to clean up as much as possible
                            }
                        }
                    }
                }
            }

            if (reopened)
            {
                List<bool> newDecrefOnClose = new List<bool>();
                ParallelReader pr = new ParallelReader();
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader oldReader = readers[i];
                    IndexReader newReader = newReaders[i];
                    if (newReader == oldReader)
                    {
                        newDecrefOnClose.Add(true);
                        newReader.IncRef();
                    }
                    else
                    {
                        // this is a new subreader instance, so on close() we don't
                        // decRef but close it
                        newDecrefOnClose.Add(false);
                    }
                    pr.Add(newReader, !storedFieldReaders.Contains(oldReader));
                }
                pr.decrefOnClose = newDecrefOnClose;
                pr.incRefReaders = incRefReaders;
                return pr;
            }
            else
            {
                // No subreader was refreshed
                return this;
            }
        }

        public override int NumDocs()
        {
            // Don't call ensureOpen() here (it could affect performance)
            return numDocs;
        }

        public override int MaxDoc
        {
            get
            {
                // Don't call ensureOpen() here (it could affect performance)
                return maxDoc;
            }
        }

        public override bool HasDeletions
        {
            get
            {
                // Don't call ensureOpen() here (it could affect performance)
                return hasDeletions;
            }
        }

        // check first reader
        public override bool IsDeleted(int n)
        {
            // Don't call ensureOpen() here (it could affect performance)
            if (readers.Count > 0)
                return readers[0].IsDeleted(n);
            return false;
        }

        // delete in all readers
        protected internal override void DoDelete(int n)
        {
            foreach (var reader in readers)
            {
                reader.DeleteDocument(n);
            }
            hasDeletions = true;
        }

        // undeleteAll in all readers
        protected internal override void DoUndeleteAll()
        {
            foreach (var reader in readers)
            {
                reader.UndeleteAll();
            }
            hasDeletions = false;
        }

        // append fields from storedFieldReaders
        public override Document Document(int n, FieldSelector fieldSelector)
        {
            EnsureOpen();
            Document result = new Document();
            foreach (IndexReader reader in storedFieldReaders)
            {
                // a null selector loads everything; otherwise skip readers whose
                // fields the selector would not load at all
                bool include = (fieldSelector == null);
                if (!include)
                {
                    var fields = readerToFields[reader];
                    foreach (var field in fields)
                    {
                        if (fieldSelector.Accept(field) != FieldSelectorResult.NO_LOAD)
                        {
                            include = true;
                            break;
                        }
                    }
                }
                if (include)
                {
                    var fields = reader.Document(n, fieldSelector).GetFields();
                    foreach (var field in fields)
                    {
                        result.Add(field);
                    }
                }
            }
            return result;
        }

        // get all vectors
        public override ITermFreqVector[] GetTermFreqVectors(int n)
        {
            EnsureOpen();
            IList<ITermFreqVector> results = new List<ITermFreqVector>();
            foreach (var e in fieldToReader)
            {
                System.String field = e.Key;
                IndexReader reader = e.Value;
                ITermFreqVector vector = reader.GetTermFreqVector(n, field);
                if (vector != null)
                    results.Add(vector);
            }
            return results.ToArray();
        }

        public override ITermFreqVector GetTermFreqVector(int n, System.String field)
        {
            EnsureOpen();
            IndexReader reader = (fieldToReader[field]);
            return reader == null ? null : reader.GetTermFreqVector(n, field);
        }

        public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
        {
            EnsureOpen();
            IndexReader reader = (fieldToReader[field]);
            if (reader != null)
            {
                reader.GetTermFreqVector(docNumber, field, mapper);
            }
        }

        public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
        {
            EnsureOpen();

            foreach (var e in fieldToReader)
            {
                System.String field = e.Key;
                IndexReader reader = e.Value;
                reader.GetTermFreqVector(docNumber, field, mapper);
            }
        }

        public override bool HasNorms(System.String field)
        {
            EnsureOpen();
            IndexReader reader = fieldToReader[field];
            return reader != null && reader.HasNorms(field);
        }

        public override byte[] Norms(System.String field)
        {
            EnsureOpen();
            IndexReader reader = fieldToReader[field];
            return reader == null ? null : reader.Norms(field);
        }

        public override void Norms(System.String field, byte[] result, int offset)
        {
            EnsureOpen();
            IndexReader reader = fieldToReader[field];
            if (reader != null)
                reader.Norms(field, result, offset);
        }

        protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
        {
            IndexReader reader = fieldToReader[field];
            if (reader != null)
                reader.DoSetNorm(n, field, value_Renamed);
        }

        public override TermEnum Terms()
        {
            EnsureOpen();
            return new ParallelTermEnum(this);
        }

        public override TermEnum Terms(Term term)
        {
            EnsureOpen();
            return new ParallelTermEnum(this, term);
        }

        public override int DocFreq(Term term)
        {
            EnsureOpen();
            IndexReader reader = fieldToReader[term.Field];
            return reader == null ? 0 : reader.DocFreq(term);
        }

        public override TermDocs TermDocs(Term term)
        {
            EnsureOpen();
            return new ParallelTermDocs(this, term);
        }

        public override TermDocs TermDocs()
        {
            EnsureOpen();
            return new ParallelTermDocs(this);
        }

        public override TermPositions TermPositions(Term term)
        {
            EnsureOpen();
            return new ParallelTermPositions(this, term);
        }

        public override TermPositions TermPositions()
        {
            EnsureOpen();
            return new ParallelTermPositions(this);
        }

        /// <summary>Checks recursively if all subreaders are up to date.</summary>
        public override bool IsCurrent()
        {
            foreach (var reader in readers)
            {
                if (!reader.IsCurrent())
                {
                    return false;
                }
            }

            // all subreaders are up to date
            return true;
        }

        /// <summary>Checks recursively if all subindexes are optimized</summary>
        public override bool IsOptimized()
        {
            foreach (var reader in readers)
            {
                if (!reader.IsOptimized())
                {
                    return false;
                }
            }

            // all subindexes are optimized
            return true;
        }

        /// <summary>Not implemented.</summary>
        /// <throws>UnsupportedOperationException</throws>
        public override long Version
        {
            get { throw new System.NotSupportedException("ParallelReader does not support this method."); }
        }

        // for testing
        public /*internal*/ virtual IndexReader[] GetSubReaders()
        {
            return readers.ToArray();
        }

        protected internal override void DoCommit(IDictionary<string, string> commitUserData)
        {
            foreach (var reader in readers)
                reader.Commit(commitUserData);
        }

        protected internal override void DoClose()
        {
            lock (this)
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    if (decrefOnClose[i])
                    {
                        readers[i].DecRef();
                    }
                    else
                    {
                        readers[i].Close();
                    }
                }
            }

            Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(this);
        }

        public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
        {
            EnsureOpen();
            ISet<string> fieldSet = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>();
            foreach (var reader in readers)
            {
                ICollection<string> names = reader.GetFieldNames(fieldNames);
                fieldSet.UnionWith(names);
            }
            return fieldSet;
        }

        /// <summary>Enumerates all terms of all subreaders, walking the (sorted)
        /// fields of fieldToReader and the term enum of the owning reader for
        /// each field in turn.
        /// </summary>
        private class ParallelTermEnum : TermEnum
        {
            private void InitBlock(ParallelReader enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private ParallelReader enclosingInstance;
            public ParallelReader Enclosing_Instance
            {
                get { return enclosingInstance; }
            }
            private System.String field;
            private IEnumerator<string> fieldIterator;
            private TermEnum termEnum;

            private bool isDisposed;

            public ParallelTermEnum(ParallelReader enclosingInstance)
            {
                InitBlock(enclosingInstance);
                try
                {
                    field = Enclosing_Instance.fieldToReader.Keys.First();
                }
                // NOTE(review): Enumerable.First() on an empty sequence actually throws
                // InvalidOperationException, not ArgumentOutOfRangeException — confirm
                // this catch ever fires for an empty fieldToReader.
                catch (ArgumentOutOfRangeException)
                {
                    // No fields, so keep field == null, termEnum == null
                    return;
                }
                if (field != null)
                    termEnum = Enclosing_Instance.fieldToReader[field].Terms();
            }

            public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
            {
                InitBlock(enclosingInstance);
                field = term.Field;
                IndexReader reader = Enclosing_Instance.fieldToReader[field];
                if (reader != null)
                    termEnum = reader.Terms(term);
            }

            public override bool Next()
            {
                if (termEnum == null)
                    return false;

                // another term in this field?
                if (termEnum.Next() && (System.Object) termEnum.Term.Field == (System.Object) field)
                    return true; // yes, keep going

                termEnum.Close(); // close old termEnum

                // find the next field with terms, if any
                if (fieldIterator == null)
                {
                    var newList = new List<string>();
                    if (Enclosing_Instance.fieldToReader != null && Enclosing_Instance.fieldToReader.Count > 0)
                    {
                        var comparer = Enclosing_Instance.fieldToReader.Comparer;
                        foreach (var entry in Enclosing_Instance.fieldToReader.Keys.Where(x => comparer.Compare(x, field) >= 0))
                            newList.Add(entry);
                    }

                    fieldIterator = newList.Skip(1).GetEnumerator(); // Skip field to get next one
                }

                while (fieldIterator.MoveNext())
                {
                    field = fieldIterator.Current;
                    termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));
                    Term term = termEnum.Term;
                    if (term != null && (System.Object) term.Field == (System.Object) field)
                        return true;
                    else
                        termEnum.Close();
                }

                return false; // no more fields
            }

            public override Term Term
            {
                get
                {
                    if (termEnum == null)
                        return null;

                    return termEnum.Term;
                }
            }

            public override int DocFreq()
            {
                if (termEnum == null)
                    return 0;

                return termEnum.DocFreq();
            }

            protected override void Dispose(bool disposing)
            {
                if (isDisposed) return;

                if (disposing)
                {
                    if (termEnum != null)
                        termEnum.Close();
                }

                isDisposed = true;
            }
        }

        // wrap a TermDocs in order to support seek(Term)
        private class ParallelTermDocs : TermDocs
        {
            private void InitBlock(ParallelReader enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private ParallelReader enclosingInstance;
            public ParallelReader Enclosing_Instance
            {
                get { return enclosingInstance; }
            }
            protected internal TermDocs termDocs;

            private bool isDisposed;

            public ParallelTermDocs(ParallelReader enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }
            public ParallelTermDocs(ParallelReader enclosingInstance, Term term)
            {
                InitBlock(enclosingInstance);
                if (term == null)
                    termDocs = (Enclosing_Instance.readers.Count == 0)
                                   ? null
                                   : Enclosing_Instance.readers[0].TermDocs(null);
                else
                    Seek(term);
            }

            public virtual int Doc
            {
                get { return termDocs.Doc; }
            }

            public virtual int Freq
            {
                get { return termDocs.Freq; }
            }

            public virtual void Seek(Term term)
            {
                IndexReader reader = Enclosing_Instance.fieldToReader[term.Field];
                termDocs = reader != null ? reader.TermDocs(term) : null;
            }

            public virtual void Seek(TermEnum termEnum)
            {
                Seek(termEnum.Term);
            }

            public virtual bool Next()
            {
                if (termDocs == null)
                    return false;

                return termDocs.Next();
            }

            public virtual int Read(int[] docs, int[] freqs)
            {
                if (termDocs == null)
                    return 0;

                return termDocs.Read(docs, freqs);
            }

            public virtual bool SkipTo(int target)
            {
                if (termDocs == null)
                    return false;

                return termDocs.SkipTo(target);
            }

            [Obsolete("Use Dispose() instead")]
            public virtual void Close()
            {
                Dispose();
            }

            public void Dispose()
            {
                Dispose(true);
            }

            protected virtual void Dispose(bool disposing)
            {
                if (isDisposed) return;

                if (disposing)
                {
                    if (termDocs != null)
                        termDocs.Close();
                }

                isDisposed = true;
            }
        }

        private class ParallelTermPositions : ParallelTermDocs, TermPositions
        {
            private void InitBlock(ParallelReader enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private ParallelReader enclosingInstance;
            public new ParallelReader Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

            public ParallelTermPositions(ParallelReader enclosingInstance) : base(enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }
            public ParallelTermPositions(ParallelReader enclosingInstance, Term term) : base(enclosingInstance)
            {
                InitBlock(enclosingInstance);
                Seek(term);
            }

            public override void Seek(Term term)
            {
                IndexReader reader = Enclosing_Instance.fieldToReader[term.Field];
                termDocs = reader != null ? reader.TermPositions(term) : null;
            }

            public virtual int NextPosition()
            {
                // It is an error to call this if there is no next position, e.g. if termDocs==null
                return ((TermPositions) termDocs).NextPosition();
            }

            public virtual int PayloadLength
            {
                get { return ((TermPositions) termDocs).PayloadLength; }
            }

            public virtual byte[] GetPayload(byte[] data, int offset)
            {
                return ((TermPositions) termDocs).GetPayload(data, offset);
            }

            // TODO: Remove warning after API has been finalized
            public virtual bool IsPayloadAvailable
            {
                get { return ((TermPositions) termDocs).IsPayloadAvailable; }
            }
        }
    }
}