/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using NUnit.Framework;

using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using Lock = Lucene.Net.Store.Lock;
using LockFactory = Lucene.Net.Store.LockFactory;
using MockRAMDirectory = Lucene.Net.Store.MockRAMDirectory;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
using SingleInstanceLockFactory = Lucene.Net.Store.SingleInstanceLockFactory;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using SinkTokenizer = Lucene.Net.Analysis.SinkTokenizer;
using Token = Lucene.Net.Analysis.Token;
using TokenFilter = Lucene.Net.Analysis.TokenFilter;
using TokenStream = Lucene.Net.Analysis.TokenStream;
using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
using WhitespaceTokenizer = Lucene.Net.Analysis.WhitespaceTokenizer;
using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
using StandardTokenizer = Lucene.Net.Analysis.Standard.StandardTokenizer;
using Hits = Lucene.Net.Search.Hits;
using IndexSearcher = Lucene.Net.Search.IndexSearcher;
using TermQuery = Lucene.Net.Search.TermQuery;
using Query = Lucene.Net.Search.Query;
using PhraseQuery = Lucene.Net.Search.PhraseQuery;
using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using _TestUtil = Lucene.Net.Util._TestUtil;

namespace Lucene.Net.Index
{
    ///
    /// $Id: TestIndexWriter.java 628085 2008-02-15 15:18:22Z mikemccand $
    ///
    [TestFixture]
    public class TestIndexWriter : LuceneTestCase
    {
        public class MyRAMDirectory : RAMDirectory
        {
            private void InitBlock(TestIndexWriter enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private TestIndexWriter enclosingInstance;
            public TestIndexWriter Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

            private LockFactory myLockFactory;

            internal MyRAMDirectory(TestIndexWriter enclosingInstance)
            {
                InitBlock(enclosingInstance);
                lockFactory = null;
                myLockFactory = new SingleInstanceLockFactory();
            }

            public override Lock MakeLock(System.String name)
            {
                return myLockFactory.MakeLock(name);
            }
        }

        private class AnonymousClassAnalyzer : Analyzer
        {
            public AnonymousClassAnalyzer(TestIndexWriter enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }

            private class AnonymousClassTokenFilter : TokenFilter
            {
                public AnonymousClassTokenFilter(AnonymousClassAnalyzer enclosingInstance, TokenStream ts) : base(ts)
                {
                    InitBlock(enclosingInstance);
                }
                private void InitBlock(AnonymousClassAnalyzer enclosingInstance)
                {
                    this.enclosingInstance =
enclosingInstance; } private AnonymousClassAnalyzer enclosingInstance; public AnonymousClassAnalyzer Enclosing_Instance { get { return enclosingInstance; } } private int count = 0; public override Token Next() { if (count++ == 5) { throw new System.IO.IOException(); } return input.Next(); } } private void InitBlock(TestIndexWriter enclosingInstance) { this.enclosingInstance = enclosingInstance; } private TestIndexWriter enclosingInstance; public TestIndexWriter Enclosing_Instance { get { return enclosingInstance; } } public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) { return new AnonymousClassTokenFilter(this, new StandardTokenizer(reader)); } } private class AnonymousClassAnalyzer1 : Analyzer { public AnonymousClassAnalyzer1(TestIndexWriter enclosingInstance) { InitBlock(enclosingInstance); } private void InitBlock(TestIndexWriter enclosingInstance) { this.enclosingInstance = enclosingInstance; } private TestIndexWriter enclosingInstance; public TestIndexWriter Enclosing_Instance { get { return enclosingInstance; } } public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) { return new CrashingFilter(Enclosing_Instance, fieldName, new WhitespaceTokenizer(reader)); } } private class AnonymousClassAnalyzer2 : Analyzer { public AnonymousClassAnalyzer2(TestIndexWriter enclosingInstance) { InitBlock(enclosingInstance); } private void InitBlock(TestIndexWriter enclosingInstance) { this.enclosingInstance = enclosingInstance; } private TestIndexWriter enclosingInstance; public TestIndexWriter Enclosing_Instance { get { return enclosingInstance; } } public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) { return new CrashingFilter(Enclosing_Instance, fieldName, new WhitespaceTokenizer(reader)); } } private class AnonymousClassThread : SupportClass.ThreadClass { public AnonymousClassThread(int NUM_ITER, IndexWriter writer, int finalI, TestIndexWriter enclosingInstance) { InitBlock(NUM_ITER, writer, finalI, enclosingInstance); } private void InitBlock(int NUM_ITER, IndexWriter writer, int finalI, TestIndexWriter enclosingInstance) { this.NUM_ITER = NUM_ITER; this.writer = writer; this.finalI = finalI; this.enclosingInstance = enclosingInstance; } private int NUM_ITER; private IndexWriter writer; private int finalI; private TestIndexWriter enclosingInstance; public TestIndexWriter Enclosing_Instance { get { return enclosingInstance; } } override public void Run() { try { for (int iter = 0; iter < NUM_ITER; iter++) { Document doc = new Document(); doc.Add(new Field("contents", "here are some contents", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.AddDocument(doc); writer.AddDocument(doc); doc.Add(new Field("crash", "this should crash after 4 terms", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc.Add(new Field("other", "this will not get indexed", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); try { writer.AddDocument(doc); Assert.Fail("did not hit expected exception"); } catch (System.IO.IOException) { } if (0 == finalI) { doc = new Document(); doc.Add(new Field("contents", "here are some contents", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.AddDocument(doc); writer.AddDocument(doc); } } } catch (System.Exception t) { lock (this) { System.Console.Out.WriteLine(SupportClass.ThreadClass.Current().Name + ": ERROR: hit 
unexpected exception"); System.Console.Out.WriteLine(t.StackTrace); } Assert.Fail(); } } } private class AnonymousClassThread1 : SupportClass.ThreadClass { public AnonymousClassThread1(IndexWriter finalWriter, Document doc, System.Collections.ArrayList failure, TestIndexWriter enclosingInstance) { InitBlock(finalWriter, doc, failure, enclosingInstance); } private void InitBlock(IndexWriter finalWriter, Document doc, System.Collections.ArrayList failure, TestIndexWriter enclosingInstance) { this.finalWriter = finalWriter; this.doc = doc; this.failure = failure; this.enclosingInstance = enclosingInstance; } private IndexWriter finalWriter; private Document doc; private System.Collections.ArrayList failure; private TestIndexWriter enclosingInstance; public TestIndexWriter Enclosing_Instance { get { return enclosingInstance; } } override public void Run() { bool done = false; while (!done) { for (int i = 0; i < 100; i++) { try { finalWriter.AddDocument(doc); } catch (AlreadyClosedException) { done = true; break; } catch (System.NullReferenceException) { done = true; break; } catch (System.Exception e) { System.Console.Out.WriteLine(e.StackTrace); failure.Add(e); done = true; break; } } System.Threading.Thread.Sleep(0); } } } [Test] public virtual void TestDocCount() { Directory dir = new RAMDirectory(); IndexWriter writer = null; IndexReader reader = null; int i; IndexWriter.SetDefaultWriteLockTimeout(2000); Assert.AreEqual(2000, IndexWriter.GetDefaultWriteLockTimeout()); writer = new IndexWriter(dir, new WhitespaceAnalyzer()); IndexWriter.SetDefaultWriteLockTimeout(1000); // add 100 documents for (i = 0; i < 100; i++) { AddDoc(writer); } Assert.AreEqual(100, writer.DocCount()); writer.Close(); // delete 40 documents reader = IndexReader.Open(dir); for (i = 0; i < 40; i++) { reader.DeleteDocument(i); } reader.Close(); // test doc count before segments are merged/index is optimized writer = new IndexWriter(dir, new WhitespaceAnalyzer()); Assert.AreEqual(100, writer.DocCount()); writer.Close(); reader = IndexReader.Open(dir); Assert.AreEqual(100, reader.MaxDoc()); Assert.AreEqual(60, reader.NumDocs()); reader.Close(); // optimize the index and check that the new doc count is correct writer = new IndexWriter(dir, true, new WhitespaceAnalyzer()); writer.Optimize(); Assert.AreEqual(60, writer.DocCount()); writer.Close(); // check that the index reader gives the same numbers. reader = IndexReader.Open(dir); Assert.AreEqual(60, reader.MaxDoc()); Assert.AreEqual(60, reader.NumDocs()); reader.Close(); // make sure opening a new index for create over // this existing one works correctly: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); Assert.AreEqual(0, writer.DocCount()); writer.Close(); } private void AddDoc(IndexWriter writer) { Document doc = new Document(); doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED)); writer.AddDocument(doc); } private void AddDocWithIndex(IndexWriter writer, int index) { Document doc = new Document(); doc.Add(new Field("content", "aaa " + index, Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("id", "" + index, Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); } /* Test: make sure when we run out of disk space or hit random IOExceptions in any of the addIndexes(*) calls that 1) index is not corrupt (searcher can open/search it) and 2) transactional semantics are followed: either all or none of the incoming documents were in fact added. 
*/ [Test] public virtual void TestAddIndexOnDiskFull() { int START_COUNT = 57; int NUM_DIR = 50; int END_COUNT = START_COUNT + NUM_DIR * 25; bool debug = false; // Build up a bunch of dirs that have indexes which we // will then merge together by calling addIndexes(*): Directory[] dirs = new Directory[NUM_DIR]; long inputDiskUsage = 0; for (int i = 0; i < NUM_DIR; i++) { dirs[i] = new RAMDirectory(); IndexWriter writer = new IndexWriter(dirs[i], new WhitespaceAnalyzer(), true); for (int j = 0; j < 25; j++) { AddDocWithIndex(writer, 25 * i + j); } writer.Close(); System.String[] files = dirs[i].List(); for (int j = 0; j < files.Length; j++) { inputDiskUsage += dirs[i].FileLength(files[j]); } } // Now, build a starting index that has START_COUNT docs. We // will then try to addIndexes into a copy of this: RAMDirectory startDir = new RAMDirectory(); IndexWriter writer2 = new IndexWriter(startDir, new WhitespaceAnalyzer(), true); for (int j = 0; j < START_COUNT; j++) { AddDocWithIndex(writer2, j); } writer2.Close(); // Make sure starting index seems to be working properly: Term searchTerm = new Term("content", "aaa"); IndexReader reader = IndexReader.Open(startDir); Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq"); IndexSearcher searcher = new IndexSearcher(reader); Hits hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(57, hits.Length(), "first number of hits"); searcher.Close(); reader.Close(); // Iterate with larger and larger amounts of free // disk space. With little free disk space, // addIndexes will certainly run out of space & // fail. Verify that when this happens, index is // not corrupt and index in fact has added no // documents. Then, we increase disk space by 2000 // bytes each iteration. At some point there is // enough free disk space and addIndexes should // succeed and index should show all documents were // added. // String[] files = startDir.list(); long diskUsage = startDir.SizeInBytes(); long startDiskUsage = 0; System.String[] files2 = startDir.List(); for (int i = 0; i < files2.Length; i++) { startDiskUsage += startDir.FileLength(files2[i]); } for (int iter = 0; iter < 6; iter++) { if (debug) System.Console.Out.WriteLine("TEST: iter=" + iter); // Start with 100 bytes more than we are currently using: long diskFree = diskUsage + 100; bool autoCommit = iter % 2 == 0; int method = iter / 2; bool success = false; bool done = false; System.String methodName; if (0 == method) { methodName = "addIndexes(Directory[])"; } else if (1 == method) { methodName = "addIndexes(IndexReader[])"; } else { methodName = "addIndexesNoOptimize(Directory[])"; } while (!done) { // Make a new dir that will enforce disk usage: MockRAMDirectory dir = new MockRAMDirectory(startDir); writer2 = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false); System.IO.IOException err = null; MergeScheduler ms = writer2.GetMergeScheduler(); for (int x = 0; x < 2; x++) { if (ms is ConcurrentMergeScheduler) // This test intentionally produces exceptions // in the threads that CMS launches; we don't // want to pollute test output with these. 
if (0 == x) ((ConcurrentMergeScheduler)ms).SetSuppressExceptions_ForNUnitTest(); else ((ConcurrentMergeScheduler) ms).ClearSuppressExceptions_ForNUnitTest(); // Two loops: first time, limit disk space & // throw random IOExceptions; second time, no // disk space limit: double rate = 0.05; double diskRatio = ((double) diskFree) / diskUsage; long thisDiskFree; System.String testName = null; if (0 == x) { thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (debug) testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes autoCommit=" + autoCommit; } else { thisDiskFree = 0; rate = 0.0; if (debug) testName = "disk full test " + methodName + " with unlimited disk space autoCommit=" + autoCommit; } if (debug) System.Console.Out.WriteLine("\ncycle: " + testName); dir.SetMaxSizeInBytes(thisDiskFree); dir.SetRandomIOExceptionRate(rate, diskFree); try { if (0 == method) { writer2.AddIndexes(dirs); } else if (1 == method) { IndexReader[] readers = new IndexReader[dirs.Length]; for (int i = 0; i < dirs.Length; i++) { readers[i] = IndexReader.Open(dirs[i]); } try { writer2.AddIndexes(readers); } finally { for (int i = 0; i < dirs.Length; i++) { readers[i].Close(); } } } else { writer2.AddIndexesNoOptimize(dirs); } success = true; if (debug) { System.Console.Out.WriteLine(" success!"); } if (0 == x) { done = true; } } catch (System.IO.IOException e) { success = false; err = e; if (debug) { System.Console.Out.WriteLine(" hit IOException: " + e); System.Console.Out.WriteLine(e.StackTrace); } if (1 == x) { System.Console.Out.WriteLine(e.StackTrace); Assert.Fail(methodName + " hit IOException after disk space was freed up"); } } // Make sure all threads from // ConcurrentMergeScheduler are done _TestUtil.SyncConcurrentMerges(writer2); if (autoCommit) { // Whether we succeeded or failed, check that // all un-referenced files were in fact // deleted (ie, we did not create garbage). // Only check this when autoCommit is true: // when it's false, it's expected that there // are unreferenced files (ie they won't be // referenced until the "commit on close"). 
// Just create a new IndexFileDeleter, have it // delete unreferenced files, then verify that // in fact no files were deleted: System.String successStr; if (success) { successStr = "success"; } else { successStr = "IOException"; } System.String message = methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes)"; AssertNoUnreferencedFiles(dir, message); } if (debug) { System.Console.Out.WriteLine(" now test readers"); } // Finally, verify index is not corrupt, and, if // we succeeded, we see all docs added, and if we // failed, we see either all docs or no docs added // (transactional semantics): try { reader = IndexReader.Open(dir); } catch (System.IO.IOException e) { System.Console.Out.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when creating IndexReader: " + e); } int result = reader.DocFreq(searchTerm); if (success) { if (autoCommit && result != END_COUNT) { Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT); } else if (!autoCommit && result != START_COUNT) { Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " [autoCommit = false]"); } } else { // On hitting exception we still may have added // all docs: if (result != START_COUNT && result != END_COUNT) { System.Console.Out.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT); } } searcher = new IndexSearcher(reader); try { hits = searcher.Search(new TermQuery(searchTerm)); } catch (System.IO.IOException e) { System.Console.Out.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when searching: " + e); } int result2 = hits.Length(); if (success) { if (result2 != result) { Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } else { // On hitting exception we still may have added // all docs: if (result2 != result) { System.Console.Out.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } searcher.Close(); reader.Close(); if (debug) { System.Console.Out.WriteLine(" count is " + result); } if (done || result == END_COUNT) { break; } } if (debug) { System.Console.Out.WriteLine(" start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.GetMaxUsedSizeInBytes()); } if (done) { // Javadocs state that temp free Directory space // required is at most 2X total input size of // indices so let's make sure: Assert.IsTrue( (dir.GetMaxUsedSizeInBytes() - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage), "max free Directory space required exceeded 1X the total input index sizes during " + methodName + ": max temp usage = " + (dir.GetMaxUsedSizeInBytes() - startDiskUsage) + " bytes; " + "starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes" ); } writer2.Close(); // Wait for all BG threads to finish else // dir.close() will throw IOException because // there are still open files _TestUtil.SyncConcurrentMerges(ms); dir.Close(); // Try again with 2000 more bytes of free space: diskFree += 2000; } } startDir.Close(); } /* * Make sure IndexWriter cleans up on hitting a disk * full exception in 
addDocument. */ [Test] public virtual void TestAddDocumentOnDiskFull() { bool debug = false; for (int pass = 0; pass < 3; pass++) { if (debug) System.Console.Out.WriteLine("TEST: pass=" + pass); bool autoCommit = pass == 0; bool doAbort = pass == 2; long diskFree = 200; while (true) { if (debug) System.Console.Out.WriteLine("TEST: cycle: diskFree=" + diskFree); MockRAMDirectory dir = new MockRAMDirectory(); dir.SetMaxSizeInBytes(diskFree); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true); MergeScheduler ms = writer.GetMergeScheduler(); if (ms is ConcurrentMergeScheduler) // This test intentionally produces exceptions // in the threads that CMS launches; we don't // want to pollute test output with these. ((ConcurrentMergeScheduler)ms).SetSuppressExceptions_ForNUnitTest(); bool hitError = false; try { for (int i = 0; i < 200; i++) { AddDoc(writer); } } catch (System.IO.IOException e) { if (debug) { System.Console.Out.WriteLine("TEST: exception on addDoc"); System.Console.Out.WriteLine(e.StackTrace); } hitError = true; } if (hitError) { if (doAbort) { writer.Abort(); } else { try { writer.Close(); } catch (System.IO.IOException e) { if (debug) { System.Console.Out.WriteLine("TEST: exception on close"); System.Console.Out.WriteLine(e.StackTrace); } dir.SetMaxSizeInBytes(0); writer.Close(); } } _TestUtil.SyncConcurrentMerges(ms); AssertNoUnreferencedFiles(dir, "after disk full during addDocument with autoCommit=" + autoCommit); // Make sure reader can open the index: IndexReader.Open(dir).Close(); dir.Close(); // Now try again w/ more space: diskFree += 500; } else { _TestUtil.SyncConcurrentMerges(writer); dir.Close(); break; } } } } public static void AssertNoUnreferencedFiles(Directory dir, System.String message) { System.String[] startFiles = dir.List(); SegmentInfos infos = new SegmentInfos(); infos.Read(dir); new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null); System.String[] endFiles = dir.List(); System.Array.Sort(startFiles); System.Array.Sort(endFiles); //if (!startFiles.Equals(endFiles)) //{ // Assert.Fail(message + ": before delete:\n " + ArrayToString(startFiles) + "\n after delete:\n " + ArrayToString(endFiles)); //} string startArray = ArrayToString(startFiles); string endArray = ArrayToString(endFiles); if (!startArray.Equals(endArray)) { Assert.Fail(message + ": before delete:\n " + startArray + "\n after delete:\n " + endArray); } } /// Make sure we skip wicked long terms. 
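        /// A term longer than the 16383-character limit is skipped, but the document that contains it
        /// is still indexed and the positions of its remaining terms still advance past the skipped term.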
        [Test]
        public virtual void TestWickedLongTerm()
        {
            RAMDirectory dir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);

            char[] chars = new char[16383];
            for (int index = 0; index < chars.Length; index++)
                chars[index] = 'x';
            Document doc = new Document();
            System.String bigTerm = new System.String(chars);

            // Max length term is 16383, so these contents produce
            // a too-long term:
            System.String contents = "abc xyz x" + bigTerm + " another term";
            doc.Add(new Field("content", contents, Field.Store.NO, Field.Index.TOKENIZED));
            writer.AddDocument(doc);

            // Make sure we can add another normal document
            doc = new Document();
            doc.Add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.TOKENIZED));
            writer.AddDocument(doc);
            writer.Close();

            IndexReader reader = IndexReader.Open(dir);

            // Make sure all terms < max size were indexed
            Assert.AreEqual(2, reader.DocFreq(new Term("content", "abc")));
            Assert.AreEqual(1, reader.DocFreq(new Term("content", "bbb")));
            Assert.AreEqual(1, reader.DocFreq(new Term("content", "term")));
            Assert.AreEqual(1, reader.DocFreq(new Term("content", "another")));

            // Make sure position is still incremented when
            // massive term is skipped:
            TermPositions tps = reader.TermPositions(new Term("content", "another"));
            Assert.IsTrue(tps.Next());
            Assert.AreEqual(1, tps.Freq());
            Assert.AreEqual(3, tps.NextPosition());

            // Make sure the doc that has the massive term is in
            // the index:
            Assert.AreEqual(2, reader.NumDocs(), "document with wicked long term is not in the index!");

            reader.Close();

            // Make sure we can add a document with exactly the
            // maximum length term, and search on that term:
            doc = new Document();
            doc.Add(new Field("content", bigTerm, Field.Store.NO, Field.Index.TOKENIZED));
            StandardAnalyzer sa = new StandardAnalyzer();
            sa.SetMaxTokenLength(100000);
            writer = new IndexWriter(dir, sa);
            writer.AddDocument(doc);
            writer.Close();
            reader = IndexReader.Open(dir);
            Assert.AreEqual(1, reader.DocFreq(new Term("content", bigTerm)));
            reader.Close();

            dir.Close();
        }

        [Test]
        public virtual void TestOptimizeMaxNumSegments()
        {
            MockRAMDirectory dir = new MockRAMDirectory();

            Document doc = new Document();
            doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED));

            for (int numDocs = 38; numDocs < 500; numDocs += 38)
            {
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
                LogDocMergePolicy ldmp = new LogDocMergePolicy();
                ldmp.SetMinMergeDocs(1);
                writer.SetMergePolicy(ldmp);
                writer.SetMergeFactor(5);
                writer.SetMaxBufferedDocs(2);
                for (int j = 0; j < numDocs; j++)
                    writer.AddDocument(doc);
                writer.Close();

                SegmentInfos sis = new SegmentInfos();
                sis.Read(dir);
                int segCount = sis.Count;

                writer = new IndexWriter(dir, new WhitespaceAnalyzer());
                writer.SetMergePolicy(ldmp);
                writer.SetMergeFactor(5);
                writer.Optimize(3);
                writer.Close();

                sis = new SegmentInfos();
                sis.Read(dir);
                int optSegCount = sis.Count;

                if (segCount < 3)
                    Assert.AreEqual(segCount, optSegCount);
                else
                    Assert.AreEqual(3, optSegCount);
            }
        }

        [Test]
        public virtual void TestOptimizeMaxNumSegments2()
        {
            MockRAMDirectory dir = new MockRAMDirectory();

            Document doc = new Document();
            doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED));

            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
            LogDocMergePolicy ldmp = new LogDocMergePolicy();
            ldmp.SetMinMergeDocs(1);
            writer.SetMergePolicy(ldmp);
            writer.SetMergeFactor(4);
            writer.SetMaxBufferedDocs(2);

            for (int iter = 0; iter < 10; iter++)
            {
                for (int i = 0; i < 19;
i++) writer.AddDocument(doc); writer.Flush(); SegmentInfos sis = new SegmentInfos(); ((ConcurrentMergeScheduler) writer.GetMergeScheduler()).Sync(); sis.Read(dir); int segCount = sis.Count; writer.Optimize(7); sis = new SegmentInfos(); ((ConcurrentMergeScheduler) writer.GetMergeScheduler()).Sync(); sis.Read(dir); int optSegCount = sis.Count; if (segCount < 7) Assert.AreEqual(segCount, optSegCount); else Assert.AreEqual(7, optSegCount); } } /// Make sure optimize doesn't use any more than 1X /// starting index size as its temporary free space /// required. /// [Test] public virtual void TestOptimizeTempSpaceUsage() { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); for (int j = 0; j < 500; j++) { AddDocWithIndex(writer, j); } writer.Close(); long startDiskUsage = 0; System.String[] files = dir.List(); for (int i = 0; i < files.Length; i++) { startDiskUsage += dir.FileLength(files[i]); } dir.ResetMaxUsedSizeInBytes(); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); writer.Optimize(); writer.Close(); long maxDiskUsage = dir.GetMaxUsedSizeInBytes(); Assert.IsTrue( maxDiskUsage <= 2 * startDiskUsage, "optimized used too much temporary space: starting usage was " + startDiskUsage + " bytes; max temp usage was " + maxDiskUsage + " but should have been " + (2 * startDiskUsage) + " (= 2X starting usage)" ); dir.Close(); } internal static System.String ArrayToString(System.String[] l) { System.String s = ""; for (int i = 0; i < l.Length; i++) { if (i > 0) { s += "\n "; } s += l[i]; } return s; } // Make sure we can open an index for create even when a // reader holds it open (this fails pre lock-less // commits on windows): [Test] public virtual void TestCreateWithReader() { System.String tempDir = System.IO.Path.GetTempPath(); if (tempDir == null) throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test"); System.IO.FileInfo indexDir = new System.IO.FileInfo(tempDir + "\\" + "lucenetestindexwriter"); try { Directory dir = FSDirectory.GetDirectory(indexDir); // add one document & close writer IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); AddDoc(writer); writer.Close(); // now open reader: IndexReader reader = IndexReader.Open(dir); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); // now open index for create: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); Assert.AreEqual(writer.DocCount(), 0, "should be zero documents"); AddDoc(writer); writer.Close(); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); IndexReader reader2 = IndexReader.Open(dir); Assert.AreEqual(reader2.NumDocs(), 1, "should be one document"); reader.Close(); reader2.Close(); } finally { RmDir(indexDir); } } // Same test as above, but use IndexWriter constructor // that takes File: [Test] public virtual void TestCreateWithReader2() { System.String tempDir = System.IO.Path.GetTempPath(); if (tempDir == null) throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test"); System.IO.FileInfo indexDir = new System.IO.FileInfo(tempDir + "\\" + "lucenetestindexwriter"); try { // add one document & close writer IndexWriter writer = new IndexWriter(indexDir, new WhitespaceAnalyzer(), true); AddDoc(writer); writer.Close(); // now open reader: IndexReader reader = IndexReader.Open(indexDir); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); // now open index for create: writer = new IndexWriter(indexDir, new WhitespaceAnalyzer(), true); 
Assert.AreEqual(writer.DocCount(), 0, "should be zero documents"); AddDoc(writer); writer.Close(); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); IndexReader reader2 = IndexReader.Open(indexDir); Assert.AreEqual(reader2.NumDocs(), 1, "should be one document"); reader.Close(); reader2.Close(); } finally { RmDir(indexDir); } } // Same test as above, but use IndexWriter constructor // that takes String: [Test] public virtual void TestCreateWithReader3() { System.String tempDir = SupportClass.AppSettings.Get("tempDir", ""); if (tempDir == null) throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test"); System.String dirName = tempDir + "/lucenetestindexwriter"; try { // add one document & close writer IndexWriter writer = new IndexWriter(dirName, new WhitespaceAnalyzer(), true); AddDoc(writer); writer.Close(); // now open reader: IndexReader reader = IndexReader.Open(dirName); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); // now open index for create: writer = new IndexWriter(dirName, new WhitespaceAnalyzer(), true); Assert.AreEqual(writer.DocCount(), 0, "should be zero documents"); AddDoc(writer); writer.Close(); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); IndexReader reader2 = IndexReader.Open(dirName); Assert.AreEqual(reader2.NumDocs(), 1, "should be one document"); reader.Close(); reader2.Close(); } finally { RmDir(new System.IO.FileInfo(dirName)); } } // Simulate a writer that crashed while writing segments // file: make sure we can still open the index (ie, // gracefully fallback to the previous segments file), // and that we can add to the index: [Test] public virtual void TestSimulatedCrashedWriter() { Directory dir = new RAMDirectory(); IndexWriter writer = null; writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); // add 100 documents for (int i = 0; i < 100; i++) { AddDoc(writer); } // close writer.Close(); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); Assert.IsTrue(gen > 1, "segment generation should be > 1 but got " + gen); // Make the next segments file, with last byte // missing, to simulate a writer that crashed while // writing segments file: System.String fileNameIn = SegmentInfos.GetCurrentSegmentFileName(dir); System.String fileNameOut = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", 1 + gen); IndexInput in_Renamed = dir.OpenInput(fileNameIn); IndexOutput out_Renamed = dir.CreateOutput(fileNameOut); long length = in_Renamed.Length(); for (int i = 0; i < length - 1; i++) { out_Renamed.WriteByte(in_Renamed.ReadByte()); } in_Renamed.Close(); out_Renamed.Close(); IndexReader reader = null; try { reader = IndexReader.Open(dir); } catch (System.Exception) { Assert.Fail("reader failed to open on a crashed index"); } reader.Close(); try { writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); } catch (System.Exception) { Assert.Fail("writer failed to open on a crashed index"); } // add 100 documents for (int i = 0; i < 100; i++) { AddDoc(writer); } // close writer.Close(); } // Simulate a corrupt index by removing last byte of // latest segments file and make sure we get an // IOException trying to open the index: [Test] public virtual void TestSimulatedCorruptIndex1() { Directory dir = new RAMDirectory(); IndexWriter writer = null; writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); // add 100 documents for (int i = 0; i < 100; i++) { AddDoc(writer); } // close writer.Close(); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); 
Assert.IsTrue(gen > 1, "segment generation should be > 1 but got " + gen); System.String fileNameIn = SegmentInfos.GetCurrentSegmentFileName(dir); System.String fileNameOut = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", 1 + gen); IndexInput in_Renamed = dir.OpenInput(fileNameIn); IndexOutput out_Renamed = dir.CreateOutput(fileNameOut); long length = in_Renamed.Length(); for (int i = 0; i < length - 1; i++) { out_Renamed.WriteByte(in_Renamed.ReadByte()); } in_Renamed.Close(); out_Renamed.Close(); dir.DeleteFile(fileNameIn); IndexReader reader = null; try { reader = IndexReader.Open(dir); Assert.Fail("reader did not hit IOException on opening a corrupt index"); } catch (System.Exception) { } if (reader != null) { reader.Close(); } } [Test] public virtual void TestChangesAfterClose() { Directory dir = new RAMDirectory(); IndexWriter writer = null; writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); AddDoc(writer); // close writer.Close(); try { AddDoc(writer); Assert.Fail("did not hit AlreadyClosedException"); } catch (AlreadyClosedException) { // expected } } // Simulate a corrupt index by removing one of the cfs // files and make sure we get an IOException trying to // open the index: [Test] public virtual void TestSimulatedCorruptIndex2() { Directory dir = new RAMDirectory(); IndexWriter writer = null; writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); // add 100 documents for (int i = 0; i < 100; i++) { AddDoc(writer); } // close writer.Close(); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); Assert.IsTrue(gen > 1, "segment generation should be > 1 but got " + gen); System.String[] files = dir.List(); for (int i = 0; i < files.Length; i++) { if (files[i].EndsWith(".cfs")) { dir.DeleteFile(files[i]); break; } } IndexReader reader = null; try { reader = IndexReader.Open(dir); Assert.Fail("reader did not hit IOException on opening a corrupt index"); } catch (System.Exception) { } if (reader != null) { reader.Close(); } } /* * Simple test for "commit on close": open writer with * autoCommit=false, so it will only commit on close, * then add a bunch of docs, making sure reader does not * see these docs until writer is closed. 
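	 * The reader opened while the writer is still making changes must also keep
	 * reporting IsCurrent() until Close() finally commits.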
*/ [Test] public virtual void TestCommitOnClose() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); for (int i = 0; i < 14; i++) { AddDoc(writer); } writer.Close(); Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir); Hits hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "first number of hits"); searcher.Close(); IndexReader reader = IndexReader.Open(dir); writer = new IndexWriter(dir, false, new WhitespaceAnalyzer()); for (int i = 0; i < 3; i++) { for (int j = 0; j < 11; j++) { AddDoc(writer); } searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "reader incorrectly sees changes from writer with autoCommit disabled"); searcher.Close(); Assert.IsTrue(reader.IsCurrent(), "reader should have still been current"); } // Now, close the writer: writer.Close(); Assert.IsFalse(reader.IsCurrent(), "reader should not be current now"); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(47, hits.Length(), "reader did not see changes after writer was closed"); searcher.Close(); } /* * Simple test for "commit on close": open writer with * autoCommit=false, so it will only commit on close, * then add a bunch of docs, making sure reader does not * see them until writer has closed. Then instead of * closing the writer, call abort and verify reader sees * nothing was added. Then verify we can open the index * and add docs to it. */ [Test] public virtual void TestCommitOnCloseAbort() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(10); for (int i = 0; i < 14; i++) { AddDoc(writer); } writer.Close(); Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir); Hits hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "first number of hits"); searcher.Close(); writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false); writer.SetMaxBufferedDocs(10); for (int j = 0; j < 17; j++) { AddDoc(writer); } // Delete all docs: writer.DeleteDocuments(searchTerm); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "reader incorrectly sees changes from writer with autoCommit disabled"); searcher.Close(); // Now, close the writer: writer.Abort(); AssertNoUnreferencedFiles(dir, "unreferenced files remain after abort()"); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "saw changes after writer.abort"); searcher.Close(); // Now make sure we can re-open the index, add docs, // and all is good: writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false); writer.SetMaxBufferedDocs(10); for (int i = 0; i < 12; i++) { for (int j = 0; j < 17; j++) { AddDoc(writer); } searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "reader incorrectly sees changes from writer with autoCommit disabled"); searcher.Close(); } writer.Close(); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(218, hits.Length(), "didn't see changes after close"); searcher.Close(); dir.Close(); } /* * Verify that a writer with "commit on close" indeed * cleans up the temp 
         * segments created after opening that are not referenced by the
         * starting segments file.  We check this by using MockRAMDirectory
         * to measure max temp disk space used.
         */
        [Test]
        public virtual void TestCommitOnCloseDiskUsage()
        {
            MockRAMDirectory dir = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
            for (int j = 0; j < 30; j++)
            {
                AddDocWithIndex(writer, j);
            }
            writer.Close();
            dir.ResetMaxUsedSizeInBytes();

            long startDiskUsage = dir.GetMaxUsedSizeInBytes();
            writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
            for (int j = 0; j < 1470; j++)
            {
                AddDocWithIndex(writer, j);
            }
            long midDiskUsage = dir.GetMaxUsedSizeInBytes();
            dir.ResetMaxUsedSizeInBytes();
            writer.Optimize();
            writer.Close();
            long endDiskUsage = dir.GetMaxUsedSizeInBytes();

            // Ending index is 50X as large as starting index; due
            // to 2X disk usage normally we allow 100X max
            // transient usage.  If something is wrong w/ deleter
            // and it doesn't delete intermediate segments then it
            // will exceed this 100X:
            // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage);
            Assert.IsTrue(midDiskUsage < 100 * startDiskUsage, "writer used too much space while adding documents when autoCommit=false");
            Assert.IsTrue(endDiskUsage < 100 * startDiskUsage, "writer used too much space after close when autoCommit=false");
        }

        /*
         * Verify that calling optimize when writer is open for
         * "commit on close" works correctly both for abort()
         * and close().
         */
        [Test]
        public virtual void TestCommitOnCloseOptimize()
        {
            RAMDirectory dir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
            writer.SetMaxBufferedDocs(10);
            for (int j = 0; j < 17; j++)
            {
                AddDocWithIndex(writer, j);
            }
            writer.Close();

            writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
            writer.Optimize();

            // Open a reader before closing (committing) the writer:
            IndexReader reader = IndexReader.Open(dir);

            // Reader should see index as unoptimized at this
            // point:
            Assert.IsFalse(reader.IsOptimized(), "Reader incorrectly sees that the index is optimized");
            reader.Close();

            // Abort the writer:
            writer.Abort();
            AssertNoUnreferencedFiles(dir, "aborted writer after optimize");

            // Open a reader after aborting writer:
            reader = IndexReader.Open(dir);

            // Reader should still see index as unoptimized:
            Assert.IsFalse(reader.IsOptimized(), "Reader incorrectly sees that the index is optimized");
            reader.Close();

            writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
            writer.Optimize();
            writer.Close();
            AssertNoUnreferencedFiles(dir, "aborted writer after optimize");

            // Open a reader after closing the writer:
            reader = IndexReader.Open(dir);

            // Reader should now see the index as optimized:
            Assert.IsTrue(reader.IsOptimized(), "Reader incorrectly sees that the index is unoptimized");
            reader.Close();
        }

        [Test]
        public virtual void TestIndexNoDocuments()
        {
            RAMDirectory dir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
            writer.Flush();
            writer.Close();

            IndexReader reader = IndexReader.Open(dir);
            Assert.AreEqual(0, reader.MaxDoc());
            Assert.AreEqual(0, reader.NumDocs());
            reader.Close();

            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
            writer.Flush();
            writer.Close();

            reader = IndexReader.Open(dir);
            Assert.AreEqual(0, reader.MaxDoc());
            Assert.AreEqual(0, reader.NumDocs());
            reader.Close();
        }

        [Test]
        public virtual void TestManyFields()
        {
            RAMDirectory dir = new RAMDirectory();
            IndexWriter writer =
new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(10); for (int j = 0; j < 100; j++) { Document doc = new Document(); doc.Add(new Field("a" + j, "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("b" + j, "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("c" + j, "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("d" + j, "aaa", Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("e" + j, "aaa", Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("f" + j, "aaa", Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); } writer.Close(); IndexReader reader = IndexReader.Open(dir); Assert.AreEqual(100, reader.MaxDoc()); Assert.AreEqual(100, reader.NumDocs()); for (int j = 0; j < 100; j++) { Assert.AreEqual(1, reader.DocFreq(new Term("a" + j, "aaa" + j))); Assert.AreEqual(1, reader.DocFreq(new Term("b" + j, "aaa" + j))); Assert.AreEqual(1, reader.DocFreq(new Term("c" + j, "aaa" + j))); Assert.AreEqual(1, reader.DocFreq(new Term("d" + j, "aaa"))); Assert.AreEqual(1, reader.DocFreq(new Term("e" + j, "aaa"))); Assert.AreEqual(1, reader.DocFreq(new Term("f" + j, "aaa"))); } reader.Close(); dir.Close(); } [Test] public virtual void TestSmallRAMBuffer() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetRAMBufferSizeMB(0.000001); int lastNumFile = dir.List().Length; for (int j = 0; j < 9; j++) { Document doc = new Document(); doc.Add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); int numFile = dir.List().Length; // Verify that with a tiny RAM buffer we see new // segment after every doc Assert.IsTrue(numFile > lastNumFile); lastNumFile = numFile; } writer.Close(); dir.Close(); } // Make sure it's OK to change RAM buffer size and // maxBufferedDocs in a write session [Test] public virtual void TestChangingRAMBuffer() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(10); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); long lastGen = - 1; for (int j = 1; j < 52; j++) { Document doc = new Document(); doc.Add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); _TestUtil.SyncConcurrentMerges(writer); long gen = SegmentInfos.GenerationFromSegmentsFileName(SegmentInfos.GetCurrentSegmentFileName(dir.List())); if (j == 1) lastGen = gen; else if (j < 10) // No new files should be created Assert.AreEqual(gen, lastGen); else if (10 == j) { Assert.IsTrue(gen > lastGen); lastGen = gen; writer.SetRAMBufferSizeMB(0.000001); writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); } else if (j < 20) { Assert.IsTrue(gen > lastGen); lastGen = gen; } else if (20 == j) { writer.SetRAMBufferSizeMB(16); writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); lastGen = gen; } else if (j < 30) { Assert.AreEqual(gen, lastGen); } else if (30 == j) { writer.SetRAMBufferSizeMB(0.000001); writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); } else if (j < 40) { Assert.IsTrue(gen > lastGen); lastGen = gen; } else if (40 == j) { writer.SetMaxBufferedDocs(10); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); lastGen = gen; } else if (j < 50) { Assert.AreEqual(gen, lastGen); writer.SetMaxBufferedDocs(10); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); } else if (50 == j) { Assert.IsTrue(gen > lastGen); } } 
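            // All 51 adds are done; the checks above verified that flushing follows whichever
            // trigger (doc count or RAM size) is currently enabled.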
writer.Close(); dir.Close(); } [Test] public virtual void TestChangingRAMBuffer2() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(10); writer.SetMaxBufferedDeleteTerms(10); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); for (int j = 1; j < 52; j++) { Document doc = new Document(); doc.Add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); } long lastGen = - 1; for (int j = 1; j < 52; j++) { writer.DeleteDocuments(new Term("field", "aaa" + j)); _TestUtil.SyncConcurrentMerges(writer); long gen = SegmentInfos.GenerationFromSegmentsFileName(SegmentInfos.GetCurrentSegmentFileName(dir.List())); if (j == 1) lastGen = gen; else if (j < 10) { // No new files should be created Assert.AreEqual(gen, lastGen); } else if (10 == j) { Assert.IsTrue(gen > lastGen); lastGen = gen; writer.SetRAMBufferSizeMB(0.000001); writer.SetMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH); } else if (j < 20) { Assert.IsTrue(gen > lastGen); lastGen = gen; } else if (20 == j) { writer.SetRAMBufferSizeMB(16); writer.SetMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH); lastGen = gen; } else if (j < 30) { Assert.AreEqual(gen, lastGen); } else if (30 == j) { writer.SetRAMBufferSizeMB(0.000001); writer.SetMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH); } else if (j < 40) { Assert.IsTrue(gen > lastGen); lastGen = gen; } else if (40 == j) { writer.SetMaxBufferedDeleteTerms(10); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); lastGen = gen; } else if (j < 50) { Assert.AreEqual(gen, lastGen); writer.SetMaxBufferedDeleteTerms(10); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); } else if (50 == j) { Assert.IsTrue(gen > lastGen); } } writer.Close(); dir.Close(); } [Test] public virtual void TestDiverseDocs() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetRAMBufferSizeMB(0.5); System.Random rand = new System.Random((System.Int32) 31415); for (int i = 0; i < 3; i++) { // First, docs where every term is unique (heavy on // Posting instances) for (int j = 0; j < 100; j++) { Document doc = new Document(); for (int k = 0; k < 100; k++) { doc.Add(new Field("field", System.Convert.ToString(rand.Next()), Field.Store.YES, Field.Index.TOKENIZED)); } writer.AddDocument(doc); } // Next, many single term docs where only one term // occurs (heavy on byte blocks) for (int j = 0; j < 100; j++) { Document doc = new Document(); doc.Add(new Field("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); } // Next, many single term docs where only one term // occurs but the terms are very long (heavy on // char[] arrays) for (int j = 0; j < 100; j++) { System.Text.StringBuilder b = new System.Text.StringBuilder(); System.String x = System.Convert.ToString(j) + "."; for (int k = 0; k < 1000; k++) b.Append(x); System.String longTerm = b.ToString(); Document doc = new Document(); doc.Add(new Field("field", longTerm, Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); } } writer.Close(); IndexSearcher searcher = new IndexSearcher(dir); Hits hits = searcher.Search(new TermQuery(new Term("field", "aaa"))); Assert.AreEqual(300, hits.Length()); searcher.Close(); dir.Close(); } [Test] public virtual void TestEnablingNorms() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new 
WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(10); // Enable norms for only 1 doc, pre flush for (int j = 0; j < 10; j++) { Document doc = new Document(); Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED); if (j != 8) { f.SetOmitNorms(true); } doc.Add(f); writer.AddDocument(doc); } writer.Close(); Term searchTerm = new Term("field", "aaa"); IndexSearcher searcher = new IndexSearcher(dir); Hits hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(10, hits.Length()); searcher.Close(); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(10); // Enable norms for only 1 doc, post flush for (int j = 0; j < 27; j++) { Document doc = new Document(); Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED); if (j != 26) { f.SetOmitNorms(true); } doc.Add(f); writer.AddDocument(doc); } writer.Close(); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(27, hits.Length()); searcher.Close(); IndexReader reader = IndexReader.Open(dir); reader.Close(); dir.Close(); } [Test] public virtual void TestHighFreqTerm() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetRAMBufferSizeMB(0.01); writer.SetMaxFieldLength(100000000); // Massive doc that has 128 K a's System.Text.StringBuilder b = new System.Text.StringBuilder(1024 * 1024); for (int i = 0; i < 4096; i++) { b.Append(" a a a a a a a a"); b.Append(" a a a a a a a a"); b.Append(" a a a a a a a a"); b.Append(" a a a a a a a a"); } Document doc = new Document(); doc.Add(new Field("field", b.ToString(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.AddDocument(doc); writer.Close(); IndexReader reader = IndexReader.Open(dir); Assert.AreEqual(1, reader.MaxDoc()); Assert.AreEqual(1, reader.NumDocs()); Term t = new Term("field", "a"); Assert.AreEqual(1, reader.DocFreq(t)); TermDocs td = reader.TermDocs(t); td.Next(); Assert.AreEqual(128 * 1024, td.Freq()); reader.Close(); dir.Close(); } // Make sure that a Directory implementation that does // not use LockFactory at all (ie overrides makeLock and // implements its own private locking) works OK. This // was raised on java-dev as loss of backwards // compatibility. 
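        // MyRAMDirectory (defined at the top of this class) models that case: it sets lockFactory
        // to null and routes MakeLock through its own private SingleInstanceLockFactory.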
[Test] public virtual void TestNullLockFactory() { Directory dir = new MyRAMDirectory(this); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); for (int i = 0; i < 100; i++) { AddDoc(writer); } writer.Close(); Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir); Hits hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(100, hits.Length(), "did not get right number of hits"); writer.Close(); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.Close(); dir.Close(); } [Test] public virtual void TestFlushWithNoMerging() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(2); Document doc = new Document(); doc.Add(new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); for (int i = 0; i < 19; i++) writer.AddDocument(doc); writer.Flush(false, true); writer.Close(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); // Since we flushed w/o allowing merging we should now // have 10 segments System.Diagnostics.Debug.Assert(sis.Count == 10); } // Make sure we can flush segment w/ norms, then add // empty doc (no norms) and flush [Test] public virtual void TestEmptyDocAfterFlushingRealDoc() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); Document doc = new Document(); doc.Add(new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.AddDocument(doc); writer.Flush(); writer.AddDocument(new Document()); writer.Close(); IndexReader reader = IndexReader.Open(dir); Assert.AreEqual(2, reader.NumDocs()); } // Test calling optimize(false) whereby optimize is kicked // off but we don't wait for it to finish (but // writer.close()) does wait [Test] public virtual void TestBackgroundOptimize() { Directory dir = new MockRAMDirectory(); for (int pass = 0; pass < 2; pass++) { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMergeScheduler(new ConcurrentMergeScheduler()); Document doc = new Document(); doc.Add(new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(101); for (int i = 0; i < 200; i++) writer.AddDocument(doc); writer.Optimize(false); if (0 == pass) { writer.Close(); IndexReader reader = IndexReader.Open(dir); Assert.IsTrue(reader.IsOptimized()); reader.Close(); } else { // Get another segment to flush so we can verify it is // NOT included in the optimization writer.AddDocument(doc); writer.AddDocument(doc); writer.Close(); IndexReader reader = IndexReader.Open(dir); Assert.IsTrue(!reader.IsOptimized()); reader.Close(); SegmentInfos infos = new SegmentInfos(); infos.Read(dir); Assert.AreEqual(2, infos.Count); } } dir.Close(); } private void RmDir(System.IO.FileInfo dir) { String[] fullpathnames = System.IO.Directory.GetFileSystemEntries(dir.FullName); System.IO.FileInfo[] files = new System.IO.FileInfo[fullpathnames.Length]; for (int i = 0; i < files.Length; i++) files[i] = new System.IO.FileInfo(fullpathnames[i]); if (files != null) { for (int i = 0; i < files.Length; i++) { bool tmpBool; if (System.IO.File.Exists(files[i].FullName)) { System.IO.File.Delete(files[i].FullName); tmpBool = true; } else if (System.IO.Directory.Exists(files[i].FullName)) { System.IO.Directory.Delete(files[i].FullName); 
tmpBool = true; } else tmpBool = false; bool generatedAux = tmpBool; } } bool tmpBool2; if (System.IO.File.Exists(dir.FullName)) { System.IO.File.Delete(dir.FullName); tmpBool2 = true; } else if (System.IO.Directory.Exists(dir.FullName)) { System.IO.Directory.Delete(dir.FullName); tmpBool2 = true; } else tmpBool2 = false; bool generatedAux2 = tmpBool2; } /// Test that no NullPointerException will be raised, /// when adding one document with a single, empty field /// and term vectors enabled. /// /// IOException /// /// [Test] public virtual void TestBadSegment() { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter ir = new IndexWriter(dir, new StandardAnalyzer(), true); Document document = new Document(); document.Add(new Field("tvtest", "", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES)); ir.AddDocument(document); ir.Close(); dir.Close(); } // LUCENE-1008 [Test] public virtual void TestNoTermVectorAfterTermVector() { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true); Document document = new Document(); document.Add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES)); iw.AddDocument(document); document = new Document(); document.Add(new Field("tvtest", "x y z", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); iw.AddDocument(document); // Make first segment iw.Flush(); document.Add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES)); iw.AddDocument(document); // Make 2nd segment iw.Flush(); iw.Optimize(); iw.Close(); dir.Close(); } // LUCENE-1010 [Test] public virtual void TestNoTermVectorAfterTermVectorMerge() { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true); Document document = new Document(); document.Add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES)); iw.AddDocument(document); iw.Flush(); document = new Document(); document.Add(new Field("tvtest", "x y z", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); iw.AddDocument(document); // Make first segment iw.Flush(); iw.Optimize(); document.Add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES)); iw.AddDocument(document); // Make 2nd segment iw.Flush(); iw.Optimize(); iw.Close(); dir.Close(); } // LUCENE-1036 [Test] public virtual void TestMaxThreadPriority() { int pri = (System.Int32) SupportClass.ThreadClass.Current().Priority; try { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true); Document document = new Document(); document.Add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES)); iw.SetMaxBufferedDocs(2); iw.SetMergeFactor(2); SupportClass.ThreadClass.Current().Priority = (System.Threading.ThreadPriority) System.Threading.ThreadPriority.Highest; for (int i = 0; i < 4; i++) iw.AddDocument(document); iw.Close(); } finally { SupportClass.ThreadClass.Current().Priority = (System.Threading.ThreadPriority) pri; } } // Just intercepts all merges & verifies that we are never // merging a segment with >= 20 (maxMergeDocs) docs private class MyMergeScheduler : MergeScheduler { public MyMergeScheduler(TestIndexWriter enclosingInstance) { InitBlock(enclosingInstance); } private void InitBlock(TestIndexWriter enclosingInstance) { this.enclosingInstance = enclosingInstance; } private TestIndexWriter 
enclosingInstance; public TestIndexWriter Enclosing_Instance { get { return enclosingInstance; } } public override void Merge(IndexWriter writer) { lock (this) { while (true) { MergePolicy.OneMerge merge = writer.GetNextMerge(); if (merge == null) break; for (int i = 0; i < merge.Segments_ForNUnitTest.Count; i++) System.Diagnostics.Debug.Assert(merge.Segments_ForNUnitTest.Info(i).docCount < 20); writer.Merge(merge); } } } public override void Close() { } } // LUCENE-1013 [Test] public virtual void TestSetMaxMergeDocs() { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true); iw.SetMergeScheduler(new MyMergeScheduler(this)); iw.SetMaxMergeDocs(20); iw.SetMaxBufferedDocs(2); iw.SetMergeFactor(2); Document document = new Document(); document.Add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES)); for (int i = 0; i < 177; i++) iw.AddDocument(document); iw.Close(); } // LUCENE-1072 [Test] public virtual void TestExceptionFromTokenStream() { RAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new AnonymousClassAnalyzer(this), true); Document doc = new Document(); System.String contents = "aa bb cc dd ee ff gg hh ii jj kk"; doc.Add(new Field("content", contents, Field.Store.NO, Field.Index.TOKENIZED)); try { writer.AddDocument(doc); Assert.Fail("did not hit expected exception"); } catch (System.Exception) { } // Make sure we can add another normal document doc = new Document(); doc.Add(new Field("content", "aa bb cc dd", Field.Store.NO, Field.Index.TOKENIZED)); writer.AddDocument(doc); // Make sure we can add another normal document doc = new Document(); doc.Add(new Field("content", "aa bb cc dd", Field.Store.NO, Field.Index.TOKENIZED)); writer.AddDocument(doc); writer.Close(); IndexReader reader = IndexReader.Open(dir); Term t = new Term("content", "aa"); Assert.AreEqual(reader.DocFreq(t), 3); // Make sure the doc that hit the exception was marked // as deleted: TermDocs tdocs = reader.TermDocs(t); int count = 0; while (tdocs.Next()) { count++; } Assert.AreEqual(2, count); Assert.AreEqual(reader.DocFreq(new Term("content", "gg")), 0); reader.Close(); dir.Close(); } private class FailOnlyOnFlush : MockRAMDirectory.Failure { new internal bool doFail = false; internal int count; public override void SetDoFail() { this.doFail = true; } public override void ClearDoFail() { this.doFail = false; } public override void Eval(MockRAMDirectory dir) { if (doFail) { System.Diagnostics.StackFrame[] frames = new System.Diagnostics.StackTrace().GetFrames(); for (int i = 0; i < frames.Length; i++) { System.String methodName = frames[i].GetMethod().Name; System.String className = frames[i].GetMethod().DeclaringType.ToString(); if ("Lucene.Net.Index.DocumentsWriter".Equals(className) && "AppendPostings".Equals(methodName) && count++ == 30) { doFail = false; throw new System.IO.IOException("now failing during flush"); } } } } } // LUCENE-1072: make sure an errant exception on flushing // one segment only takes out those docs in that one flush [Test] public virtual void TestDocumentsWriterAbort() { MockRAMDirectory dir = new MockRAMDirectory(); FailOnlyOnFlush failure = new FailOnlyOnFlush(); failure.SetDoFail(); dir.FailOn(failure); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer()); writer.SetMaxBufferedDocs(2); Document doc = new Document(); System.String contents = "aa bb cc dd ee ff gg hh ii jj kk"; doc.Add(new Field("content", contents, Field.Store.NO, 
Field.Index.TOKENIZED)); bool hitError = false; for (int i = 0; i < 200; i++) { try { writer.AddDocument(doc); } catch (System.IO.IOException) { // only one flush should fail: Assert.IsFalse(hitError); hitError = true; } } Assert.IsTrue(hitError); writer.Close(); IndexReader reader = IndexReader.Open(dir); Assert.AreEqual(198, reader.DocFreq(new Term("content", "aa"))); reader.Close(); } private class CrashingFilter : TokenFilter { private void InitBlock(TestIndexWriter enclosingInstance) { this.enclosingInstance = enclosingInstance; } private TestIndexWriter enclosingInstance; public TestIndexWriter Enclosing_Instance { get { return enclosingInstance; } } internal System.String fieldName; internal int count; public CrashingFilter(TestIndexWriter enclosingInstance, System.String fieldName, TokenStream input):base(input) { InitBlock(enclosingInstance); this.fieldName = fieldName; } public override Token Next(Token result) { if (this.fieldName.Equals("crash") && count++ >= 4) throw new System.IO.IOException("I'm experiencing problems"); return input.Next(result); } public override void Reset() { base.Reset(); count = 0; } } [Test] public virtual void TestDocumentsWriterExceptions() { Analyzer analyzer = new AnonymousClassAnalyzer1(this); for (int i = 0; i < 2; i++) { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, analyzer); //writer.setInfoStream(System.out); Document doc = new Document(); doc.Add(new Field("contents", "here are some contents", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.AddDocument(doc); writer.AddDocument(doc); doc.Add(new Field("crash", "this should crash after 4 terms", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc.Add(new Field("other", "this will not get indexed", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); try { writer.AddDocument(doc); Assert.Fail("did not hit expected exception"); } catch (System.IO.IOException) { } if (0 == i) { doc = new Document(); doc.Add(new Field("contents", "here are some contents", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.AddDocument(doc); writer.AddDocument(doc); } writer.Close(); IndexReader reader = IndexReader.Open(dir); int expected = 3 + (1 - i) * 2; Assert.AreEqual(expected, reader.DocFreq(new Term("contents", "here"))); Assert.AreEqual(expected, reader.MaxDoc()); int numDel = 0; for (int j = 0; j < reader.MaxDoc(); j++) { if (reader.IsDeleted(j)) numDel++; else reader.Document(j); reader.GetTermFreqVectors(j); } reader.Close(); Assert.AreEqual(1, numDel); writer = new IndexWriter(dir, analyzer); writer.SetMaxBufferedDocs(10); doc = new Document(); doc.Add(new Field("contents", "here are some contents", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); for (int j = 0; j < 17; j++) writer.AddDocument(doc); writer.Optimize(); writer.Close(); reader = IndexReader.Open(dir); expected = 19 + (1 - i) * 2; Assert.AreEqual(expected, reader.DocFreq(new Term("contents", "here"))); Assert.AreEqual(expected, reader.MaxDoc()); numDel = 0; for (int j = 0; j < reader.MaxDoc(); j++) { if (reader.IsDeleted(j)) numDel++; else reader.Document(j); reader.GetTermFreqVectors(j); } reader.Close(); Assert.AreEqual(0, numDel); dir.Close(); } } [Test] public virtual void TestDocumentsWriterExceptionThreads() { Analyzer analyzer = new AnonymousClassAnalyzer2(this); int NUM_THREAD = 3; int NUM_ITER = 100; for (int i 
= 0; i < 2; i++) { MockRAMDirectory dir = new MockRAMDirectory(); { IndexWriter writer = new IndexWriter(dir, analyzer); int finalI = i; SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[NUM_THREAD]; for (int t = 0; t < NUM_THREAD; t++) { threads[t] = new AnonymousClassThread(NUM_ITER, writer, finalI, this); threads[t].Start(); } for (int t = 0; t < NUM_THREAD; t++) while (true) try { threads[t].Join(); break; } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } writer.Close(); } IndexReader reader = IndexReader.Open(dir); int expected = (3 + (1 - i) * 2) * NUM_THREAD * NUM_ITER; Assert.AreEqual(expected, reader.DocFreq(new Term("contents", "here"))); Assert.AreEqual(expected, reader.MaxDoc()); int numDel = 0; for (int j = 0; j < reader.MaxDoc(); j++) { if (reader.IsDeleted(j)) numDel++; else reader.Document(j); reader.GetTermFreqVectors(j); } reader.Close(); Assert.AreEqual(NUM_THREAD * NUM_ITER, numDel); IndexWriter writer2 = new IndexWriter(dir, analyzer); writer2.SetMaxBufferedDocs(10); Document doc = new Document(); doc.Add(new Field("contents", "here are some contents", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); for (int j = 0; j < 17; j++) writer2.AddDocument(doc); writer2.Optimize(); writer2.Close(); reader = IndexReader.Open(dir); expected += 17 - NUM_THREAD * NUM_ITER; Assert.AreEqual(expected, reader.DocFreq(new Term("contents", "here"))); Assert.AreEqual(expected, reader.MaxDoc()); numDel = 0; for (int j = 0; j < reader.MaxDoc(); j++) { if (reader.IsDeleted(j)) numDel++; else reader.Document(j); reader.GetTermFreqVectors(j); } reader.Close(); Assert.AreEqual(0, numDel); dir.Close(); } } [Test] public virtual void TestVariableSchema() { MockRAMDirectory dir = new MockRAMDirectory(); int delID = 0; for (int i = 0; i < 20; i++) { IndexWriter writer = new IndexWriter(dir, false, new WhitespaceAnalyzer()); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(2); writer.SetUseCompoundFile(false); Document doc = new Document(); System.String contents = "aa bb cc dd ee ff gg hh ii jj kk"; if (i == 7) { // Add empty docs here doc.Add(new Field("content3", "", Field.Store.NO, Field.Index.TOKENIZED)); } else { Field.Store storeVal; if (i % 2 == 0) { doc.Add(new Field("content4", contents, Field.Store.YES, Field.Index.TOKENIZED)); storeVal = Field.Store.YES; } else storeVal = Field.Store.NO; doc.Add(new Field("content1", contents, storeVal, Field.Index.TOKENIZED)); doc.Add(new Field("content3", "", Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("content5", "", storeVal, Field.Index.TOKENIZED)); } for (int j = 0; j < 4; j++) writer.AddDocument(doc); writer.Close(); IndexReader reader = IndexReader.Open(dir); reader.DeleteDocument(delID++); reader.Close(); if (0 == i % 4) { writer = new IndexWriter(dir, false, new WhitespaceAnalyzer()); writer.SetUseCompoundFile(false); writer.Optimize(); writer.Close(); } } } //[Test] //public virtual void TestNoWaitClose() //{ // RAMDirectory directory = new MockRAMDirectory(); // Document doc = new Document(); // Field idField = new Field("id", "", Field.Store.YES, Field.Index.UN_TOKENIZED); // doc.Add(idField); // for (int pass = 0; pass < 3; pass++) // { // bool autoCommit = pass % 2 == 0; // IndexWriter writer = new IndexWriter(directory, autoCommit, new WhitespaceAnalyzer(), true); // //System.out.println("TEST: pass=" + pass + " ac=" + autoCommit + " cms=" + (pass >= 2)); // for (int iter = 0; iter < 10; iter++) // { // 
//System.out.println("TEST: iter=" + iter); // MergeScheduler ms; // if (pass >= 2) // ms = new ConcurrentMergeScheduler(); // else // ms = new SerialMergeScheduler(); // writer.SetMergeScheduler(ms); // writer.SetMaxBufferedDocs(2); // writer.SetMergeFactor(100); // for (int j = 0; j < 199; j++) // { // idField.SetValue(System.Convert.ToString(iter * 201 + j)); // writer.AddDocument(doc); // } // int delID = iter * 199; // for (int j = 0; j < 20; j++) // { // writer.DeleteDocuments(new Term("id", System.Convert.ToString(delID))); // delID += 5; // } // // Force a bunch of merge threads to kick off so we // // stress out aborting them on close: // writer.SetMergeFactor(2); // IndexWriter finalWriter = writer; // System.Collections.ArrayList failure = new System.Collections.ArrayList(); // SupportClass.ThreadClass t1 = new AnonymousClassThread1(finalWriter, doc, failure, this); // if (failure.Count > 0) // { // throw (System.Exception) failure[0]; // } // t1.Start(); // writer.Close(false); // while (true) // { // try // { // t1.Join(); // break; // } // catch (System.Threading.ThreadInterruptedException) // { // SupportClass.ThreadClass.Current().Interrupt(); // } // } // // Make sure reader can read // IndexReader reader = IndexReader.Open(directory); // reader.Close(); // // Reopen // writer = new IndexWriter(directory, autoCommit, new WhitespaceAnalyzer(), false); // } // writer.Close(); // } // directory.Close(); //} // Used by test cases below private class IndexerThread : SupportClass.ThreadClass { private void InitBlock(TestIndexWriter enclosingInstance) { this.enclosingInstance = enclosingInstance; } private TestIndexWriter enclosingInstance; public TestIndexWriter Enclosing_Instance { get { return enclosingInstance; } } internal bool diskFull; internal System.Exception error; //internal AlreadyClosedException ace; internal IndexWriter writer; internal bool noErrors; public IndexerThread(TestIndexWriter enclosingInstance, IndexWriter writer, bool noErrors) { InitBlock(enclosingInstance); this.writer = writer; this.noErrors = noErrors; } override public void Run() { Document doc = new Document(); doc.Add(new Field("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); int idUpto = 0; int fullCount = 0; long stopTime = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 + 500; while ((System.DateTime.Now.Ticks - 621355968000000000) / 10000 < stopTime) { try { writer.UpdateDocument(new Term("id", "" + (idUpto++)), doc); } catch (System.IO.IOException ioe) { if (ioe.Message.StartsWith("fake disk full at") || ioe.Message.Equals("now failing on purpose")) { diskFull = true; try { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1)); } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } if (fullCount++ >= 5) break; } else { if (noErrors) { System.Console.Out.WriteLine(SupportClass.ThreadClass.Current().Name + ": ERROR: unexpected IOException:"); System.Console.Out.WriteLine(ioe.StackTrace); error = ioe; } break; } } catch (System.Exception t) { if (noErrors) { System.Console.Out.WriteLine(SupportClass.ThreadClass.Current().Name + ": ERROR: unexpected Throwable:"); System.Console.Out.WriteLine(t.StackTrace); error = t; } break; } } } } // LUCENE-1130: make sure we can close() even while // threads are trying to add documents. 
Strictly // speaking, this isn't valid use of Lucene's APIs, but we // still want to be robust to this case: [Test] public virtual void TestCloseWithThreads() { int NUM_THREADS = 3; for (int iter = 0; iter < 50; iter++) { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer()); ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); writer.SetMergeScheduler(cms); writer.SetMaxBufferedDocs(10); writer.SetMergeFactor(4); IndexerThread[] threads = new IndexerThread[NUM_THREADS]; //bool diskFull = false; for (int i = 0; i < NUM_THREADS; i++) threads[i] = new IndexerThread(this, writer, false); for (int i = 0; i < NUM_THREADS; i++) threads[i].Start(); try { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 50)); } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } writer.Close(false); // Make sure threads that are adding docs are not hung: for (int i = 0; i < NUM_THREADS; i++) { while (true) { try { // Without fix for LUCENE-1130: one of the // threads will hang threads[i].Join(); break; } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } } if (threads[i].IsAlive) Assert.Fail("thread seems to be hung"); } // Quick test to make sure index is not corrupt: IndexReader reader = IndexReader.Open(dir); TermDocs tdocs = reader.TermDocs(new Term("field", "aaa")); int count = 0; while (tdocs.Next()) { count++; } Assert.IsTrue(count > 0); reader.Close(); dir.Close(); } } // LUCENE-1130: make sure immediate disk full on creating // an IndexWriter (hit during DW.ThreadState.init()) is // OK: [Test] public virtual void TestImmediateDiskFull() { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer()); dir.SetMaxSizeInBytes(dir.GetRecomputedActualSizeInBytes()); writer.SetMaxBufferedDocs(2); Document doc = new Document(); doc.Add(new Field("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); try { writer.AddDocument(doc); Assert.Fail("did not hit disk full"); } catch (System.IO.IOException) { } // Without fix for LUCENE-1130: this call will hang: try { writer.AddDocument(doc); Assert.Fail("did not hit disk full"); } catch (System.IO.IOException) { } try { writer.Close(false); Assert.Fail("did not hit disk full"); } catch (System.IO.IOException) { } } // LUCENE-1130: make sure immediate disk full on creating // an IndexWriter (hit during DW.ThreadState.init()), with // multiple threads, is OK: [Test] public virtual void TestImmediateDiskFullWithThreads() { int NUM_THREADS = 3; for (int iter = 0; iter < 10; iter++) { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer()); ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); // We expect disk full exceptions in the merge threads cms.SetSuppressExceptions_ForNUnitTest(); writer.SetMergeScheduler(cms); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(4); dir.SetMaxSizeInBytes(4 * 1024 + 20 * iter); IndexerThread[] threads = new IndexerThread[NUM_THREADS]; //bool diskFull = false; for (int i = 0; i < NUM_THREADS; i++) threads[i] = new IndexerThread(this, writer, true); for (int i = 0; i < NUM_THREADS; i++) threads[i].Start(); for (int i = 0; i < NUM_THREADS; i++) { while (true) { try { // Without fix for LUCENE-1130: one of the // threads will hang
threads[i].Join(); break; } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } } if (threads[i].IsAlive) Assert.Fail("thread seems to be hung"); else Assert.IsTrue(threads[i].error == null, "hit unexpected Throwable"); } try { writer.Close(false); } catch (System.IO.IOException) { } dir.Close(); } } // Throws IOException during FieldsWriter.flushDocument and during DocumentsWriter.abort private class FailOnlyOnAbortOrFlush : MockRAMDirectory.Failure { private bool onlyOnce; public FailOnlyOnAbortOrFlush(bool onlyOnce) { this.onlyOnce = onlyOnce; } public override void Eval(MockRAMDirectory dir) { if (doFail) { System.Diagnostics.StackFrame[] frames = new System.Diagnostics.StackTrace().GetFrames(); for (int i = 0; i < frames.Length; i++) { String methodName = frames[i].GetMethod().Name; if ("Abort".Equals(methodName) || "FlushDocument".Equals(methodName)) { if (onlyOnce) doFail = false; throw new System.IO.IOException("now failing on purpose"); } } } } } // Runs test, with one thread, using the specific failure // to trigger an IOException public virtual void _testSingleThreadFailure(MockRAMDirectory.Failure failure) { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer()); writer.SetMaxBufferedDocs(2); Document doc = new Document(); doc.Add(new Field("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); for (int i = 0; i < 6; i++) writer.AddDocument(doc); dir.FailOn(failure); failure.SetDoFail(); try { writer.AddDocument(doc); writer.AddDocument(doc); Assert.Fail("did not hit exception"); } catch (System.IO.IOException) { } failure.ClearDoFail(); writer.AddDocument(doc); writer.Close(false); } // Runs test, with multiple threads, using the specific // failure to trigger an IOException public virtual void _testMultipleThreadsFailure(MockRAMDirectory.Failure failure) { int NUM_THREADS = 3; for (int iter = 0; iter < 5; iter++) { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer()); ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); // We expect disk full exceptions in the merge threads cms.SetSuppressExceptions_ForNUnitTest(); writer.SetMergeScheduler(cms); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(4); IndexerThread[] threads = new IndexerThread[NUM_THREADS]; //bool diskFull = false; for (int i = 0; i < NUM_THREADS; i++) threads[i] = new IndexerThread(this, writer, true); for (int i = 0; i < NUM_THREADS; i++) threads[i].Start(); try { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 10)); } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } dir.FailOn(failure); failure.SetDoFail(); for (int i = 0; i < NUM_THREADS; i++) { while (true) { try { threads[i].Join(); break; } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } } if (threads[i].IsAlive) Assert.Fail("thread seems to be hung"); else Assert.IsTrue(threads[i].error == null, "hit unexpected Throwable"); } bool success = false; try { writer.Close(false); success = true; } catch (System.IO.IOException) { } if (success) { IndexReader reader = IndexReader.Open(dir); for (int j = 0; j < reader.MaxDoc(); j++) { if (!reader.IsDeleted(j)) { reader.Document(j); reader.GetTermFreqVectors(j); } } reader.Close(); } dir.Close(); } } // LUCENE-1130: make
sure initial IOException, and then 2nd // IOException during abort(), is OK: [Test] public virtual void TestIOExceptionDuringAbort() { _testSingleThreadFailure(new FailOnlyOnAbortOrFlush(false)); } // LUCENE-1130: make sure initial IOException, and then 2nd // IOException during abort(), is OK: [Test] public virtual void TestIOExceptionDuringAbortOnlyOnce() { _testSingleThreadFailure(new FailOnlyOnAbortOrFlush(true)); } // LUCENE-1130: make sure initial IOException, and then 2nd // IOException during abort(), with multiple threads, is OK: [Test] public virtual void TestIOExceptionDuringAbortWithThreads() { _testMultipleThreadsFailure(new FailOnlyOnAbortOrFlush(false)); } // LUCENE-1130: make sure initial IOException, and then 2nd // IOException during abort(), with multiple threads, is OK: [Test] public virtual void TestIOExceptionDuringAbortWithThreadsOnlyOnce() { _testMultipleThreadsFailure(new FailOnlyOnAbortOrFlush(true)); } // Throws IOException during DocumentsWriter.closeDocStore private class FailOnlyInCloseDocStore : MockRAMDirectory.Failure { private bool onlyOnce; public FailOnlyInCloseDocStore(bool onlyOnce) { this.onlyOnce = onlyOnce; } public override void Eval(MockRAMDirectory dir) { if (doFail) { System.Diagnostics.StackFrame[] frames = new System.Diagnostics.StackTrace().GetFrames(); for (int i = 0; i < frames.Length; i++) { String methodName = frames[i].GetMethod().Name; if ("CloseDocStore".Equals(methodName)) { if (onlyOnce) doFail = false; throw new System.IO.IOException("now failing on purpose"); } } } } } // LUCENE-1130: test IOException in closeDocStore [Test] public virtual void TestIOExceptionDuringCloseDocStore() { _testSingleThreadFailure(new FailOnlyInCloseDocStore(false)); } // LUCENE-1130: test IOException in closeDocStore [Test] public virtual void TestIOExceptionDuringCloseDocStoreOnlyOnce() { _testSingleThreadFailure(new FailOnlyInCloseDocStore(true)); } // LUCENE-1130: test IOException in closeDocStore, with threads [Test] public virtual void TestIOExceptionDuringCloseDocStoreWithThreads() { _testMultipleThreadsFailure(new FailOnlyInCloseDocStore(false)); } // LUCENE-1130: test IOException in closeDocStore, with threads [Test] public virtual void TestIOExceptionDuringCloseDocStoreWithThreadsOnlyOnce() { _testMultipleThreadsFailure(new FailOnlyInCloseDocStore(true)); } // Throws IOException during DocumentsWriter.writeSegment private class FailOnlyInWriteSegment : MockRAMDirectory.Failure { private bool onlyOnce; public FailOnlyInWriteSegment(bool onlyOnce) { this.onlyOnce = onlyOnce; } public override void Eval(MockRAMDirectory dir) { if (doFail) { System.Diagnostics.StackFrame[] frames = new System.Diagnostics.StackTrace().GetFrames(); for (int i = 0; i < frames.Length; i++) { String methodName = frames[i].GetMethod().Name; if ("WriteSegment".Equals(methodName)) { if (onlyOnce) doFail = false; throw new System.IO.IOException("now failing on purpose"); } } } } } // LUCENE-1130: test IOException in writeSegment [Test] public virtual void TestIOExceptionDuringWriteSegment() { _testSingleThreadFailure(new FailOnlyInWriteSegment(false)); } // LUCENE-1130: test IOException in writeSegment [Test] public virtual void TestIOExceptionDuringWriteSegmentOnlyOnce() { _testSingleThreadFailure(new FailOnlyInWriteSegment(true)); } // LUCENE-1130: test IOException in writeSegment, with threads [Test] public virtual void TestIOExceptionDuringWriteSegmentWithThreads() { _testMultipleThreadsFailure(new FailOnlyInWriteSegment(false)); } // LUCENE-1130: test IOException in
writeSegment, with threads [Test] public virtual void TestIOExceptionDuringWriteSegmentWithThreadsOnlyOnce() { _testMultipleThreadsFailure(new FailOnlyInWriteSegment(true)); } // LUCENE-1168 [Test] public virtual void TestTermVectorCorruption() { Directory dir = new MockRAMDirectory(); for (int iter = 0; iter < 4; iter++) { bool autoCommit = 1 == iter / 2; IndexWriter writer = new IndexWriter(dir, autoCommit, new StandardAnalyzer()); writer.SetMaxBufferedDocs(2); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); writer.SetMergeScheduler(new SerialMergeScheduler()); writer.SetMergePolicy(new LogDocMergePolicy()); Document document = new Document(); Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO); document.Add(storedField); writer.AddDocument(document); writer.AddDocument(document); document = new Document(); document.Add(storedField); Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(termVectorField); writer.AddDocument(document); writer.Optimize(); writer.Close(); IndexReader reader = IndexReader.Open(dir); for (int i = 0; i < reader.NumDocs(); i++) { reader.Document(i); reader.GetTermFreqVectors(i); } reader.Close(); writer = new IndexWriter(dir, autoCommit, new StandardAnalyzer()); writer.SetMaxBufferedDocs(2); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); writer.SetMergeScheduler(new SerialMergeScheduler()); writer.SetMergePolicy(new LogDocMergePolicy()); Directory[] indexDirs = new Directory[]{dir}; writer.AddIndexes(indexDirs); writer.Close(); } dir.Close(); } // LUCENE-1168 [Test] public virtual void TestTermVectorCorruption2() { Directory dir = new MockRAMDirectory(); for (int iter = 0; iter < 4; iter++) { bool autoCommit = 1 == iter / 2; IndexWriter writer = new IndexWriter(dir, autoCommit, new StandardAnalyzer()); writer.SetMaxBufferedDocs(2); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); writer.SetMergeScheduler(new SerialMergeScheduler()); writer.SetMergePolicy(new LogDocMergePolicy()); Document document = new Document(); Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO); document.Add(storedField); writer.AddDocument(document); writer.AddDocument(document); document = new Document(); document.Add(storedField); Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(termVectorField); writer.AddDocument(document); writer.Optimize(); writer.Close(); IndexReader reader = IndexReader.Open(dir); Assert.IsTrue(reader.GetTermFreqVectors(0) == null); Assert.IsTrue(reader.GetTermFreqVectors(1) == null); Assert.IsTrue(reader.GetTermFreqVectors(2) != null); reader.Close(); } dir.Close(); } // LUCENE-1168 [Test] public virtual void TestTermVectorCorruption3() { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer()); writer.SetMaxBufferedDocs(2); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); writer.SetMergeScheduler(new SerialMergeScheduler()); writer.SetMergePolicy(new LogDocMergePolicy()); Document document = new Document(); document = new Document(); Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO); document.Add(storedField); Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS); 
document.Add(termVectorField); for (int i = 0; i < 10; i++) writer.AddDocument(document); writer.Close(); writer = new IndexWriter(dir, false, new StandardAnalyzer()); writer.SetMaxBufferedDocs(2); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); writer.SetMergeScheduler(new SerialMergeScheduler()); writer.SetMergePolicy(new LogDocMergePolicy()); for (int i = 0; i < 6; i++) writer.AddDocument(document); writer.Optimize(); writer.Close(); IndexReader reader = IndexReader.Open(dir); for (int i = 0; i < 10; i++) { reader.GetTermFreqVectors(i); reader.Document(i); } reader.Close(); dir.Close(); } // Intercepts GetNextMerge and counts how many merges the // writer actually runs (used by TestOptimizeOverMerge below) private class MyIndexWriter : IndexWriter { private void InitBlock(TestIndexWriter enclosingInstance) { this.enclosingInstance = enclosingInstance; } private TestIndexWriter enclosingInstance; public TestIndexWriter Enclosing_Instance { get { return enclosingInstance; } } internal int mergeCount; internal Directory myDir; public MyIndexWriter(TestIndexWriter enclosingInstance, Directory dir):base(dir, new StandardAnalyzer()) { InitBlock(enclosingInstance); myDir = dir; } public override MergePolicy.OneMerge GetNextMerge() { lock (this) { MergePolicy.OneMerge merge = base.GetNextMerge(); if (merge != null) mergeCount++; return merge; } } } [Test] public virtual void TestOptimizeOverMerge() { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer()); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(100); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); Document document = new Document(); document = new Document(); Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO); document.Add(storedField); Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(termVectorField); for (int i = 0; i < 170; i++) writer.AddDocument(document); writer.Close(); MyIndexWriter myWriter = new MyIndexWriter(this, dir); myWriter.Optimize(); Assert.AreEqual(10, myWriter.mergeCount); } // LUCENE-1179 [Test] public virtual void TestEmptyFieldName() { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer()); Document doc = new Document(); doc.Add(new Field("", "a b c", Field.Store.NO, Field.Index.TOKENIZED)); writer.AddDocument(doc); writer.Close(); } // LUCENE-1198 public class MockIndexWriter : IndexWriter { public MockIndexWriter(Directory dir, bool autoCommit, Analyzer a, bool create) : base(dir, autoCommit, a, create) { } internal bool doFail; protected override bool TestPoint(String name) { if (doFail && name.Equals("DocumentsWriter.ThreadState.init start")) throw new SystemException("intentionally failing"); return true; } } [Test] public void TestExceptionDocumentsWriterInit() { MockRAMDirectory dir = new MockRAMDirectory(); MockIndexWriter w = new MockIndexWriter(dir, false, new WhitespaceAnalyzer(), true); Document doc = new Document(); doc.Add(new Field("field", "a field", Field.Store.YES, Field.Index.TOKENIZED)); w.AddDocument(doc); w.doFail = true; try { w.AddDocument(doc); Assert.Fail("did not hit exception"); } catch (System.Exception) { // expected } w.Close(); _TestUtil.CheckIndex(dir); dir.Close(); } // LUCENE-1208 private class AnonymousClassAnalyzer3 : Analyzer { public AnonymousClassAnalyzer3(TestIndexWriter enclosingInstance) { InitBlock(enclosingInstance);
} private void InitBlock(TestIndexWriter enclosingInstance) { this.enclosingInstance = enclosingInstance; } private TestIndexWriter enclosingInstance; public TestIndexWriter Enclosing_Instance { get { return enclosingInstance; } } public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) { return new CrashingFilter(Enclosing_Instance, fieldName, new WhitespaceTokenizer(reader)); } } [Test] public void TestExceptionJustBeforeFlush() { MockRAMDirectory dir = new MockRAMDirectory(); MockIndexWriter w = new MockIndexWriter(dir, false, new WhitespaceAnalyzer(), true); w.SetMaxBufferedDocs(2); Document doc = new Document(); doc.Add(new Field("field", "a field", Field.Store.YES, Field.Index.TOKENIZED)); w.AddDocument(doc); Analyzer analyzer = new AnonymousClassAnalyzer3(this); Document crashDoc = new Document(); crashDoc.Add(new Field("crash", "do it on token 4", Field.Store.YES, Field.Index.TOKENIZED)); try { w.AddDocument(crashDoc, analyzer); Assert.Fail("did not hit expected exception"); } catch (System.IO.IOException) { // expected } w.AddDocument(doc); w.Close(); dir.Close(); } // LUCENE-1210 public class MockIndexWriter2 : IndexWriter { public MockIndexWriter2(Directory dir, bool autoCommit, Analyzer a, bool create) : base(dir, autoCommit, a, create) { } internal bool doFail; internal bool failed; protected override bool TestPoint(String name) { if (doFail && name.Equals("startMergeInit")) { failed = true; throw new SystemException("intentionally failing"); } return true; } } [Test] public void TestExceptionOnMergeInit() { MockRAMDirectory dir = new MockRAMDirectory(); MockIndexWriter2 w = new MockIndexWriter2(dir, false, new WhitespaceAnalyzer(), true); w.SetMaxBufferedDocs(2); w.SetMergeFactor(2); w.doFail = true; w.SetMergeScheduler(new ConcurrentMergeScheduler()); Document doc = new Document(); doc.Add(new Field("field", "a field", Field.Store.YES, Field.Index.TOKENIZED)); for (int i = 0; i < 10; i++) try { w.AddDocument(doc); } catch (System.Exception) { break; } ((ConcurrentMergeScheduler)w.GetMergeScheduler()).Sync(); Assert.IsTrue(w.failed); w.Close(); dir.Close(); } // LUCENE-1222 public class MockIndexWriter3 : IndexWriter { public MockIndexWriter3(Directory dir, bool autoCommit, Analyzer a, bool create) : base(dir, autoCommit, a, create) { } internal bool wasCalled; protected override void DoAfterFlush() { wasCalled = true; } } [Test] public void TestDoAfterFlush() { MockRAMDirectory dir = new MockRAMDirectory(); MockIndexWriter3 w = new MockIndexWriter3(dir, false, new WhitespaceAnalyzer(), true); Document doc = new Document(); doc.Add(new Field("field", "a field", Field.Store.YES, Field.Index.TOKENIZED)); w.AddDocument(doc); w.Flush(); Assert.IsTrue(w.wasCalled); w.wasCalled = false; w.DeleteDocuments(new Term("field", "field")); w.Flush(); Assert.IsTrue(w.wasCalled); w.Close(); IndexReader ir = IndexReader.Open(dir); Assert.AreEqual(1, ir.MaxDoc()); Assert.AreEqual(0, ir.NumDocs()); ir.Close(); dir.Close(); } // LUCENE-1255 [Test] public void TestNegativePositions() { SinkTokenizer tokens = new SinkTokenizer(); Token t = new Token(); t.SetTermText("a"); t.SetPositionIncrement(0); tokens.Add(t); t.SetTermText("b"); t.SetPositionIncrement(1); tokens.Add(t); t.SetTermText("c"); tokens.Add(t); MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true); Document doc = new Document(); doc.Add(new Field("field", tokens)); w.AddDocument(doc); w.Close(); IndexSearcher s = new
IndexSearcher(dir); PhraseQuery pq = new PhraseQuery(); pq.Add(new Term("field", "a")); pq.Add(new Term("field", "b")); pq.Add(new Term("field", "c")); Hits hits = s.Search(pq); Assert.AreEqual(1, hits.Length()); Query q = new SpanTermQuery(new Term("field", "a")); hits = s.Search(q); Assert.AreEqual(1, hits.Length()); TermPositions tps = s.GetIndexReader().TermPositions(new Term("field", "a")); Assert.IsTrue(tps.Next()); Assert.AreEqual(1, tps.Freq()); Assert.AreEqual(-1, tps.NextPosition()); Assert.IsTrue(_TestUtil.CheckIndex(dir)); s.Close(); dir.Close(); } } }