/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using NUnit.Framework; using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer; using Document = Lucene.Net.Documents.Document; using Field = Lucene.Net.Documents.Field; using IndexSearcher = Lucene.Net.Search.IndexSearcher; using Hits = Lucene.Net.Search.Hits; using TermQuery = Lucene.Net.Search.TermQuery; using Directory = Lucene.Net.Store.Directory; using FSDirectory = Lucene.Net.Store.FSDirectory; using RAMDirectory = Lucene.Net.Store.RAMDirectory; using IndexInput = Lucene.Net.Store.IndexInput; using IndexOutput = Lucene.Net.Store.IndexOutput; using MockRAMDirectory = Lucene.Net.Store.MockRAMDirectory; using LockFactory = Lucene.Net.Store.LockFactory; using Lock = Lucene.Net.Store.Lock; using SingleInstanceLockFactory = Lucene.Net.Store.SingleInstanceLockFactory; namespace Lucene.Net.Index { /// goller /// /// $Id: TestIndexWriter.java 387550 2006-03-21 15:36:32Z yonik $ /// [TestFixture] public class TestIndexWriter { [Serializable] public class MyRAMDirectory : RAMDirectory { private void InitBlock(TestIndexWriter enclosingInstance) { this.enclosingInstance = enclosingInstance; } private TestIndexWriter enclosingInstance; public TestIndexWriter Enclosing_Instance { get { return enclosingInstance; } } private LockFactory myLockFactory; internal MyRAMDirectory(TestIndexWriter enclosingInstance) { InitBlock(enclosingInstance); lockFactory = null; myLockFactory = new SingleInstanceLockFactory(); } public override Lock MakeLock(System.String name) { return myLockFactory.MakeLock(name); } } [Test] public virtual void TestDocCount() { Directory dir = new RAMDirectory(); IndexWriter writer = null; IndexReader reader = null; int i; IndexWriter.SetDefaultWriteLockTimeout(2000); Assert.AreEqual(2000, IndexWriter.GetDefaultWriteLockTimeout()); writer = new IndexWriter(dir, new WhitespaceAnalyzer()); IndexWriter.SetDefaultWriteLockTimeout(1000); // add 100 documents for (i = 0; i < 100; i++) { AddDoc(writer); } Assert.AreEqual(100, writer.DocCount()); writer.Close(); // delete 40 documents reader = IndexReader.Open(dir); for (i = 0; i < 40; i++) { reader.DeleteDocument(i); } reader.Close(); // test doc count before segments are merged/index is optimized writer = new IndexWriter(dir, new WhitespaceAnalyzer()); Assert.AreEqual(100, writer.DocCount()); writer.Close(); reader = IndexReader.Open(dir); Assert.AreEqual(100, reader.MaxDoc()); Assert.AreEqual(60, reader.NumDocs()); reader.Close(); // optimize the index and check that the new doc count is correct writer = new IndexWriter(dir, new WhitespaceAnalyzer()); writer.Optimize(); Assert.AreEqual(60, writer.DocCount()); writer.Close(); // check that the index reader gives the same numbers. reader = IndexReader.Open(dir); Assert.AreEqual(60, reader.MaxDoc()); Assert.AreEqual(60, reader.NumDocs()); reader.Close(); // make sure opening a new index for create over // this existing one works correctly: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); Assert.AreEqual(0, writer.DocCount()); writer.Close(); } private void AddDoc(IndexWriter writer) { Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED)); writer.AddDocument(doc); } private void AddDocWithIndex(IndexWriter writer, int index) { Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); doc.Add(new Field("content", "aaa " + index, Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("id", "" + index, Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); } /* Test: make sure when we run out of disk space or hit random IOExceptions in any of the addIndexes(*) calls that 1) index is not corrupt (searcher can open/search it) and 2) transactional semantics are followed: either all or none of the incoming documents were in fact added. */ [Test] public virtual void TestAddIndexOnDiskFull() { int START_COUNT = 57; int NUM_DIR = 50; int END_COUNT = START_COUNT + NUM_DIR * 25; bool debug = false; // Build up a bunch of dirs that have indexes which we // will then merge together by calling addIndexes(*): Directory[] dirs = new Directory[NUM_DIR]; long inputDiskUsage = 0; for (int i = 0; i < NUM_DIR; i++) { dirs[i] = new RAMDirectory(); IndexWriter writer = new IndexWriter(dirs[i], new WhitespaceAnalyzer(), true); for (int j = 0; j < 25; j++) { AddDocWithIndex(writer, 25 * i + j); } writer.Close(); System.String[] files = dirs[i].List(); for (int j = 0; j < files.Length; j++) { inputDiskUsage += dirs[i].FileLength(files[j]); } } // Now, build a starting index that has START_COUNT docs. We // will then try to addIndexes into a copy of this: RAMDirectory startDir = new RAMDirectory(); IndexWriter writer2 = new IndexWriter(startDir, new WhitespaceAnalyzer(), true); for (int j = 0; j < START_COUNT; j++) { AddDocWithIndex(writer2, j); } writer2.Close(); // Make sure starting index seems to be working properly: Term searchTerm = new Term("content", "aaa"); IndexReader reader = IndexReader.Open(startDir); Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq"); IndexSearcher searcher = new IndexSearcher(reader); Hits hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(57, hits.Length(), "first number of hits"); searcher.Close(); reader.Close(); // Iterate with larger and larger amounts of free // disk space. With little free disk space, // addIndexes will certainly run out of space & // fail. Verify that when this happens, index is // not corrupt and index in fact has added no // documents. Then, we increase disk space by 1000 // bytes each iteration. At some point there is // enough free disk space and addIndexes should // succeed and index should show all documents were // added. // String[] files = startDir.list(); long diskUsage = startDir.SizeInBytes(); long startDiskUsage = 0; System.String[] files2 = startDir.List(); for (int i = 0; i < files2.Length; i++) { startDiskUsage += startDir.FileLength(files2[i]); } for (int method = 0; method < 3; method++) { // Start with 100 bytes more than we are currently using: long diskFree = diskUsage + 100; bool success = false; bool done = false; System.String methodName; if (0 == method) { methodName = "addIndexes(Directory[])"; } else if (1 == method) { methodName = "addIndexes(IndexReader[])"; } else { methodName = "addIndexesNoOptimize(Directory[])"; } System.String testName = "disk full test for method " + methodName + " with disk full at " + diskFree + " bytes"; int cycleCount = 0; while (!done) { cycleCount++; // Make a new dir that will enforce disk usage: MockRAMDirectory dir = new MockRAMDirectory(startDir); writer2 = new IndexWriter(dir, new WhitespaceAnalyzer(), false); System.IO.IOException err = null; for (int x = 0; x < 2; x++) { // Two loops: first time, limit disk space & // throw random IOExceptions; second time, no // disk space limit: double rate = 0.05; double diskRatio = ((double) diskFree) / diskUsage; long thisDiskFree; if (0 == x) { thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (debug) { System.Console.Out.WriteLine("\ncycle: " + methodName + ": " + diskFree + " bytes"); } } else { thisDiskFree = 0; rate = 0.0; if (debug) { System.Console.Out.WriteLine("\ncycle: " + methodName + ", same writer: unlimited disk space"); } } dir.SetMaxSizeInBytes(thisDiskFree); dir.SetRandomIOExceptionRate(rate, diskFree); try { if (0 == method) { writer2.AddIndexes(dirs); } else if (1 == method) { IndexReader[] readers = new IndexReader[dirs.Length]; for (int i = 0; i < dirs.Length; i++) { readers[i] = IndexReader.Open(dirs[i]); } try { writer2.AddIndexes(readers); } finally { for (int i = 0; i < dirs.Length; i++) { readers[i].Close(); } } } else { writer2.AddIndexesNoOptimize(dirs); } success = true; if (debug) { System.Console.Out.WriteLine(" success!"); } if (0 == x) { done = true; } } catch (System.IO.IOException e) { success = false; err = e; if (debug) { System.Console.Out.WriteLine(" hit IOException: " + e); } if (1 == x) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(methodName + " hit IOException after disk space was freed up"); } } // Whether we succeeded or failed, check that all // un-referenced files were in fact deleted (ie, // we did not create garbage). Just create a // new IndexFileDeleter, have it delete // unreferenced files, then verify that in fact // no files were deleted: System.String[] startFiles = dir.List(); SegmentInfos infos = new SegmentInfos(); infos.Read(dir); IndexFileDeleter d = new IndexFileDeleter(infos, dir); d.FindDeletableFiles(); d.DeleteFiles(); System.String[] endFiles = dir.List(); System.Array.Sort(startFiles); System.Array.Sort(endFiles); /* for(int i=0;i Make sure optimize doesn't use any more than 1X /// starting index size as its temporary free space /// required. /// [Test] public virtual void TestOptimizeTempSpaceUsage() { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); for (int j = 0; j < 500; j++) { AddDocWithIndex(writer, j); } writer.Close(); long startDiskUsage = 0; System.String[] files = dir.List(); for (int i = 0; i < files.Length; i++) { startDiskUsage += dir.FileLength(files[i]); } dir.ResetMaxUsedSizeInBytes(); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); writer.Optimize(); writer.Close(); long maxDiskUsage = dir.GetMaxUsedSizeInBytes(); Assert.IsTrue(maxDiskUsage <= 2 * startDiskUsage, "optimized used too much temporary space: starting usage was " + startDiskUsage + " bytes; max temp usage was " + maxDiskUsage + " but should have been " + (2 * startDiskUsage) + " (= 2X starting usage)"); } public System.String ArrayToString(System.String[] l) { System.String s = ""; for (int i = 0; i < l.Length; i++) { if (i > 0) { s += "\n "; } s += l[i]; } return s; } // Make sure we can open an index for create even when a // reader holds it open (this fails pre lock-less // commits on windows): [Test] public virtual void TestCreateWithReader() { System.String tempDir = System.IO.Path.GetTempPath(); if (tempDir == null) throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test"); System.IO.FileInfo indexDir = new System.IO.FileInfo(tempDir + "\\" + "lucenetestindexwriter"); try { Directory dir = FSDirectory.GetDirectory(indexDir); // add one document & Close writer IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); AddDoc(writer); writer.Close(); // now open reader: IndexReader reader = IndexReader.Open(dir); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); // now open index for create: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); Assert.AreEqual(writer.DocCount(), 0, "should be zero documents"); AddDoc(writer); writer.Close(); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); IndexReader reader2 = IndexReader.Open(dir); Assert.AreEqual(reader2.NumDocs(), 1, "should be one document"); reader.Close(); reader2.Close(); } finally { RmDir(indexDir); } } // Same test as above, but use IndexWriter constructor // that takes File: [Test] public virtual void TestCreateWithReader2() { System.String tempDir = System.IO.Path.GetTempPath(); if (tempDir == null) throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test"); System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindexwriter")); try { // add one document & Close writer IndexWriter writer = new IndexWriter(indexDir, new WhitespaceAnalyzer(), true); AddDoc(writer); writer.Close(); // now open reader: IndexReader reader = IndexReader.Open(indexDir); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); // now open index for create: writer = new IndexWriter(indexDir, new WhitespaceAnalyzer(), true); Assert.AreEqual(writer.DocCount(), 0, "should be zero documents"); AddDoc(writer); writer.Close(); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); IndexReader reader2 = IndexReader.Open(indexDir); Assert.AreEqual(reader2.NumDocs(), 1, "should be one document"); reader.Close(); reader2.Close(); } finally { RmDir(indexDir); } } // Same test as above, but use IndexWriter constructor // that takes String: [Test] public virtual void TestCreateWithReader3() { System.String tempDir = SupportClass.AppSettings.Get("tempDir", ""); if (tempDir == null) throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test"); System.String dirName = tempDir + "/lucenetestindexwriter"; try { // add one document & Close writer IndexWriter writer = new IndexWriter(dirName, new WhitespaceAnalyzer(), true); AddDoc(writer); writer.Close(); // now open reader: IndexReader reader = IndexReader.Open(dirName); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); // now open index for create: writer = new IndexWriter(dirName, new WhitespaceAnalyzer(), true); Assert.AreEqual(writer.DocCount(), 0, "should be zero documents"); AddDoc(writer); writer.Close(); Assert.AreEqual(reader.NumDocs(), 1, "should be one document"); IndexReader reader2 = IndexReader.Open(dirName); Assert.AreEqual(reader2.NumDocs(), 1, "should be one document"); reader.Close(); reader2.Close(); } finally { RmDir(new System.IO.FileInfo(dirName)); } } // Simulate a writer that crashed while writing segments // file: make sure we can still open the index (ie, // gracefully fallback to the previous segments file), // and that we can add to the index: [Test] public virtual void TestSimulatedCrashedWriter() { Directory dir = new RAMDirectory(); IndexWriter writer = null; writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); // add 100 documents for (int i = 0; i < 100; i++) { AddDoc(writer); } // Close writer.Close(); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); Assert.IsTrue(gen > 1, "segment generation should be > 1 but got " + gen); // Make the next segments file, with last byte // missing, to simulate a writer that crashed while // writing segments file: System.String fileNameIn = SegmentInfos.GetCurrentSegmentFileName(dir); System.String fileNameOut = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", 1 + gen); IndexInput in_Renamed = dir.OpenInput(fileNameIn); IndexOutput out_Renamed = dir.CreateOutput(fileNameOut); long length = in_Renamed.Length(); for (int i = 0; i < length - 1; i++) { out_Renamed.WriteByte(in_Renamed.ReadByte()); } in_Renamed.Close(); out_Renamed.Close(); IndexReader reader = null; try { reader = IndexReader.Open(dir); } catch (System.Exception e) { Assert.Fail("reader failed to open on a crashed index"); } reader.Close(); try { writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); } catch (System.Exception e) { Assert.Fail("writer failed to open on a crashed index"); } // add 100 documents for (int i = 0; i < 100; i++) { AddDoc(writer); } // Close writer.Close(); } // Simulate a corrupt index by removing last byte of // latest segments file and make sure we get an // IOException trying to open the index: [Test] public virtual void TestSimulatedCorruptIndex1() { Directory dir = new RAMDirectory(); IndexWriter writer = null; writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); // add 100 documents for (int i = 0; i < 100; i++) { AddDoc(writer); } // Close writer.Close(); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); Assert.IsTrue(gen > 1, "segment generation should be > 1 but got " + gen); System.String fileNameIn = SegmentInfos.GetCurrentSegmentFileName(dir); System.String fileNameOut = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", 1 + gen); IndexInput in_Renamed = dir.OpenInput(fileNameIn); IndexOutput out_Renamed = dir.CreateOutput(fileNameOut); long length = in_Renamed.Length(); for (int i = 0; i < length - 1; i++) { out_Renamed.WriteByte(in_Renamed.ReadByte()); } in_Renamed.Close(); out_Renamed.Close(); dir.DeleteFile(fileNameIn); IndexReader reader = null; try { reader = IndexReader.Open(dir); Assert.Fail("reader did not hit IOException on opening a corrupt index"); } catch (System.Exception e) { } if (reader != null) { reader.Close(); } } // Simulate a corrupt index by removing one of the cfs // files and make sure we get an IOException trying to // open the index: [Test] public virtual void TestSimulatedCorruptIndex2() { Directory dir = new RAMDirectory(); IndexWriter writer = null; writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); // add 100 documents for (int i = 0; i < 100; i++) { AddDoc(writer); } // Close writer.Close(); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); Assert.IsTrue(gen > 1, "segment generation should be > 1 but got " + gen); System.String[] files = dir.List(); for (int i = 0; i < files.Length; i++) { if (files[i].EndsWith(".cfs")) { dir.DeleteFile(files[i]); break; } } IndexReader reader = null; try { reader = IndexReader.Open(dir); Assert.Fail("reader did not hit IOException on opening a corrupt index"); } catch (System.Exception e) { } if (reader != null) { reader.Close(); } } // Make sure that a Directory implementation that does // not use LockFactory at all (ie overrides makeLock and // implements its own private locking) works OK. This // was raised on java-dev as loss of backwards // compatibility. [Test] public virtual void TestNullLockFactory() { Directory dir = new MyRAMDirectory(this); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); for (int i = 0; i < 100; i++) { AddDoc(writer); } writer.Close(); IndexReader reader = IndexReader.Open(dir); Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir); Hits hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(100, hits.Length(), "did not get right number of hits"); writer.Close(); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.Close(); dir.Close(); } private void RmDir(System.IO.FileInfo dir) { System.IO.FileInfo[] files = SupportClass.FileSupport.GetFiles(dir); if (files != null) { for (int i = 0; i < files.Length; i++) { bool tmpBool; if (System.IO.File.Exists(files[i].FullName)) { System.IO.File.Delete(files[i].FullName); tmpBool = true; } else if (System.IO.Directory.Exists(files[i].FullName)) { System.IO.Directory.Delete(files[i].FullName); tmpBool = true; } else tmpBool = false; bool generatedAux = tmpBool; } } bool tmpBool2; if (System.IO.File.Exists(dir.FullName)) { System.IO.File.Delete(dir.FullName); tmpBool2 = true; } else if (System.IO.Directory.Exists(dir.FullName)) { System.IO.Directory.Delete(dir.FullName); tmpBool2 = true; } else tmpBool2 = false; bool generatedAux2 = tmpBool2; } } }