/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using Directory = Lucene.Net.Store.Directory; using IndexInput = Lucene.Net.Store.IndexInput; using IndexOutput = Lucene.Net.Store.IndexOutput; namespace Lucene.Net.Index { /// Combines multiple files into a single compound file. /// The file format:
/// <code>
///     VInt fileCount
///     {Directory}
///         fileCount entries with the following structure:
///             long dataOffset
///             String fileName
///     {File Data}
///         fileCount entries with the raw data of the corresponding file
/// </code>
///
/// The fileCount integer indicates how many files are contained in this compound
/// file. The {directory} that follows has that many entries. Each directory entry
/// contains a long pointer to the start of this file's data section, and a String
/// with that file's name.
public sealed class CompoundFileWriter : IDisposable
{
    /// <summary>Bookkeeping for one source file to be copied into the compound file.</summary>
    private sealed class FileEntry
    {
        /// <summary>Source file name.</summary>
        internal System.String file;

        /// <summary>Temporary holder for the start of the directory entry for this file.</summary>
        internal long directoryOffset;

        /// <summary>Temporary holder for the start of this file's data section.</summary>
        internal long dataOffset;
    }

    private readonly Directory directory;
    private readonly String fileName;
    private readonly HashSet<string> ids;
    private readonly LinkedList<FileEntry> entries;
    private bool merged = false;
    private readonly SegmentMerger.CheckAbort checkAbort;

    /// <summary>
    /// Create the compound stream in the specified file. The file name is the
    /// entire name (no extensions are added).
    /// </summary>
    /// <exception cref="ArgumentNullException">if dir or name is null</exception>
    public CompoundFileWriter(Directory dir, System.String name) : this(dir, name, null)
    {
    }

    internal CompoundFileWriter(Directory dir, System.String name, SegmentMerger.CheckAbort checkAbort)
    {
        if (dir == null)
            throw new ArgumentNullException("dir");
        if (name == null)
            throw new ArgumentNullException("name");

        this.checkAbort = checkAbort;
        directory = dir;
        fileName = name;
        ids = new HashSet<string>();
        entries = new LinkedList<FileEntry>();
    }

    /// <summary>Returns the directory of the compound file.</summary>
    public Directory Directory
    {
        get { return directory; }
    }

    /// <summary>Returns the name of the compound file.</summary>
    public string Name
    {
        get { return fileName; }
    }

    /// <summary>
    /// Add a source stream. <paramref name="file"/> is the string by which the
    /// sub-stream will be known in the compound stream.
    /// </summary>
    /// <exception cref="InvalidOperationException">if this writer is closed</exception>
    /// <exception cref="ArgumentNullException">if <paramref name="file"/> is null</exception>
    /// <exception cref="ArgumentException">if a file with the same name has been added already</exception>
    public void AddFile(String file)
    {
        if (merged)
            throw new InvalidOperationException("Can't add extensions after merge has been called");
        if (file == null)
            throw new ArgumentNullException("file");

        // HashSet<T>.Add never throws on duplicates -- it returns false. The
        // previous try/catch around Add could therefore never detect a duplicate
        // and silently queued the same file twice; check the return value instead.
        if (!ids.Add(file))
            throw new ArgumentException("File " + file + " already added");

        var entry = new FileEntry { file = file };
        entries.AddLast(entry);
    }

    [Obsolete("Use Dispose() instead")]
    public void Close()
    {
        Dispose();
    }

    /// <summary>
    /// Merge files with the extensions added up to now.
    /// All files with these extensions are combined sequentially into the
    /// compound stream. After successful merge, the source files
    /// are deleted.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// if close() had been called before or if no file has been added to this object
    /// </exception>
    public void Dispose()
    {
        // Extract into protected method if class ever becomes unsealed
        // TODO: Dispose shouldn't throw exceptions!
        if (merged)
            throw new InvalidOperationException("Merge already performed");
        if (entries.Count == 0)
            throw new InvalidOperationException("No entries to merge have been defined");

        merged = true;

        // open the compound stream
        IndexOutput os = null;
        try
        {
            os = directory.CreateOutput(fileName);

            // Write the number of entries
            os.WriteVInt(entries.Count);

            // Write the directory with all offsets at 0.
            // Remember the positions of directory entries so that we can
            // adjust the offsets later
            long totalSize = 0;
            foreach (FileEntry fe in entries)
            {
                fe.directoryOffset = os.FilePointer;
                os.WriteLong(0); // for now
                os.WriteString(fe.file);
                totalSize += directory.FileLength(fe.file);
            }

            // Pre-allocate size of file as optimization --
            // this can potentially help IO performance as
            // we write the file and also later during
            // searching. It also uncovers a disk-full
            // situation earlier and hopefully without
            // actually filling disk to 100%:
            long finalLength = totalSize + os.FilePointer;
            os.SetLength(finalLength);

            // Open the files and copy their data into the stream.
            // Remember the locations of each file's data section.
            var buffer = new byte[16384];
            foreach (FileEntry fe in entries)
            {
                fe.dataOffset = os.FilePointer;
                CopyFile(fe, os, buffer);
            }

            // Write the data offsets into the directory of the compound stream
            foreach (FileEntry fe in entries)
            {
                os.Seek(fe.directoryOffset);
                os.WriteLong(fe.dataOffset);
            }

            System.Diagnostics.Debug.Assert(finalLength == os.Length);

            // Close the output stream. Set the os to null before trying to
            // close so that if an exception occurs during the close, the
            // finally clause below will not attempt to close the stream
            // the second time.
            IndexOutput tmp = os;
            os = null;
            tmp.Close();
        }
        finally
        {
            if (os != null)
            {
                try
                {
                    os.Close();
                }
                catch (System.IO.IOException)
                {
                    // Best-effort cleanup only: an exception already in flight
                    // from the try block is more informative than a secondary
                    // failure while closing.
                }
            }
        }
    }

    /// <summary>
    /// Copy the contents of the file with specified extension into the
    /// provided output stream. Use the provided buffer for moving data
    /// to reduce memory allocation.
    /// </summary>
    /// <exception cref="System.IO.IOException">
    /// if the number of bytes copied does not match the source file's length
    /// </exception>
    private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
    {
        IndexInput isRenamed = null;
        try
        {
            long startPtr = os.FilePointer;

            isRenamed = directory.OpenInput(source.file);
            long length = isRenamed.Length();
            long remainder = length;
            int chunk = buffer.Length;

            while (remainder > 0)
            {
                var len = (int) Math.Min(chunk, remainder);
                isRenamed.ReadBytes(buffer, 0, len, false);
                os.WriteBytes(buffer, len);
                remainder -= len;
                if (checkAbort != null)
                    // Roughly every 2 MB we will check if
                    // it's time to abort
                    checkAbort.Work(80);
            }

            // Verify that remainder is 0
            if (remainder != 0)
                throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");

            // Verify that the output length diff is equal to original file
            long endPtr = os.FilePointer;
            long diff = endPtr - startPtr;
            if (diff != length)
                throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
        }
        finally
        {
            if (isRenamed != null)
                isRenamed.Close();
        }
    }
}
}