/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Directory = Lucene.Net.Store.Directory;
namespace Lucene.Net.Index
{
/*
* This class keeps track of each SegmentInfos instance that
* is still "live", either because it corresponds to a
* segments_N file in the Directory (a "commit", i.e. a
* committed SegmentInfos) or because it's the in-memory SegmentInfos
* that a writer is actively updating but has not yet committed
* (currently this only applies when autoCommit=false in IndexWriter).
* This class uses simple reference counting to map the live
* SegmentInfos instances to individual files in the Directory.
*
* The same directory file may be referenced by more than
* one IndexCommitPoints, i.e. more than one SegmentInfos.
* Therefore we count how many commits reference each file.
* When all the commits referencing a certain file have been
* deleted, the refcount for that file becomes zero, and the
* file is deleted.
*
* A separate deletion policy interface
* (IndexDeletionPolicy) is consulted on creation (onInit)
* and once per commit (onCommit), to decide when a commit
* should be removed.
*
* It is the business of the IndexDeletionPolicy to choose
* when to delete commit points. The actual mechanics of
* file deletion, retrying, etc, derived from the deletion
* of commit points is the business of the IndexFileDeleter.
*
* The current default deletion policy is {@link
* KeepOnlyLastCommitDeletionPolicy}, which removes all
* prior commits when a new commit has completed. This
* matches the behavior before 2.2.
*
* Note that you must hold the write.lock before
* instantiating this class. It opens segments_N file(s)
* directly with no retry logic.
*/
sealed public class IndexFileDeleter
{
/* Files that we tried to delete but failed (likely
* because they are open and we are running on Windows),
* so we will retry them again later: */
private System.Collections.IList deletable;
/* Reference count for all files in the index.
* Counts how many existing commits reference a file.
* Maps String to RefCount (class below) instances: */
private System.Collections.IDictionary refCounts = new System.Collections.Hashtable();
/* Holds all commits (segments_N) currently in the index.
* This will have just 1 commit if you are using the
* default delete policy (KeepOnlyLastCommitDeletionPolicy).
* Other policies may leave commit points live for longer
* in which case this list would be longer than 1: */
private System.Collections.ArrayList commits = new System.Collections.ArrayList();
/* Holds files we had incref'd from the previous
* non-commit checkpoint: */
private System.Collections.IList lastFiles = new System.Collections.ArrayList();
/* Commits that the IndexDeletionPolicy have decided to delete: */
private System.Collections.IList commitsToDelete = new System.Collections.ArrayList();
private System.IO.TextWriter infoStream;
private Directory directory;
private IndexDeletionPolicy policy;
private DocumentsWriter docWriter;
/// Change to true to see details of reference counts when
/// infoStream != null
///
public static bool VERBOSE_REF_COUNTS = false;
internal void SetInfoStream(System.IO.TextWriter infoStream)
{
this.infoStream = infoStream;
if (infoStream != null)
{
Message("setInfoStream deletionPolicy=" + policy);
}
}
private void Message(System.String message)
{
infoStream.WriteLine("IFD [" + SupportClass.ThreadClass.Current().Name + "]: " + message);
}
/// Initialize the deleter: find all previous commits in
/// the Directory, incref the files they reference, call
/// the policy to let it delete commits. The incoming
/// segmentInfos must have been loaded from a commit point
/// and not yet modified. This will remove any files not
/// referenced by any of the commits.
///
/// CorruptIndexException if the index is corrupt
/// IOException if there is a low-level IO error
public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.TextWriter infoStream, DocumentsWriter docWriter)
{
this.docWriter = docWriter;
this.infoStream = infoStream;
if (infoStream != null)
{
Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy);
}
this.policy = policy;
this.directory = directory;
// First pass: walk the files and initialize our ref
// counts:
long currentGen = segmentInfos.GetGeneration();
IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();
System.String[] files = directory.List();
if (files == null)
{
throw new System.IO.IOException("cannot read directory " + directory + ": list() returned null");
}
CommitPoint currentCommitPoint = null;
for (int i = 0; i < files.Length; i++)
{
System.String fileName = files[i];
if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
{
// Add this file to refCounts with initial count 0:
GetRefCount(fileName);
if (fileName.StartsWith(IndexFileNames.SEGMENTS))
{
// This is a commit (segments or segments_N), and
// it's valid (<= the max gen). Load it, then
// incref all files it refers to:
if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen)
{
if (infoStream != null)
{
Message("init: load commit \"" + fileName + "\"");
}
SegmentInfos sis = new SegmentInfos();
try
{
sis.Read(directory, fileName);
}
catch (System.IO.FileNotFoundException e)
{
// LUCENE-948: on NFS (and maybe others), if
// you have writers switching back and forth
// between machines, it's very likely that the
// dir listing will be stale and will claim a
// file segments_X exists when in fact it
// doesn't. So, we catch this and handle it
// as if the file does not exist
if (infoStream != null)
{
Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
}
sis = null;
}
if (sis != null)
{
CommitPoint commitPoint = new CommitPoint(this, sis);
if (sis.GetGeneration() == segmentInfos.GetGeneration())
{
currentCommitPoint = commitPoint;
}
commits.Add(commitPoint);
IncRef(sis, true);
}
}
}
}
}
if (currentCommitPoint == null)
{
// We did not in fact see the segments_N file
// corresponding to the segmentInfos that was passed
// in. Yet, it must exist, because our caller holds
// the write lock. This can happen when the directory
// listing was stale (eg when index accessed via NFS
// client with stale directory listing cache). So we
// try now to explicitly open this commit point:
SegmentInfos sis = new SegmentInfos();
try
{
sis.Read(directory, segmentInfos.GetCurrentSegmentFileName());
}
catch (System.IO.IOException e)
{
throw new CorruptIndexException("failed to locate current segments_N file");
}
if (infoStream != null)
Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName());
currentCommitPoint = new CommitPoint(this, sis);
commits.Add(currentCommitPoint);
IncRef(sis, true);
}
// We keep commits list in sorted order (oldest to newest):
commits.Sort();
// Now delete anything with ref count at 0. These are
// presumably abandoned files eg due to crash of
// IndexWriter.
System.Collections.IEnumerator it = refCounts.Keys.GetEnumerator();
while (it.MoveNext())
{
System.String fileName = (System.String) it.Current;
RefCount rc = (RefCount) refCounts[fileName];
if (0 == rc.count)
{
if (infoStream != null)
{
Message("init: removing unreferenced file \"" + fileName + "\"");
}
DeleteFile(fileName);
}
}
// Finally, give policy a chance to remove things on
// startup:
policy.OnInit(commits);
// It's OK for the onInit to remove the current commit
// point; we just have to checkpoint our in-memory
// SegmentInfos to protect those files that it uses:
if (currentCommitPoint.deleted)
{
Checkpoint(segmentInfos, false);
}
DeleteCommits();
}
/// Remove the CommitPoints in the commitsToDelete List by
/// DecRef'ing all files from each SegmentInfos.
///
private void DeleteCommits()
{
int size = commitsToDelete.Count;
if (size > 0)
{
// First decref all files that had been referred to by
// the now-deleted commits:
for (int i = 0; i < size; i++)
{
CommitPoint commit = (CommitPoint) commitsToDelete[i];
if (infoStream != null)
{
Message("deleteCommits: now remove commit \"" + commit.GetSegmentsFileName() + "\"");
}
int size2 = commit.files.Count;
for (int j = 0; j < size2; j++)
{
DecRef((System.String) commit.files[j]);
}
}
commitsToDelete.Clear();
// Now compact commits to remove deleted ones (preserving the sort):
size = commits.Count;
int readFrom = 0;
int writeTo = 0;
while (readFrom < size)
{
CommitPoint commit = (CommitPoint) commits[readFrom];
if (!commit.deleted)
{
if (writeTo != readFrom)
{
commits[writeTo] = commits[readFrom];
}
writeTo++;
}
readFrom++;
}
while (size > writeTo)
{
commits.RemoveAt(size - 1);
size--;
}
}
}
/// Writer calls this when it has hit an error and had to
/// roll back, to tell us that there may now be
/// unreferenced files in the filesystem. So we re-list
/// the filesystem and delete such files. If segmentName
/// is non-null, we will only delete files corresponding to
/// that segment.
///
public void Refresh(System.String segmentName)
{
System.String[] files = directory.List();
if (files == null)
{
throw new System.IO.IOException("cannot read directory " + directory + ": list() returned null");
}
IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();
System.String segmentPrefix1;
System.String segmentPrefix2;
if (segmentName != null)
{
segmentPrefix1 = segmentName + ".";
segmentPrefix2 = segmentName + "_";
}
else
{
segmentPrefix1 = null;
segmentPrefix2 = null;
}
for (int i = 0; i < files.Length; i++)
{
System.String fileName = files[i];
if (filter.Accept(null, fileName) && (segmentName == null || fileName.StartsWith(segmentPrefix1) || fileName.StartsWith(segmentPrefix2)) && !refCounts.Contains(fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
{
// Unreferenced file, so remove it
if (infoStream != null)
{
Message("refresh [prefix=" + segmentName + "]: removing newly created unreferenced file \"" + fileName + "\"");
}
DeleteFile(fileName);
}
}
}
public void Refresh()
{
Refresh(null);
}
public void Close()
{
DeletePendingFiles();
}
private void DeletePendingFiles()
{
if (deletable != null)
{
System.Collections.IList oldDeletable = deletable;
deletable = null;
int size = oldDeletable.Count;
for (int i = 0; i < size; i++)
{
if (infoStream != null)
{
Message("delete pending file " + oldDeletable[i]);
}
DeleteFile((System.String) oldDeletable[i]);
}
}
}
/// For definition of "check point" see IndexWriter comments:
/// "Clarification: Check Points (and commits)".
///
/// Writer calls this when it has made a "consistent
/// change" to the index, meaning new files are written to
/// the index and the in-memory SegmentInfos have been
/// modified to point to those files.
///
/// This may or may not be a commit (segments_N may or may
/// not have been written).
///
/// We simply incref the files referenced by the new
/// SegmentInfos and decref the files we had previously
/// seen (if any).
///
/// If this is a commit, we also call the policy to give it
/// a chance to remove other commits. If any commits are
/// removed, we decref their files as well.
///
public void Checkpoint(SegmentInfos segmentInfos, bool isCommit)
{
if (infoStream != null)
{
Message("now checkpoint \"" + segmentInfos.GetCurrentSegmentFileName() + "\" [" + segmentInfos.Count + " segments " + "; isCommit = " + isCommit + "]");
}
// Try again now to delete any previously un-deletable
// files (because they were in use, on Windows):
DeletePendingFiles();
// Incref the files:
IncRef(segmentInfos, isCommit);
System.Collections.IList docWriterFiles;
if (docWriter != null)
{
docWriterFiles = docWriter.Files();
if (docWriterFiles != null)
IncRef(docWriterFiles);
}
else
docWriterFiles = null;
if (isCommit)
{
// Append to our commits list:
commits.Add(new CommitPoint(this, segmentInfos));
// Tell policy so it can remove commits:
policy.OnCommit(commits);
// Decref files for commits that were deleted by the policy:
DeleteCommits();
}
// DecRef old files from the last checkpoint, if any:
int size = lastFiles.Count;
if (size > 0)
{
for (int i = 0; i < size; i++)
DecRef((System.Collections.IList) lastFiles[i]);
lastFiles.Clear();
}
if (!isCommit)
{
// Save files so we can decr on next checkpoint/commit:
size = segmentInfos.Count;
for (int i = 0; i < size; i++)
{
SegmentInfo segmentInfo = segmentInfos.Info(i);
if (segmentInfo.dir == directory)
{
lastFiles.Add(segmentInfo.Files());
}
}
}
if (docWriterFiles != null)
lastFiles.Add(docWriterFiles);
}
internal void IncRef(SegmentInfos segmentInfos, bool isCommit)
{
int size = segmentInfos.Count;
for (int i = 0; i < size; i++)
{
SegmentInfo segmentInfo = segmentInfos.Info(i);
if (segmentInfo.dir == directory)
{
IncRef(segmentInfo.Files());
}
}
if (isCommit)
{
// Since this is a commit point, also incref its
// segments_N file:
GetRefCount(segmentInfos.GetCurrentSegmentFileName()).IncRef();
}
}
internal void IncRef(System.Collections.IList files)
{
int size = files.Count;
for (int i = 0; i < size; i++)
{
System.String fileName = (System.String) files[i];
RefCount rc = GetRefCount(fileName);
if (infoStream != null && VERBOSE_REF_COUNTS)
{
Message(" IncRef \"" + fileName + "\": pre-incr count is " + rc.count);
}
rc.IncRef();
}
}
internal void DecRef(System.Collections.IList files)
{
int size = files.Count;
for (int i = 0; i < size; i++)
{
DecRef((System.String) files[i]);
}
}
private void DecRef(System.String fileName)
{
RefCount rc = GetRefCount(fileName);
if (infoStream != null && VERBOSE_REF_COUNTS)
{
Message(" DecRef \"" + fileName + "\": pre-decr count is " + rc.count);
}
if (0 == rc.DecRef())
{
// This file is no longer referenced by any past
// commit points nor by the in-memory SegmentInfos:
DeleteFile(fileName);
refCounts.Remove(fileName);
}
}
internal void DecRef(SegmentInfos segmentInfos)
{
int size = segmentInfos.Count;
for (int i = 0; i < size; i++)
{
SegmentInfo segmentInfo = segmentInfos.Info(i);
if (segmentInfo.dir == directory)
{
DecRef(segmentInfo.Files());
}
}
}
private RefCount GetRefCount(System.String fileName)
{
RefCount rc;
if (!refCounts.Contains(fileName))
{
rc = new RefCount();
refCounts[fileName] = rc;
}
else
{
rc = (RefCount) refCounts[fileName];
}
return rc;
}
internal void DeleteFiles(System.Collections.IList files)
{
int size = files.Count;
for (int i = 0; i < size; i++)
DeleteFile((System.String) files[i]);
}
/// Delets the specified files, but only if they are new
/// (have not yet been incref'd).
///
internal void DeleteNewFiles(System.Collections.IList files)
{
int size = files.Count;
for (int i = 0; i < size; i++)
if (!refCounts.Contains(files[i]))
DeleteFile((System.String) files[i]);
}
internal void DeleteFile(System.String fileName)
{
try
{
if (infoStream != null)
{
Message("delete \"" + fileName + "\"");
}
directory.DeleteFile(fileName);
}
catch (System.IO.IOException e)
{
// if delete fails
if (directory.FileExists(fileName))
{
// Some operating systems (e.g. Windows) don't
// permit a file to be deleted while it is opened
// for read (e.g. by another process or thread). So
// we assume that when a delete fails it is because
// the file is open in another process, and queue
// the file for subsequent deletion.
if (infoStream != null)
{
Message("IndexFileDeleter: unable to remove file \"" + fileName + "\": " + e.ToString() + "; Will re-try later.");
}
if (deletable == null)
{
deletable = new System.Collections.ArrayList();
}
deletable.Add(fileName); // add to deletable
}
}
}
/// Tracks the reference count for a single index file:
sealed private class RefCount
{
internal int count;
public int IncRef()
{
return ++count;
}
public int DecRef()
{
System.Diagnostics.Debug.Assert(count > 0);
return --count;
}
}
/// Holds details for each commit point. This class is
/// also passed to the deletion policy. Note: this class
/// has a natural ordering that is inconsistent with
/// equals.
///
sealed private class CommitPoint : System.IComparable, IndexCommitPoint
{
private void InitBlock(IndexFileDeleter enclosingInstance)
{
this.enclosingInstance = enclosingInstance;
}
private IndexFileDeleter enclosingInstance;
public IndexFileDeleter Enclosing_Instance
{
get
{
return enclosingInstance;
}
}
internal long gen;
internal System.Collections.IList files;
internal System.String segmentsFileName;
internal bool deleted;
public CommitPoint(IndexFileDeleter enclosingInstance, SegmentInfos segmentInfos)
{
InitBlock(enclosingInstance);
segmentsFileName = segmentInfos.GetCurrentSegmentFileName();
int size = segmentInfos.Count;
files = new System.Collections.ArrayList(size);
files.Add(segmentsFileName);
gen = segmentInfos.GetGeneration();
for (int i = 0; i < size; i++)
{
SegmentInfo segmentInfo = segmentInfos.Info(i);
if (segmentInfo.dir == Enclosing_Instance.directory)
{
System.Collections.IEnumerator filesEnum = segmentInfo.Files().GetEnumerator();
while (filesEnum.MoveNext())
{
files.Add(filesEnum.Current);
}
}
}
}
/// Get the segments_N file for this commit point.
public System.String GetSegmentsFileName()
{
return segmentsFileName;
}
public System.Collections.ICollection GetFileNames()
{
return System.Collections.ArrayList.ReadOnly(new System.Collections.ArrayList(files));
}
/// Called only be the deletion policy, to remove this
/// commit point from the index.
///
public void Delete()
{
if (!deleted)
{
deleted = true;
Enclosing_Instance.commitsToDelete.Add(this);
}
}
public int CompareTo(System.Object obj)
{
CommitPoint commit = (CommitPoint) obj;
if (gen < commit.gen)
{
return - 1;
}
else if (gen > commit.gen)
{
return 1;
}
else
{
return 0;
}
}
}
}
}