/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using Document = Lucene.Net.Documents.Document;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using IndexInput = Lucene.Net.Store.IndexInput;
namespace Lucene.Net.Index
{
///
/// Basic tool and API to check the health of an index and
/// write a new segments file that removes reference to
/// problematic segments.
///
/// As this tool checks every byte in the index, on a large
/// index it can take quite a long time to run.
///
///
WARNING : this tool and API is new and
/// experimental and is subject to suddenly change in the
/// next release. Please make a complete backup of your
/// index before using this to fix your index!
///
public class CheckIndex
{
///
/// Default print stream for all CheckIndex instances.
///
[Obsolete("use SetInfoStream per instance instead")]
public static System.IO.TextWriter out_Renamed = null;
private System.IO.TextWriter infoStream;
private Directory dir;
/**
* Returned from {@link #CheckIndex()} detailing the health and status of the index.
*
*
WARNING : this API is new and experimental and is
* subject to suddenly change in the next release.
**/
public class Status
{
/** True if no problems were found with the index. */
public bool clean;
/** True if we were unable to locate and load the segments_N file. */
public bool missingSegments;
/** True if we were unable to open the segments_N file. */
public bool cantOpenSegments;
/** True if we were unable to read the version number from segments_N file. */
public bool missingSegmentVersion;
/** Name of latest segments_N file in the index. */
public string segmentsFileName;
/** Number of segments in the index. */
public int numSegments;
/** string description of the version of the index. */
public string segmentFormat;
/** Empty unless you passed specific segments list to check as optional 3rd argument.
* @see CheckIndex#CheckIndex(List) */
//public IList segmentsChecked = new List();
public IList segmentsChecked = new List();
/** True if the index was created with a newer version of Lucene than the CheckIndex tool. */
public bool toolOutOfDate;
/** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */
public IList segmentInfos = new List();
/** Directory index is in. */
public Directory dir;
/** SegmentInfos instance containing only segments that
* had no problems (this is used with the {@link
* CheckIndex#fix} method to repair the index. */
internal SegmentInfos newSegments;
/** How many documents will be lost to bad segments. */
public int totLoseDocCount;
/** How many bad segments were found. */
public int numBadSegments;
/** True if we checked only specific segments ({@link
* #CheckIndex(List)}) was called with non-null
* argument). */
public bool partial;
/** Holds the status of each segment in the index.
* See {@link #segmentInfos}.
*
* WARNING : this API is new and experimental and is
* subject to suddenly change in the next release.
*/
public class SegmentInfoStatus
{
/** Name of the segment. */
public string name;
/** Document count (does not take deletions into account). */
public int docCount;
/** True if segment is compound file format. */
public bool compound;
/** Number of files referenced by this segment. */
public int numFiles;
/** Net size (MB) of the files referenced by this
* segment. */
public double sizeMB;
/** Doc store offset, if this segment shares the doc
* store files (stored fields and term vectors) with
* other segments. This is -1 if it does not share. */
public int docStoreOffset = -1;
/** string of the shared doc store segment, or null if
* this segment does not share the doc store files. */
public string docStoreSegment;
/** True if the shared doc store files are compound file
* format. */
public bool docStoreCompoundFile;
/** True if this segment has pending deletions. */
public bool hasDeletions;
/** Name of the current deletions file name. */
public string deletionsFileName;
/** Number of deleted documents. */
public int numDeleted;
/** True if we were able to open a SegmentReader on this
* segment. */
public bool openReaderPassed;
/** Number of fields in this segment. */
public int numFields;
/** True if at least one of the fields in this segment
* does not omitTf.
* @see Fieldable#setOmitTf */
public bool hasProx;
}
}
/** Create a new CheckIndex on the directory. */
public CheckIndex(Directory dir)
{
this.dir = dir;
infoStream = out_Renamed;
}
/** Set infoStream where messages should go. If null, no
* messages are printed */
public void SetInfoStream(System.IO.TextWriter out_Renamed)
{
infoStream = out_Renamed;
}
private void Msg(string msg)
{
if (infoStream != null)
infoStream.WriteLine(msg);
}
private class MySegmentTermDocs : SegmentTermDocs
{
internal int delCount;
internal MySegmentTermDocs(SegmentReader p)
: base(p)
{
}
public override void Seek(Term term)
{
base.Seek(term);
delCount = 0;
}
protected internal override void SkippingDoc()
{
delCount++;
}
}
/** Returns true if index is clean, else false.
* @deprecated Please instantiate a CheckIndex and then use {@link #CheckIndex()} instead */
public static bool Check(Directory dir, bool doFix)
{
return Check(dir, doFix, null);
}
/** Returns true if index is clean, else false.
* @deprecated Please instantiate a CheckIndex and then use {@link #CheckIndex(List)} instead */
public static bool Check(Directory dir, bool doFix, IList onlySegments)
{
CheckIndex checker = new CheckIndex(dir);
Status status = checker.CheckIndex_Renamed(onlySegments);
if (doFix && !status.clean)
checker.FixIndex(status);
return status.clean;
}
/** Returns a {@link Status} instance detailing
* the state of the index.
*
* As this method checks every byte in the index, on a large
* index it can take quite a long time to run.
*
*
WARNING : make sure
* you only call this when the index is not opened by any
* writer. */
public Status CheckIndex_Renamed()
{
return CheckIndex_Renamed(null);
}
/** Returns a {@link Status} instance detailing
* the state of the index.
*
* @param onlySegments list of specific segment names to check
*
*
As this method checks every byte in the specified
* segments, on a large index it can take quite a long
* time to run.
*
*
WARNING : make sure
* you only call this when the index is not opened by any
* writer. */
public Status CheckIndex_Renamed(IList onlySegments)
{
System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
SegmentInfos sis = new SegmentInfos();
Status result = new Status();
result.dir = dir;
try
{
sis.Read(dir);
}
catch (System.Exception t)
{
Msg("ERROR: could not read any segments file in directory");
result.missingSegments = true;
if (infoStream != null)
infoStream.WriteLine(t.StackTrace);
return result;
}
int numSegments = sis.Count;
string segmentsFileName = sis.GetCurrentSegmentFileName();
IndexInput input = null;
try
{
input = dir.OpenInput(segmentsFileName);
}
catch (System.Exception t)
{
Msg("ERROR: could not open segments file in directory");
if (infoStream != null)
infoStream.WriteLine(t.StackTrace);
result.cantOpenSegments = true;
return result;
}
int format = 0;
try
{
format = input.ReadInt();
}
catch (System.Exception t)
{
Msg("ERROR: could not read segment file version in directory");
if (infoStream != null)
infoStream.WriteLine(t.StackTrace);
result.missingSegmentVersion = true;
return result;
}
finally
{
if (input != null)
input.Close();
}
string sFormat = "";
bool skip = false;
if (format == SegmentInfos.FORMAT)
sFormat = "FORMAT [Lucene Pre-2.1]";
if (format == SegmentInfos.FORMAT_LOCKLESS)
sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
else
{
if (format == SegmentInfos.FORMAT_CHECKSUM)
sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
else if (format == SegmentInfos.FORMAT_DEL_COUNT)
sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
else if (format == SegmentInfos.FORMAT_HAS_PROX)
sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
else if (format < SegmentInfos.CURRENT_FORMAT)
{
sFormat = "int=" + format + " [newer version of Lucene than this tool]";
skip = true;
}
else
{
sFormat = format + " [Lucene 1.3 or prior]";
}
}
Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);
result.segmentsFileName = segmentsFileName;
result.numSegments = numSegments;
result.segmentFormat = sFormat;
if (onlySegments != null)
{
result.partial = true;
if (infoStream != null)
infoStream.Write("\nChecking only these segments:");
IEnumerator it = onlySegments.GetEnumerator();
while (it.MoveNext())
{
if (infoStream != null)
infoStream.Write(" " + it.Current);
}
SupportClass.CollectionsSupport.AddAll(onlySegments, (System.Collections.Generic.IList)(result.segmentsChecked));
Msg(":");
}
if (skip)
{
Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
result.toolOutOfDate = true;
return result;
}
result.newSegments = (SegmentInfos)sis.Clone();
result.newSegments.Clear();
for (int i = 0; i < numSegments; i++)
{
SegmentInfo info = sis.Info(i);
if (onlySegments != null && !onlySegments.Contains(info.name))
continue;
Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
result.segmentInfos.Add(segInfoStat);
Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
segInfoStat.name = info.name;
segInfoStat.docCount = info.docCount;
int toLoseDocCount = info.docCount;
SegmentReader reader = null;
try
{
Msg(" compound=" + info.GetUseCompoundFile());
segInfoStat.compound = info.GetUseCompoundFile();
Msg(" hasProx=" + info.GetHasProx());
segInfoStat.hasProx = info.GetHasProx();
Msg(" numFiles=" + info.Files().Count);
segInfoStat.numFiles = info.Files().Count;
//msg(" size (MB)=" + nf.Format(info.SizeInBytes()/(1024.*1024.)));
Msg(string.Format(nf, " size (MB)={0:f}", new object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
int docStoreOffset = info.GetDocStoreOffset();
if (docStoreOffset != -1)
{
Msg(" docStoreOffset=" + docStoreOffset);
segInfoStat.docStoreOffset = docStoreOffset;
Msg(" docStoreSegment=" + info.GetDocStoreSegment());
segInfoStat.docStoreSegment = info.GetDocStoreSegment();
Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
}
string delFileName = info.GetDelFileName();
if (delFileName == null)
{
Msg(" no deletions");
segInfoStat.hasDeletions = false;
}
else
{
Msg(" has deletions [delFileName=" + delFileName + "]");
segInfoStat.hasDeletions = true;
segInfoStat.deletionsFileName = delFileName;
}
if (infoStream != null)
infoStream.Write(" test: open reader.........");
reader = SegmentReader.Get(info);
int numDocs = reader.NumDocs();
toLoseDocCount = numDocs;
if (reader.HasDeletions())
{
if (info.docCount - numDocs != info.GetDelCount())
{
throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
}
segInfoStat.numDeleted = info.docCount - numDocs;
Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
}
else
{
if (info.GetDelCount() != 0)
{
throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
}
Msg("OK");
}
if (infoStream != null)
infoStream.Write(" test: fields, norms.......");
ICollection fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
IEnumerator it = fieldNames.GetEnumerator();
while (it.MoveNext())
{
string fieldName = it.Current;
byte[] b = reader.Norms(fieldName);
if (b.Length != info.docCount)
throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
}
Msg("OK [" + fieldNames.Count + " fields]");
segInfoStat.numFields = fieldNames.Count;
if (infoStream != null)
infoStream.Write(" test: terms, freq, prox...");
TermEnum termEnum = reader.Terms();
TermPositions termPositions = reader.TermPositions();
// Used only to count up # deleted docs for this
// term
MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
long termCount = 0;
long totFreq = 0;
long totPos = 0;
while (termEnum.Next())
{
termCount++;
Term term = termEnum.Term();
int docFreq = termEnum.DocFreq();
termPositions.Seek(term);
int lastDoc = -1;
int freq0 = 0;
totFreq += docFreq;
while (termPositions.Next())
{
freq0++;
int doc = termPositions.Doc();
int freq = termPositions.Freq();
if (doc <= lastDoc)
throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
lastDoc = doc;
if (freq <= 0)
throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
int lastPos = -1;
totPos += freq;
for (int j = 0; j < freq; j++)
{
int pos = termPositions.NextPosition();
if (pos < -1)
throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
if (pos < lastPos)
throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
}
}
// Now count how many deleted docs occurred in
// this term:
int delCount;
if (reader.HasDeletions())
{
myTermDocs.Seek(term);
while (myTermDocs.Next())
{
}
delCount = myTermDocs.delCount;
}
else
delCount = 0;
if (freq0 + delCount != docFreq)
throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
}
Msg("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
if (infoStream != null)
infoStream.Write(" test: stored fields.......");
int docCount = 0;
long totFields = 0;
for (int j = 0; j < info.docCount; j++)
if (!reader.IsDeleted(j))
{
docCount++;
Document doc = reader.Document(j);
totFields += doc.GetFields().Count;
}
if (docCount != reader.NumDocs())
throw new System.SystemException("docCount=" + docCount + " but saw " + docCount + " undeleted docs");
//msg("OK [" + totFields + " total field count; avg " + nf.Format((((float) totFields)/docCount)) + " fields per doc]");
Msg(string.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { totFields, (((float)totFields) / docCount) }));
if (infoStream != null)
infoStream.Write(" test: term vectors........");
int totVectors = 0;
for (int j = 0; j < info.docCount; j++)
if (!reader.IsDeleted(j))
{
TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
if (tfv != null)
totVectors += tfv.Length;
}
//msg("OK [" + totVectors + " total vector count; avg " + nf.Format((((float) totVectors)/docCount)) + " term/freq vector fields per doc]");
Msg(string.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { totVectors, (((float)totVectors) / docCount) }));
Msg("");
}
catch (System.Exception t)
{
Msg("FAILED");
string comment;
comment = "FixIndex() would remove reference to this segment";
Msg(" WARNING: " + comment + "; full exception:");
if (infoStream != null)
infoStream.WriteLine(t.StackTrace);
Msg("");
result.totLoseDocCount += toLoseDocCount;
result.numBadSegments++;
continue;
}
finally
{
if (reader != null)
reader.Close();
}
// Keeper
result.newSegments.Add(info.Clone());
}
if (0 == result.numBadSegments)
{
result.clean = true;
Msg("No problems were detected with this index.\n");
}
else
Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
return result;
}
/** Repairs the index using previously returned result
* from {@link #checkIndex}. Note that this does not
* remove any of the unreferenced files after it's done;
* you must separately open an {@link IndexWriter}, which
* deletes unreferenced files when it's created.
*
* WARNING : this writes a
* new segments file into the index, effectively removing
* all documents in broken segments from the index.
* BE CAREFUL.
*
*
WARNING : Make sure you only call this when the
* index is not opened by any writer. */
public void FixIndex(Status result)
{
if (result.partial)
throw new ArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
result.newSegments.Commit(result.dir);
}
private static bool assertsOn;
private static bool TestAsserts()
{
assertsOn = true;
return true;
}
private static bool AssertsOn()
{
System.Diagnostics.Debug.Assert(TestAsserts());
return assertsOn;
}
/** Command-line interface to check and fix an index.
Run it like this:
java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
-fix
: actually write a new segments_N file, removing any problematic segments
-segment X
: only check the specified
segment(s). This can be specified multiple times,
to check more than one segment, eg -segment _2
-segment _a
. You can't use this with the -fix
option.
WARNING : -fix
should only be used on an emergency basis as it will cause
documents (perhaps many) to be permanently removed from the index. Always make
a backup copy of your index before running this! Do not run this tool on an index
that is actively being written to. You have been warned!
Run without -fix, this tool will open the index, report version information
and report any exceptions it hits and what action it would take if -fix were
specified. With -fix, this tool will remove any segments that have issues and
write a new segments_N file. This means all documents contained in the affected
segments will be removed.
This tool exits with exit code 1 if the index cannot be opened or has any
corruption, else 0.
*/
[STAThread]
public static void Main(string[] args)
{
bool doFix = false;
IList onlySegments = new List();
string indexPath = null;
int i = 0;
while (i < args.Length)
{
if (args[i].Equals("-fix"))
{
doFix = true;
i++;
}
else if (args[i].Equals("-segment"))
{
if (i == args.Length - 1)
{
System.Console.WriteLine("ERROR: missing name for -segment option");
System.Environment.Exit(1);
}
onlySegments.Add(args[i + 1]);
i += 2;
}
else
{
if (indexPath != null)
{
System.Console.WriteLine("ERROR: unexpected extra argument '" + args[i] + "'");
System.Environment.Exit(1);
}
indexPath = args[i];
i++;
}
}
if (indexPath == null)
{
System.Console.WriteLine("\nERROR: index path not specified");
System.Console.WriteLine("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
"\n" +
" -fix: actually write a new segments_N file, removing any problematic segments\n" +
" -segment X: only check the specified segments. This can be specified multiple\n" +
" times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
" You can't use this with the -fix option\n" +
"\n" +
"**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
"documents (perhaps many) to be permanently removed from the index. Always make\n" +
"a backup copy of your index before running this! Do not run this tool on an index\n" +
"that is actively being written to. You have been warned!\n" +
"\n" +
"Run without -fix, this tool will open the index, report version information\n" +
"and report any exceptions it hits and what action it would take if -fix were\n" +
"specified. With -fix, this tool will remove any segments that have issues and\n" +
"write a new segments_N file. This means all documents contained in the affected\n" +
"segments will be removed.\n" +
"\n" +
"This tool exits with exit code 1 if the index cannot be opened or has any\n" +
"corruption, else 0.\n");
System.Environment.Exit(1);
}
if (!AssertsOn())
System.Console.WriteLine("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
if (onlySegments.Count == 0)
onlySegments = null;
else if (doFix)
{
System.Console.WriteLine("ERROR: cannot specify both -fix and -segment");
System.Environment.Exit(1);
}
System.Console.WriteLine("\nOpening index @ " + indexPath + "\n");
Directory dir = null;
try
{
dir = FSDirectory.GetDirectory(indexPath);
}
catch (System.Exception t)
{
System.Console.WriteLine("ERROR: could not open directory \"" + indexPath + "\"; exiting");
System.Console.WriteLine(t.StackTrace);
System.Environment.Exit(1);
}
CheckIndex checker = new CheckIndex(dir);
checker.SetInfoStream(new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding));
Status result = checker.CheckIndex_Renamed(onlySegments);
if (!result.clean)
{
if (!doFix)
{
System.Console.WriteLine("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
}
else
{
System.Console.WriteLine("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
System.Console.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
for (int s = 0; s < 5; s++)
{
try
{
System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64)1000 * 1000));
}
catch (System.Threading.ThreadInterruptedException)
{
SupportClass.ThreadClass.Current().Interrupt();
s--;
continue;
}
System.Console.WriteLine(" " + (5 - s) + "...");
}
System.Console.WriteLine("Writing...");
checker.FixIndex(result);
System.Console.WriteLine("OK");
System.Console.WriteLine("Wrote new segments file \"" + result.newSegments.GetCurrentSegmentFileName() + "\"");
}
}
System.Console.WriteLine("");
int exitCode;
if (result != null && result.clean == true)
exitCode = 0;
else
exitCode = 1;
System.Environment.Exit(exitCode);
}
}
}
// /// Returns true if index is clean, else false.
// public static bool Check(Directory dir, bool doFix)
// {
// System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
// SegmentInfos sis = new SegmentInfos();
// try
// {
// sis.Read(dir);
// }
// catch (System.Exception t)
// {
// out_Renamed.WriteLine("ERROR: could not read any segments file in directory");
// out_Renamed.Write(t.StackTrace);
// out_Renamed.Flush();
// return false;
// }
// int numSegments = sis.Count;
// System.string segmentsFileName = sis.GetCurrentSegmentFileName();
// IndexInput input = null;
// try
// {
// input = dir.OpenInput(segmentsFileName);
// }
// catch (System.Exception t)
// {
// out_Renamed.WriteLine("ERROR: could not open segments file in directory");
// out_Renamed.Write(t.StackTrace);
// out_Renamed.Flush();
// return false;
// }
// int format = 0;
// try
// {
// format = input.ReadInt();
// }
// catch (System.Exception t)
// {
// out_Renamed.WriteLine("ERROR: could not read segment file version in directory");
// out_Renamed.Write(t.StackTrace);
// out_Renamed.Flush();
// return false;
// }
// finally
// {
// if (input != null)
// input.Close();
// }
// System.string sFormat = "";
// bool skip = false;
// if (format == SegmentInfos.FORMAT)
// sFormat = "FORMAT [Lucene Pre-2.1]";
// if (format == SegmentInfos.FORMAT_LOCKLESS)
// sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
// else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
// sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
// else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
// sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
// else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE)
// {
// sFormat = "int=" + format + " [newer version of Lucene than this tool]";
// skip = true;
// }
// else
// {
// sFormat = format + " [Lucene 1.3 or prior]";
// }
// out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);
// if (skip)
// {
// out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
// return false;
// }
// SegmentInfos newSIS = (SegmentInfos) sis.Clone();
// newSIS.Clear();
// bool changed = false;
// int totLoseDocCount = 0;
// int numBadSegments = 0;
// for (int i = 0; i < numSegments; i++)
// {
// SegmentInfo info = sis.Info(i);
// out_Renamed.WriteLine(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
// int toLoseDocCount = info.docCount;
// SegmentReader reader = null;
// try
// {
// out_Renamed.WriteLine(" compound=" + info.GetUseCompoundFile());
// out_Renamed.WriteLine(" numFiles=" + info.Files().Count);
// out_Renamed.WriteLine(string.Format(nf, " size (MB)={0:f}", new object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
// int docStoreOffset = info.GetDocStoreOffset();
// if (docStoreOffset != - 1)
// {
// out_Renamed.WriteLine(" docStoreOffset=" + docStoreOffset);
// out_Renamed.WriteLine(" docStoreSegment=" + info.GetDocStoreSegment());
// out_Renamed.WriteLine(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
// }
// System.string delFileName = info.GetDelFileName();
// if (delFileName == null)
// out_Renamed.WriteLine(" no deletions");
// else
// out_Renamed.WriteLine(" has deletions [delFileName=" + delFileName + "]");
// out_Renamed.Write(" test: open reader.........");
// reader = SegmentReader.Get(info);
// int numDocs = reader.NumDocs();
// toLoseDocCount = numDocs;
// if (reader.HasDeletions())
// out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]");
// else
// out_Renamed.WriteLine("OK");
// out_Renamed.Write(" test: fields, norms.......");
// System.Collections.ICollection fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
// System.Collections.IEnumerator it = fieldNames.GetEnumerator();
// while (it.MoveNext())
// {
// System.string fieldName = (System.string) it.Current;
// byte[] b = reader.Norms(fieldName);
// if (b.Length != info.docCount)
// throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
// }
// out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]");
// out_Renamed.Write(" test: terms, freq, prox...");
// TermEnum termEnum = reader.Terms();
// TermPositions termPositions = reader.TermPositions();
// // Used only to count up # deleted docs for this
// // term
// MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
// long termCount = 0;
// long totFreq = 0;
// long totPos = 0;
// while (termEnum.Next())
// {
// termCount++;
// Term term = termEnum.Term();
// int docFreq = termEnum.DocFreq();
// termPositions.Seek(term);
// int lastDoc = - 1;
// int freq0 = 0;
// totFreq += docFreq;
// while (termPositions.Next())
// {
// freq0++;
// int doc = termPositions.Doc();
// int freq = termPositions.Freq();
// if (doc <= lastDoc)
// {
// throw new System.SystemException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc);
// }
// lastDoc = doc;
// if (freq <= 0)
// {
// throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
// }
// int lastPos = - 1;
// totPos += freq;
// for (int j = 0; j < freq; j++)
// {
// int pos = termPositions.NextPosition();
// if (pos < -1)
// {
// throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
// }
// if (pos < lastPos)
// {
// throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
// }
// }
// }
// // Now count how many deleted docs occurred in
// // this term:
// int delCount;
// if (reader.HasDeletions())
// {
// myTermDocs.Seek(term);
// while (myTermDocs.Next())
// {
// }
// delCount = myTermDocs.delCount;
// }
// else
// delCount = 0;
// if (freq0 + delCount != docFreq)
// {
// throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
// }
// }
// out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
// out_Renamed.Write(" test: stored fields.......");
// int docCount = 0;
// long totFields = 0;
// for (int j = 0; j < info.docCount; j++)
// if (!reader.IsDeleted(j))
// {
// docCount++;
// Document doc = reader.Document(j);
// totFields += doc.GetFields().Count;
// }
// if (docCount != reader.NumDocs())
// throw new System.SystemException("docCount=" + docCount + " but saw " + docCount + " undeleted docs");
// out_Renamed.WriteLine(string.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { totFields, (((float)totFields) / docCount) }));
// out_Renamed.Write(" test: term vectors........");
// int totVectors = 0;
// for (int j = 0; j < info.docCount; j++)
// if (!reader.IsDeleted(j))
// {
// TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
// if (tfv != null)
// totVectors += tfv.Length;
// }
// out_Renamed.WriteLine(string.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { totVectors, (((float)totVectors) / docCount) }));
// out_Renamed.WriteLine("");
// }
// catch (System.Exception t)
// {
// out_Renamed.WriteLine("FAILED");
// System.string comment;
// if (doFix)
// comment = "will remove reference to this segment (-fix is specified)";
// else
// comment = "would remove reference to this segment (-fix was not specified)";
// out_Renamed.WriteLine(" WARNING: " + comment + "; full exception:");
// out_Renamed.Write(t.StackTrace);
// out_Renamed.Flush();
// out_Renamed.WriteLine("");
// totLoseDocCount += toLoseDocCount;
// numBadSegments++;
// changed = true;
// continue;
// }
// finally
// {
// if (reader != null)
// reader.Close();
// }
// // Keeper
// newSIS.Add(info.Clone());
// }
// if (!changed)
// {
// out_Renamed.WriteLine("No problems were detected with this index.\n");
// return true;
// }
// else
// {
// out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected");
// if (doFix)
// out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost");
// else
// out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified");
// out_Renamed.WriteLine();
// }
// if (doFix)
// {
// out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
// for (int i = 0; i < 5; i++)
// {
// try
// {
// System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
// }
// catch (System.Threading.ThreadInterruptedException)
// {
// SupportClass.ThreadClass.Current().Interrupt();
// i--;
// continue;
// }
// out_Renamed.WriteLine(" " + (5 - i) + "...");
// }
// out_Renamed.Write("Writing...");
// try
// {
// newSIS.Write(dir);
// }
// catch (System.Exception t)
// {
// out_Renamed.WriteLine("FAILED; exiting");
// out_Renamed.Write(t.StackTrace);
// out_Renamed.Flush();
// return false;
// }
// out_Renamed.WriteLine("OK");
// out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\"");
// }
// else
// {
// out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]");
// }
// out_Renamed.WriteLine("");
// return false;
// }
// static bool assertsOn;
// private static bool TestAsserts()
// {
// assertsOn = true;
// return true;
// }
// [STAThread]
// public static void Main(System.string[] args)
// {
// bool doFix = false;
// for (int i = 0; i < args.Length; i++)
// if (args[i].Equals("-fix"))
// {
// doFix = true;
// break;
// }
// if (args.Length != (doFix ? 2 : 1))
// {
// out_Renamed.WriteLine("\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" + "that is actively being written to. You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified. With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file. This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has has any\n" + "corruption, else 0.\n");
// System.Environment.Exit(1);
// }
// System.Diagnostics.Debug.Assert(TestAsserts());
// if (!assertsOn)
// System.Console.WriteLine("\nNote: testing will be more thorough if you run with System.Diagnostic.Debug.Assert() enabled.");
// System.string dirName = args[0];
// out_Renamed.WriteLine("\nOpening index @ " + dirName + "\n");
// Directory dir = null;
// try
// {
// dir = FSDirectory.GetDirectory(dirName);
// }
// catch (System.Exception t)
// {
// out_Renamed.WriteLine("ERROR: could not open directory \"" + dirName + "\"; exiting");
// out_Renamed.Write(t.StackTrace);
// out_Renamed.Flush();
// System.Environment.Exit(1);
// }
// bool isClean = Check(dir, doFix);
// int exitCode;
// if (isClean)
// exitCode = 0;
// else
// exitCode = 1;
// System.Environment.Exit(exitCode);
// }
// static CheckIndex()
// {
// System.IO.StreamWriter temp_writer;
// temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
// temp_writer.AutoFlush = true;
// out_Renamed = temp_writer;
// }