/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using Lucene.Net.Support;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using BitVector = Lucene.Net.Util.BitVector;
namespace Lucene.Net.Index
{
/// <summary>
/// Information about a segment such as its name, directory, and files related
/// to the segment.
/// <para>
/// NOTE: This API is new and still experimental
/// (subject to change suddenly in the next release).
/// </para>
/// </summary>
public sealed class SegmentInfo : System.ICloneable
{
// Tri-state markers used for delGen, the entries of normGen, and isCompoundFile.
internal const int NO = - 1; // e.g. no norms; no deletes
internal const int YES = 1; // e.g. have norms; have deletes
internal const int CHECK_DIR = 0; // e.g. must check the directory to see if there are norms/deletions
internal const int WITHOUT_GEN = 0; // generation value for a file name that has no GEN in it
public System.String name; // unique name in dir
public int docCount; // number of docs in this segment
public Directory dir; // where the segment resides
private bool preLockless; // true if this is a segments file written before
// lock-less commits (2.1)
private long delGen; // current generation of the del file:
// NO if there are no deletes;
// CHECK_DIR if it's a pre-2.1 segment (and we must check the filesystem);
// YES or higher if there are deletes at generation N
private long[] normGen; // current generation of each field's norm file.
// If this array is null: for lockless segments this means no
// separate norms; for pre-lockless segments this means we must
// check the filesystem. If this array is not null, each value
// means: NO = this field has no separate norms;
// CHECK_DIR = pre-lockless segment, the filesystem must be checked;
// >= YES = this field has separate norms with the given generation
private sbyte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's
// pre-2.1 (i.e., must check the file system to see
// if .cfs and .nrm exist)
private bool hasSingleNormFile; // true if this segment maintains norms in a single file;
// false otherwise.
// This is currently false for segments populated by DocumentWriter
// and true for newly created merged segments (both
// compound and non-compound).
private IList files; // cached list of files that this segment uses
// in the Directory; null until computed by Files(), reset by ClearFiles()
internal long sizeInBytes = - 1; // total byte size of all of our files (computed on demand; -1 = not computed)
private int docStoreOffset; // if this segment shares stored fields & vectors, this
// offset is where in that file this segment's docs begin; -1 if not shared
private System.String docStoreSegment; // name used to derive the fields/vectors file we share with
// other segments
private bool docStoreIsCompoundFile; // whether doc store files are stored in a compound file (*.cfx)
private int delCount; // how many deleted docs in this segment, or -1 if not yet known
// (if it's an older index)
private bool hasProx; // true if this segment has any fields with omitTermFreqAndPositions==false
private IDictionary diagnostics; // string map persisted with the segment info (see Write); may be null
/// <summary>
/// Describes this segment in one line: directory, name, document count,
/// deletion count and the deletes file name (empty when there is none).
/// </summary>
public override System.String ToString()
{
    return System.String.Format(
        "si: {0} {1} docCount: {2} delCount: {3} delFileName: {4}",
        dir.ToString(),
        name,
        docCount,
        delCount,
        GetDelFileName());
}
/// <summary>
/// Creates a segment description with pre-lockless defaults: no deletes,
/// compound-file status unknown (CHECK_DIR), per-field norm files, a private
/// (unshared) doc store named after the segment, and prox data assumed present.
/// </summary>
/// <param name="name">unique name of the segment in <paramref name="dir"/></param>
/// <param name="docCount">number of documents in the segment</param>
/// <param name="dir">directory where the segment resides</param>
public SegmentInfo(System.String name, int docCount, Directory dir)
{
    // Identity of the segment.
    this.name = name;
    this.docCount = docCount;
    this.dir = dir;

    // Format-related defaults.
    preLockless = true;
    isCompoundFile = (sbyte) CHECK_DIR;
    hasSingleNormFile = false;
    hasProx = true;

    // Deletion state.
    delGen = NO;
    delCount = 0;

    // Doc store: not shared with any other segment.
    docStoreOffset = -1;
    docStoreSegment = name;
    docStoreIsCompoundFile = false;
}
/// <summary>
/// Creates a lockless-format segment description that does not share a doc
/// store (offset -1, no doc store segment) and is assumed to have prox data.
/// </summary>
/// <param name="name">unique name of the segment in <paramref name="dir"/></param>
/// <param name="docCount">number of documents in the segment</param>
/// <param name="dir">directory where the segment resides</param>
/// <param name="isCompoundFile">whether the segment is stored as a compound file</param>
/// <param name="hasSingleNormFile">whether norms are kept in a single file</param>
public SegmentInfo(
    System.String name,
    int docCount,
    Directory dir,
    bool isCompoundFile,
    bool hasSingleNormFile)
    : this(name, docCount, dir, isCompoundFile, hasSingleNormFile, -1, null, false, true)
{
}
/// <summary>
/// Creates a fully specified lockless-format segment description.
/// </summary>
/// <param name="name">unique name of the segment in <paramref name="dir"/></param>
/// <param name="docCount">number of documents in the segment</param>
/// <param name="dir">directory where the segment resides</param>
/// <param name="isCompoundFile">whether the segment is stored as a compound file</param>
/// <param name="hasSingleNormFile">whether norms are kept in a single file</param>
/// <param name="docStoreOffset">offset of this segment's docs in a shared doc store, or -1 if not shared</param>
/// <param name="docStoreSegment">name used to derive the shared doc store files; must be non-null when an offset is given</param>
/// <param name="docStoreIsCompoundFile">whether the doc store files live in a compound file</param>
/// <param name="hasProx">whether any field has omitTermFreqAndPositions==false</param>
public SegmentInfo(
    System.String name,
    int docCount,
    Directory dir,
    bool isCompoundFile,
    bool hasSingleNormFile,
    int docStoreOffset,
    System.String docStoreSegment,
    bool docStoreIsCompoundFile,
    bool hasProx)
    : this(name, docCount, dir)
{
    // Override the pre-lockless defaults installed by the chained constructor.
    preLockless = false;
    this.isCompoundFile = (sbyte) (isCompoundFile ? YES : NO);
    this.hasSingleNormFile = hasSingleNormFile;
    this.hasProx = hasProx;
    delCount = 0;

    this.docStoreOffset = docStoreOffset;
    this.docStoreSegment = docStoreSegment;
    this.docStoreIsCompoundFile = docStoreIsCompoundFile;

    // A shared doc store (offset != -1) needs a segment name to derive file names from.
    System.Diagnostics.Debug.Assert(docStoreOffset == - 1 || docStoreSegment != null, "dso=" + docStoreOffset + " dss=" + docStoreSegment + " docCount=" + docCount);
}
/// <summary>
/// Copies the segment state of <paramref name="src"/> into this instance and
/// drops the cached file list and size.
/// </summary>
/// <remarks>
/// The doc store segment name and the prox flag are copied together with the
/// other doc store / format fields so that this instance never ends up with
/// the source's doc store offset paired with its own stale doc store name.
/// The norm generations are copied into a fresh array, so later changes to
/// one instance do not affect the other.
/// NOTE(review): the diagnostics map is intentionally left untouched here;
/// confirm with callers whether it should follow the source as well.
/// </remarks>
/// <param name="src">the instance to copy from</param>
internal void Reset(SegmentInfo src)
{
    // Anything cached was computed from the old state.
    ClearFiles();

    name = src.name;
    docCount = src.docCount;
    dir = src.dir;
    preLockless = src.preLockless;

    delGen = src.delGen;
    delCount = src.delCount;

    // Keep the three doc store fields in step with each other.
    docStoreOffset = src.docStoreOffset;
    docStoreSegment = src.docStoreSegment;
    docStoreIsCompoundFile = src.docStoreIsCompoundFile;

    normGen = (src.normGen == null) ? null : (long[]) src.normGen.Clone();

    isCompoundFile = src.isCompoundFile;
    hasSingleNormFile = src.hasSingleNormFile;
    hasProx = src.hasProx;
}
/// <summary>
/// The diagnostics map stored with this segment. The getter returns the
/// internal instance (no copy is made); the setter replaces it.
/// </summary>
public IDictionary Diagnostics
{
    get
    {
        return diagnostics;
    }
    internal set
    {
        diagnostics = value;
    }
}
/// <summary>
/// Constructs a new SegmentInfo instance by reading a previously saved
/// SegmentInfo from <paramref name="input"/>.
/// </summary>
/// <remarks>
/// Each optional field is read only when <paramref name="format"/> is less
/// than or equal to the SegmentInfos format constant that introduced it;
/// otherwise a default is used. The reads happen in the same order in which
/// Write emits the fields.
/// </remarks>
/// <param name="dir">directory to load from</param>
/// <param name="format">format of the segments info file</param>
/// <param name="input">input handle to read segment info from</param>
internal SegmentInfo(Directory dir, int format, IndexInput input)
{
    this.dir = dir;
    name = input.ReadString();
    docCount = input.ReadInt();

    if (format > SegmentInfos.FORMAT_LOCKLESS)
    {
        // Written before lock-less commits: nothing else is stored, so
        // everything must later be discovered by checking the directory.
        preLockless = true;
        delGen = CHECK_DIR;
        delCount = -1;
        normGen = null;
        isCompoundFile = (sbyte) (CHECK_DIR);
        hasSingleNormFile = false;
        docStoreOffset = -1;
        docStoreSegment = null;
        docStoreIsCompoundFile = false;
        hasProx = true;
        diagnostics = new Dictionary();
        return;
    }

    delGen = input.ReadLong();

    // Shared doc store information (absent in older formats => not shared).
    docStoreOffset = (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) ? input.ReadInt() : -1;
    if (docStoreOffset != -1)
    {
        docStoreSegment = input.ReadString();
        docStoreIsCompoundFile = (1 == input.ReadByte());
    }
    else
    {
        docStoreSegment = name;
        docStoreIsCompoundFile = false;
    }

    // The byte is only present (and only consumed) for formats that store it.
    hasSingleNormFile = (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) && (1 == input.ReadByte());

    // Per-field norm generations; a count of NO means "no array".
    int numNormGen = input.ReadInt();
    if (numNormGen == NO)
    {
        normGen = null;
    }
    else
    {
        long[] gens = new long[numNormGen];
        for (int field = 0; field < gens.Length; field++)
        {
            gens[field] = input.ReadLong();
        }
        normGen = gens;
    }

    isCompoundFile = (sbyte) input.ReadByte();
    preLockless = (isCompoundFile == CHECK_DIR);

    if (format <= SegmentInfos.FORMAT_DEL_COUNT)
    {
        delCount = input.ReadInt();
        System.Diagnostics.Debug.Assert(delCount <= docCount);
    }
    else
    {
        // Not stored: computed on demand by GetDelCount().
        delCount = -1;
    }

    // Formats without the flag are treated as having prox data.
    hasProx = (format > SegmentInfos.FORMAT_HAS_PROX) || (input.ReadByte() == 1);

    if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
    {
        diagnostics = input.ReadStringStringMap();
    }
    else
    {
        diagnostics = new Dictionary();
    }
}
/// <summary>
/// Allocates the per-field norm generation array if it does not exist yet.
/// Does nothing when the array is already present.
/// </summary>
/// <param name="numFields">number of fields, i.e. the length of the new array</param>
internal void SetNumFields(int numFields)
{
    // normGen is null if we loaded a pre-2.1 segment file, or if this
    // segments file hasn't had any norms set against it yet.
    if (normGen != null)
    {
        return;
    }

    normGen = new long[numFields];

    if (preLockless)
    {
        // Leave every entry at its default of 0 (== CHECK_DIR) so that later
        // we know the filesystem has to be checked for norm files.
        return;
    }

    // A FORMAT_LOCKLESS segment: there are no separate norms.
    for (int field = 0; field < normGen.Length; field++)
    {
        normGen[field] = NO;
    }
}
/// <summary>
/// Returns the total size in bytes of all files used by this segment.
/// The value is computed on first use and cached until ClearFiles() runs.
/// </summary>
/// <remarks>
/// Files belonging to a shared doc store are not counted against this
/// segment. The sum is built in a local and only published to the cache once
/// every file length has been read, so a failure part-way through does not
/// leave a partial total cached for later calls.
/// </remarks>
/// <returns>the cached or freshly computed size in bytes</returns>
public long SizeInBytes()
{
    if (sizeInBytes != -1)
    {
        return sizeInBytes;
    }

    long total = 0;
    foreach (System.String fileName in Files())
    {
        // We don't count bytes used by a shared doc store against this segment:
        if (docStoreOffset == -1 || !IndexFileNames.IsDocStoreFile(fileName))
        {
            total += dir.FileLength(fileName);
        }
    }

    sizeInBytes = total;
    return total;
}
/// <summary>
/// Tells whether this segment has deletions.
/// </summary>
/// <remarks>
/// delGen == NO: the segment was written by the LOCKLESS code and for
/// certain does not have deletions yet.
/// delGen >= YES: the segment was written by the LOCKLESS code and for
/// certain has deletions.
/// Anything else (CHECK_DIR): the segment was written by pre-LOCKLESS code,
/// so the directory is checked for the .del file.
/// </remarks>
public bool HasDeletions()
{
    return delGen != NO
        && (delGen >= YES || dir.FileExists(GetDelFileName()));
}
/// <summary>
/// Moves the deletes file to its next generation and invalidates the cached
/// file list. From NO the generation jumps straight to YES, because
/// generation 0 is reserved for the pre-LOCKLESS format.
/// </summary>
internal void AdvanceDelGen()
{
    delGen = (delGen == NO) ? YES : delGen + 1;
    ClearFiles();
}
/// <summary>
/// Marks this segment as having no deletes file (delGen = NO) and
/// invalidates the cached file list and size.
/// </summary>
internal void ClearDelGen()
{
    ClearFiles();
    delGen = NO;
}
/// <summary>
/// Creates a copy of this SegmentInfo. The diagnostics map, the norm
/// generation array and the cached file list are copied into new containers,
/// so the clone can be changed without affecting this instance.
/// </summary>
/// <remarks>
/// When this instance has no diagnostics map the clone has none either. The
/// map is only touched after the null check; building an intermediate copy
/// from a possibly-null map beforehand was redundant (its result was always
/// replaced) and unsafe for instances created without diagnostics.
/// </remarks>
/// <returns>the new SegmentInfo, typed as object</returns>
public System.Object Clone()
{
    SegmentInfo si = new SegmentInfo(name, docCount, dir);
    si.isCompoundFile = isCompoundFile;
    si.delGen = delGen;
    si.delCount = delCount;
    si.hasProx = hasProx;
    si.preLockless = preLockless;
    si.hasSingleNormFile = hasSingleNormFile;

    if (this.diagnostics != null)
    {
        si.diagnostics = new System.Collections.Generic.Dictionary();
        foreach (string o in diagnostics.Keys)
        {
            si.diagnostics.Add(o, diagnostics[o]);
        }
    }

    if (normGen != null)
    {
        si.normGen = new long[normGen.Length];
        normGen.CopyTo(si.normGen, 0);
    }

    // The constructor defaulted these to an unshared doc store; take ours instead.
    si.docStoreOffset = docStoreOffset;
    si.docStoreSegment = docStoreSegment;
    si.docStoreIsCompoundFile = docStoreIsCompoundFile;

    if (this.files != null)
    {
        si.files = new System.Collections.Generic.List();
        foreach (string file in files)
        {
            si.files.Add(file);
        }
    }

    return si;
}
/// <summary>
/// Returns the name of the deletes file for the current deletes generation,
/// or null when delGen is NO (no deletes file exists for this segment).
/// </summary>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
public System.String GetDelFileName()
{
    if (delGen == NO)
    {
        // We know there is no deletion file against this segment.
        return null;
    }

    // A delGen of CHECK_DIR corresponds to the pre-lockless-commit file format.
    System.String extension = "." + IndexFileNames.DELETES_EXTENSION;
    return IndexFileNames.FileNameFromGeneration(name, extension, delGen);
}
/// <summary>
/// Returns true if this field for this segment has saved a separate norms
/// file (_&lt;segment&gt;_N.sX).
/// </summary>
/// <param name="fieldNumber">the field index to check</param>
public bool HasSeparateNorms(int fieldNumber)
{
    if (normGen == null)
    {
        // Lockless segment without a generation array: no separate norms.
        // Pre-lockless: only the directory can tell.
        return preLockless && dir.FileExists(name + ".s" + fieldNumber);
    }

    long gen = normGen[fieldNumber];
    if (gen == CHECK_DIR)
    {
        // Must fall back to a file-exists check in the directory:
        return dir.FileExists(name + ".s" + fieldNumber);
    }

    return gen != NO;
}
/// <summary>
/// Returns true if any fields in this segment have separate norms.
/// </summary>
/// <remarks>
/// With a generation array present, generations of YES or higher are checked
/// first (no I/O needed); entries still at CHECK_DIR are then resolved against
/// the directory. Without an array, a lockless segment has no separate norms,
/// while a pre-lockless segment requires scanning the directory listing for
/// files named <c>name + ".s" + digit...</c>. The name match is ordinal and
/// guarded by a length check, mirroring the scan done in Files().
/// </remarks>
/// <exception cref="System.IO.IOException">if the directory listing comes back null</exception>
public bool HasSeparateNorms()
{
    if (normGen != null)
    {
        // First look for any generation >= YES: those definitely have
        // separate norms.
        for (int i = 0; i < normGen.Length; i++)
        {
            if (normGen[i] >= YES)
            {
                return true;
            }
        }

        // Next look for any CHECK_DIR entries. These were pre-LOCKLESS and
        // must be checked in the directory:
        for (int i = 0; i < normGen.Length; i++)
        {
            if (normGen[i] == CHECK_DIR && HasSeparateNorms(i))
            {
                return true;
            }
        }

        return false;
    }

    if (!preLockless)
    {
        // Created with LOCKLESS code and no norms are written yet:
        return false;
    }

    // Saved with pre-LOCKLESS code, so fall back to the original directory
    // list check:
    System.String[] result = dir.ListAll();
    if (result == null)
    {
        throw new System.IO.IOException("cannot read directory " + dir + ": ListAll() returned null");
    }

    IndexFileNameFilter filter = IndexFileNameFilter.Filter;
    System.String pattern = name + ".s";
    int patternLength = pattern.Length;

    for (int i = 0; i < result.Length; i++)
    {
        string fileName = result[i];
        if (filter.Accept(null, fileName)
            && fileName.Length > patternLength // there must be a character after the pattern
            && fileName.StartsWith(pattern, System.StringComparison.Ordinal)
            && char.IsDigit(fileName[patternLength]))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Increments the generation of the norms file for one field and invalidates
/// the cached file list. A field currently at NO moves to YES.
/// </summary>
/// <param name="fieldIndex">field whose norm file will be rewritten</param>
internal void AdvanceNormGen(int fieldIndex)
{
    long current = normGen[fieldIndex];
    normGen[fieldIndex] = (current == NO) ? YES : current + 1;
    ClearFiles();
}
/// <summary>
/// Gets the file name of the norms file for a field.
/// </summary>
/// <remarks>
/// Three cases, in order: a separate norms file (".s" + field, with the
/// field's generation), the single norms file shared by all fields, or a
/// plain per-field norms file (".f" + field). The last two carry no generation.
/// </remarks>
/// <param name="number">field index</param>
public System.String GetNormFileName(int number)
{
    // Read the generation up front, before consulting HasSeparateNorms.
    long gen = (normGen == null) ? CHECK_DIR : normGen[number];

    if (HasSeparateNorms(number))
    {
        // case 1: separate norm
        return IndexFileNames.FileNameFromGeneration(name, ".s" + number, gen);
    }

    // case 2: lockless (or nrm file exists) - single file for all norms
    // case 3: norm file for each field
    System.String suffix = hasSingleNormFile
        ? "." + IndexFileNames.NORMS_EXTENSION
        : ".f" + number;
    return IndexFileNames.FileNameFromGeneration(name, suffix, WITHOUT_GEN);
}
/// <summary>
/// Records whether this segment is stored as a compound file and
/// invalidates the cached file list and size.
/// </summary>
/// <param name="value">true to mark the segment as a compound file (YES); false for NO</param>
internal void SetUseCompoundFile(bool value)
{
    isCompoundFile = (sbyte) (value ? YES : NO);
    ClearFiles();
}
/// <summary>
/// Returns true if this segment is stored as a compound file; else, false.
/// When the stored flag is neither YES nor NO, the answer comes from checking
/// the directory for the segment's compound file.
/// </summary>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
public bool GetUseCompoundFile()
{
    switch (isCompoundFile)
    {
        case NO:
            return false;

        case YES:
            return true;

        default:
            return dir.FileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
    }
}
/// <summary>
/// Returns the number of deleted documents in this segment. When the count
/// is not yet known (-1) it is obtained from the deletes file, or set to 0 if
/// the segment has no deletions, and then remembered.
/// </summary>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
public int GetDelCount()
{
    if (delCount == -1)
    {
        delCount = HasDeletions()
            ? new BitVector(dir, GetDelFileName()).Count()
            : 0;
    }

    System.Diagnostics.Debug.Assert(delCount <= docCount);
    return delCount;
}
/// <summary>
/// Stores the number of deleted documents for this segment.
/// </summary>
/// <param name="delCount">the new deletion count; expected not to exceed docCount</param>
internal void SetDelCount(int delCount)
{
    this.delCount = delCount;

    // Debug-only sanity check: there cannot be more deletions than documents.
    System.Diagnostics.Debug.Assert(delCount <= docCount);
}
/// <summary>
/// Offset at which this segment's documents begin in a shared doc store, or
/// -1 when the doc store is not shared. Setting it invalidates the cached
/// file list and size.
/// </summary>
public int DocStoreOffset
{
    get
    {
        return docStoreOffset;
    }
    internal set
    {
        ClearFiles();
        docStoreOffset = value;
    }
}
/// <summary>
/// Whether the doc store files are stored in a compound file. Setting it
/// invalidates the cached file list and size.
/// </summary>
public bool DocStoreIsCompoundFile
{
    get
    {
        return docStoreIsCompoundFile;
    }
    internal set
    {
        ClearFiles();
        docStoreIsCompoundFile = value;
    }
}
/// <summary>
/// Name used to derive the doc store (stored fields / vectors) file names.
/// </summary>
public string DocStoreSegment
{
    get
    {
        return docStoreSegment;
    }
}
/// <summary>
/// Sets all three doc store properties at once and invalidates the cached
/// file list and size.
/// </summary>
/// <remarks>
/// Files() derives the doc store file names from these values, so the cache
/// is dropped here just as the individual DocStoreOffset and
/// DocStoreIsCompoundFile setters do; otherwise a previously computed list
/// would keep describing the old doc store.
/// </remarks>
/// <param name="offset">offset of this segment's docs in the doc store, or -1 if not shared</param>
/// <param name="segment">name used to derive the doc store file names</param>
/// <param name="isCompoundFile">whether the doc store files live in a compound file</param>
internal void SetDocStore(int offset, System.String segment, bool isCompoundFile)
{
    docStoreOffset = offset;
    docStoreSegment = segment;
    docStoreIsCompoundFile = isCompoundFile;
    ClearFiles();
}
/// <summary>
/// Saves this segment's info to <paramref name="output"/>.
/// </summary>
/// <remarks>
/// Field order: name, docCount, delGen, docStoreOffset, (doc store segment
/// and compound flag, only when the offset is not -1), single-norm-file
/// flag, norm generation count (NO when there is no array) followed by the
/// generations, compound-file marker, delCount, prox flag, diagnostics map.
/// </remarks>
/// <param name="output">the output to write to</param>
internal void Write(IndexOutput output)
{
    output.WriteString(name);
    output.WriteInt(docCount);
    output.WriteLong(delGen);

    output.WriteInt(docStoreOffset);
    bool sharesDocStore = docStoreOffset != -1;
    if (sharesDocStore)
    {
        output.WriteString(docStoreSegment);
        output.WriteByte((byte) (docStoreIsCompoundFile?1:0));
    }

    output.WriteByte((byte) (hasSingleNormFile?1:0));

    if (normGen != null)
    {
        output.WriteInt(normGen.Length);
        foreach (long gen in normGen)
        {
            output.WriteLong(gen);
        }
    }
    else
    {
        // NO stands in for the length when there is no array.
        output.WriteInt(NO);
    }

    output.WriteByte((byte) isCompoundFile);
    output.WriteInt(delCount);
    output.WriteByte((byte) (hasProx?1:0));
    output.WriteStringStringMap(diagnostics);
}
/// <summary>
/// True if this segment has any fields with omitTermFreqAndPositions==false.
/// Setting it invalidates the cached file list and size.
/// </summary>
public bool HasProx
{
    get
    {
        return hasProx;
    }
    internal set
    {
        ClearFiles();
        hasProx = value;
    }
}
/// <summary>
/// Appends <paramref name="fileName"/> to <paramref name="files"/> only when
/// a file of that name exists in this segment's directory.
/// </summary>
/// <param name="files">list to append to</param>
/// <param name="fileName">candidate file name</param>
private void AddIfExists(IList files, System.String fileName)
{
    if (!dir.FileExists(fileName))
    {
        return;
    }

    files.Add(fileName);
}
/// <summary>
/// Returns all files referenced by this SegmentInfo. The returned list is
/// cached on this instance and handed out directly, so callers should not
/// modify it.
/// </summary>
/// <remarks>
/// The list is assembled from: the compound file or the individual
/// non-store index files; the doc store files (shared or private); the
/// deletes file; and the norms files. When the directory has to be scanned
/// for norms files, a null listing is reported as an IOException (the same
/// way HasSeparateNorms() reports it) and file names are matched ordinally.
/// </remarks>
/// <exception cref="System.IO.IOException">if the directory listing needed for the
/// norms scan comes back null</exception>
public IList Files()
{
    if (files != null)
    {
        // Already cached:
        return files;
    }

    var fileList = new System.Collections.Generic.List();
    bool useCompoundFile = GetUseCompoundFile();

    if (useCompoundFile)
    {
        fileList.Add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
    }
    else
    {
        System.String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
        for (int i = 0; i < exts.Length; i++)
        {
            AddIfExists(fileList, name + "." + exts[i]);
        }
    }

    if (docStoreOffset != - 1)
    {
        // We are sharing doc stores (stored fields, term
        // vectors) with other segments
        System.Diagnostics.Debug.Assert(docStoreSegment != null);
        if (docStoreIsCompoundFile)
        {
            fileList.Add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
        }
        else
        {
            System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
            for (int i = 0; i < exts.Length; i++)
            {
                AddIfExists(fileList, docStoreSegment + "." + exts[i]);
            }
        }
    }
    else if (!useCompoundFile)
    {
        // We are not sharing, and, these files were not
        // included in the compound file
        System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
        for (int i = 0; i < exts.Length; i++)
        {
            AddIfExists(fileList, name + "." + exts[i]);
        }
    }

    // Deletes file: certain for generations >= YES, otherwise only if present.
    System.String delFileName = IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
    if (delFileName != null && (delGen >= YES || dir.FileExists(delFileName)))
    {
        fileList.Add(delFileName);
    }

    // Careful logic for norms files
    if (normGen != null)
    {
        for (int i = 0; i < normGen.Length; i++)
        {
            long gen = normGen[i];
            if (gen >= YES)
            {
                // Definitely a separate norm file, with generation:
                fileList.Add(IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
            }
            else if (NO == gen)
            {
                // No separate norms but maybe plain norms
                // in the non compound file case:
                if (!hasSingleNormFile && !useCompoundFile)
                {
                    AddIfExists(fileList, name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i);
                }
            }
            else if (CHECK_DIR == gen)
            {
                // Pre-2.1: we have to check file existence
                System.String fileName = null;
                if (useCompoundFile)
                {
                    fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i;
                }
                else if (!hasSingleNormFile)
                {
                    fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
                }
                if (fileName != null && dir.FileExists(fileName))
                {
                    fileList.Add(fileName);
                }
            }
        }
    }
    else if (preLockless || (!hasSingleNormFile && !useCompoundFile))
    {
        // Pre-2.1: we have to scan the dir to find all
        // matching _X.sN/_X.fN files for our segment:
        System.String prefix;
        if (useCompoundFile)
        {
            prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION;
        }
        else
        {
            prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION;
        }
        int prefixLength = prefix.Length;

        System.String[] allFiles = dir.ListAll();
        if (allFiles == null)
        {
            throw new System.IO.IOException("cannot read directory " + dir + ": ListAll() returned null");
        }

        IndexFileNameFilter filter = IndexFileNameFilter.Filter;
        for (int i = 0; i < allFiles.Length; i++)
        {
            System.String fileName = allFiles[i];
            if (filter.Accept(null, fileName)
                && fileName.Length > prefixLength
                && System.Char.IsDigit(fileName[prefixLength])
                && fileName.StartsWith(prefix, System.StringComparison.Ordinal))
            {
                fileList.Add(fileName);
            }
        }
    }

    files = fileList;
    return files;
}
/// <summary>
/// Called whenever any change is made that affects which files this segment
/// has: forgets the cached file list and the cached total size so both are
/// recomputed on next use.
/// </summary>
private void ClearFiles()
{
    sizeInBytes = -1;
    files = null;
}
/// <summary>
/// Used for debugging. Builds a compact description of the form
/// <c>name:cfs[x]docCount[-&gt;docStoreSegment]</c>.
/// </summary>
/// <remarks>
/// The cfs marker is "c" when GetUseCompoundFile() is true, "C" when it is
/// false and "?" when the check throws an IOException. An "x" follows when
/// this segment's directory is not <paramref name="dir"/>. The doc store
/// suffix appears only when the doc store offset is not -1.
/// </remarks>
/// <param name="dir">directory to compare this segment's directory against</param>
public System.String SegString(Directory dir)
{
    System.String cfs;
    try
    {
        cfs = GetUseCompoundFile() ? "c" : "C";
    }
    catch (System.IO.IOException)
    {
        cfs = "?";
    }

    System.String dirMarker = (this.dir == dir) ? "" : "x";
    System.String docStore = (docStoreOffset != -1) ? "->" + docStoreSegment : "";

    return name + ":" + cfs + dirMarker + docCount + docStore;
}
/// <summary>
/// We consider another SegmentInfo instance equal if it has the same dir and
/// the same name.
/// </summary>
/// <param name="obj">the object to compare with</param>
/// <returns>true for the same instance, or for a SegmentInfo whose dir and name match</returns>
public override bool Equals(System.Object obj)
{
    if (this == obj)
    {
        return true;
    }

    SegmentInfo other = obj as SegmentInfo;
    if (other == null)
    {
        // Null, or not a SegmentInfo at all.
        return false;
    }

    return other.dir == dir && other.name.Equals(name);
}
/// <summary>
/// Hash code built from the same two members that Equals compares: the
/// directory and the segment name.
/// </summary>
public override int GetHashCode()
{
    int dirHash = dir.GetHashCode();
    int nameHash = name.GetHashCode();
    return dirHash + nameHash;
}
}
}