/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using IndexReader = Lucene.Net.Index.IndexReader;
using FieldCache = Lucene.Net.Search.FieldCache;
using CacheEntry = Lucene.Net.Search.CacheEntry;
namespace Lucene.Net.Util
{
/// <summary> Provides methods for sanity checking that entries in the FieldCache
/// are not wasteful or inconsistent.
/// <para/>
/// Lucene 2.9 introduced numerous enhancements into how the FieldCache
/// is used by the low levels of Lucene searching (for Sorting and
/// ValueSourceQueries) to improve both the speed of Sorting and the
/// reopening of IndexReaders. But these changes have shifted the
/// usage of FieldCache from "top level" IndexReaders (frequently a
/// MultiReader or DirectoryReader) down to the leaf-level SegmentReaders.
/// As a result, existing applications that directly access the FieldCache
/// may find RAM usage increase significantly when upgrading to 2.9 or
/// later. This class provides an API for these applications (or their
/// unit tests) to check at run time whether the FieldCache contains "insane"
/// usages of the FieldCache.
/// <para/>
/// <b>EXPERIMENTAL API:</b> This API is considered extremely advanced and
/// experimental. It may be removed or altered without warning in future
/// releases of Lucene.
/// </summary>
/// <seealso cref="FieldCache"/>
/// <seealso cref="FieldCacheSanityChecker.Insanity"/>
/// <seealso cref="FieldCacheSanityChecker.InsanityType"/>
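/// <example>
/// A minimal usage sketch (hypothetical: <c>cache</c> stands for whatever
/// FieldCache instance the application queries against):
/// <code>
/// CacheEntry[] entries = cache.GetCacheEntries();
/// FieldCacheSanityChecker.Insanity[] problems =
///     FieldCacheSanityChecker.CheckSanity(entries);
/// foreach (FieldCacheSanityChecker.Insanity problem in problems)
/// {
///     System.Console.Error.WriteLine(problem.ToString());
/// }
/// </code>
/// </example>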
public sealed class FieldCacheSanityChecker
{
private RamUsageEstimator ramCalc = null;
public FieldCacheSanityChecker()
{
/* NOOP */
}
/// <summary> If set, will be used to estimate size for all CacheEntry objects
/// dealt with.
/// </summary>
public void SetRamUsageEstimator(RamUsageEstimator r)
{
ramCalc = r;
}
/// <summary> Quick and dirty convenience method</summary>
/// <seealso cref="Check"/>
public static Insanity[] CheckSanity(FieldCache cache)
{
return CheckSanity(cache.GetCacheEntries());
}
/// <summary> Quick and dirty convenience method that instantiates an instance with
/// "good defaults" and uses it to test the CacheEntry objects
/// </summary>
/// <seealso cref="Check"/>
public static Insanity[] CheckSanity(params CacheEntry[] cacheEntries)
{
FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker();
// doesn't check for interned
sanityChecker.SetRamUsageEstimator(new RamUsageEstimator(false));
return sanityChecker.Check(cacheEntries);
}
/// <summary> Tests a CacheEntry[] for indication of "insane" cache usage.
/// <para/>
/// NOTE: FieldCache CreationPlaceholder objects are ignored.
/// (:TODO: is this a bad idea? are we masking a real problem?)
/// </summary>
public Insanity[] Check(params CacheEntry[] cacheEntries)
{
if (null == cacheEntries || 0 == cacheEntries.Length)
return new Insanity[0];
if (null != ramCalc)
{
for (int i = 0; i < cacheEntries.Length; i++)
{
cacheEntries[i].EstimateSize(ramCalc);
}
}
// the indirect mapping lets MapOfSets dedup identical valIds for us
//
// maps the (valId) hash code of cache values to
// sets of CacheEntry instances
MapOfSets<int, CacheEntry> valIdToItems = new MapOfSets<int, CacheEntry>(new Dictionary<int, HashSet<CacheEntry>>(17));
// maps ReaderField keys to Sets of ValueIds
MapOfSets<ReaderField, int> readerFieldToValIds = new MapOfSets<ReaderField, int>(new Dictionary<ReaderField, HashSet<int>>(17));
// any keys that we know result in more than one valId
HashSet<ReaderField> valMismatchKeys = new HashSet<ReaderField>();
// iterate over all the cacheEntries to get the mappings we'll need
for (int i = 0; i < cacheEntries.Length; i++)
{
CacheEntry item = cacheEntries[i];
System.Object val = item.Value;
if (val is Lucene.Net.Search.CreationPlaceholder)
continue;
ReaderField rf = new ReaderField(item.ReaderKey, item.FieldName);
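// the value's hash code serves as a cheap stand-in for object identity
// (the Java original uses System.identityHashCode here)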
System.Int32 valId = val.GetHashCode();
// indirect mapping, so the MapOfSets will dedup identical valIds for us
valIdToItems.Put(valId, item);
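// MapOfSets.Put returns the size of the key's value set after the add;
// a size greater than 1 means this reader/field pair now has multiple
// distinct value objects cached for it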
if (1 < readerFieldToValIds.Put(rf, valId))
{
valMismatchKeys.Add(rf);
}
}
List<Insanity> insanity = new List<Insanity>(valMismatchKeys.Count * 3);
insanity.AddRange(CheckValueMismatch(valIdToItems, readerFieldToValIds, valMismatchKeys));
insanity.AddRange(CheckSubreaders(valIdToItems, readerFieldToValIds));
return insanity.ToArray();
}
/// <summary> Internal helper method used by check that iterates over
/// valMismatchKeys and generates a Collection of Insanity
/// instances accordingly. The MapOfSets are used to populate
/// the Insanity objects.
/// </summary>
/// <seealso cref="InsanityType.VALUEMISMATCH"/>
private List<Insanity> CheckValueMismatch(MapOfSets<int, CacheEntry> valIdToItems,
    MapOfSets<ReaderField, int> readerFieldToValIds,
    HashSet<ReaderField> valMismatchKeys)
{
List<Insanity> insanity = new List<Insanity>(valMismatchKeys.Count * 3);
if (!(valMismatchKeys.Count == 0))
{
// we have multiple values for some ReaderFields
IDictionary<ReaderField, HashSet<int>> rfMap = readerFieldToValIds.Map;
IDictionary<int, HashSet<CacheEntry>> valMap = valIdToItems.Map;
foreach (ReaderField rf in valMismatchKeys)
{
List<CacheEntry> badEntries = new List<CacheEntry>(valMismatchKeys.Count * 2);
foreach (int val in rfMap[rf])
{
foreach (CacheEntry entry in valMap[val])
{
badEntries.Add(entry);
}
}
insanity.Add(new Insanity(InsanityType.VALUEMISMATCH, "Multiple distinct value objects for " + rf.ToString(), badEntries.ToArray()));
}
}
return insanity;
}
/// <summary> Internal helper method used by check that iterates over
/// the keys of readerFieldToValIds and generates a Collection
/// of Insanity instances whenever two (or more) ReaderField instances are
/// found that have an ancestry relationship.
/// </summary>
/// <seealso cref="InsanityType.SUBREADER"/>
private List<Insanity> CheckSubreaders(MapOfSets<int, CacheEntry> valIdToItems,
                                       MapOfSets<ReaderField, int> readerFieldToValIds)
{
List<Insanity> insanity = new List<Insanity>(23);
Dictionary<ReaderField, HashSet<ReaderField>> badChildren = new Dictionary<ReaderField, HashSet<ReaderField>>(17);
MapOfSets<ReaderField, ReaderField> badKids = new MapOfSets<ReaderField, ReaderField>(badChildren); // wrapper
IDictionary<int, HashSet<CacheEntry>> viToItemSets = valIdToItems.Map;
IDictionary<ReaderField, HashSet<int>> rfToValIdSets = readerFieldToValIds.Map;
HashSet<ReaderField> seen = new HashSet<ReaderField>();
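// For each reader/field key, walk the reader's descendants; a descendant
// that also has cache entries for the same field means the same data is
// redundantly cached at two levels of the reader hierarchy.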
foreach (ReaderField rf in rfToValIdSets.Keys)
{
if (seen.Contains(rf))
continue;
System.Collections.IList kids = GetAllDecendentReaderKeys(rf.readerKey);
foreach (Object kidKey in kids)
{
ReaderField kid = new ReaderField(kidKey, rf.fieldName);
if (badChildren.ContainsKey(kid))
{
// we've already processed this kid as an RF and found other problems;
// track those problems as our own
badKids.Put(rf, kid);
badKids.PutAll(rf, badChildren[kid]);
badChildren.Remove(kid);
}
else if (rfToValIdSets.ContainsKey(kid))
{
// we have cache entries for the kid
badKids.Put(rf, kid);
}
seen.Add(kid);
}
seen.Add(rf);
}
// every mapping in badKids represents an Insanity
foreach (ReaderField parent in badChildren.Keys)
{
HashSet<ReaderField> kids = badChildren[parent];
List<CacheEntry> badEntries = new List<CacheEntry>(kids.Count * 2);
// put parent entr(ies) in first
{
foreach (int val in rfToValIdSets[parent])
{
badEntries.AddRange(viToItemSets[val]);
}
}
// now the entries for the descendants
foreach (ReaderField kid in kids)
{
foreach (int val in rfToValIdSets[kid])
{
badEntries.AddRange(viToItemSets[val]);
}
}
insanity.Add(new Insanity(InsanityType.SUBREADER, "Found caches for descendants of " + parent.ToString(), badEntries.ToArray()));
}
return insanity;
}
/// <summary> Checks if the seed is an IndexReader, and if so will walk
/// the hierarchy of subReaders building up a list of the objects
/// returned by obj.getFieldCacheKey()
/// </summary>
private System.Collections.IList GetAllDecendentReaderKeys(System.Object seed)
{
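// breadth-first walk: seed the worklist with the root key, then append
// each IndexReader's sub-reader cache keys as they are discovered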
List<object> all = new List<object>(17); // will grow as we iterate