/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using IndexReader = Lucene.Net.Index.IndexReader; using FieldCache = Lucene.Net.Search.FieldCache; using CacheEntry = Lucene.Net.Search.CacheEntry; namespace Lucene.Net.Util { /// Provides methods for sanity checking that entries in the FieldCache /// are not wasteful or inconsistent. ///

///
/// Lucene 2.9 introduced numerous enhancements into how the FieldCache
/// is used by the low levels of Lucene searching (for Sorting and
/// ValueSourceQueries) to improve both the speed for Sorting, as well
/// as reopening of IndexReaders. But these changes have shifted the
/// usage of FieldCache from "top level" IndexReaders (frequently a
/// MultiReader or DirectoryReader) down to the leaf-level SegmentReaders.
/// As a result, existing applications that directly access the FieldCache
/// may find RAM usage increase significantly when upgrading to 2.9 or
/// later. This class provides an API for these applications (or their
/// unit tests) to check at run time whether the FieldCache contains "insane"
/// usages of the FieldCache.
///
/// EXPERIMENTAL API: This API is considered extremely advanced and
/// experimental. It may be removed or altered without warning in future
/// releases of Lucene.
///
/// /// /// /// /// /// public sealed class FieldCacheSanityChecker { private RamUsageEstimator ramCalc = null; public FieldCacheSanityChecker() { /* NOOP */ } /// If set, will be used to estimate size for all CacheEntry objects /// dealt with. /// public void SetRamUsageEstimator(RamUsageEstimator r) { ramCalc = r; } /// Quick and dirty convenience method /// /// public static Insanity[] CheckSanity(FieldCache cache) { return CheckSanity(cache.GetCacheEntries()); } /// Quick and dirty convenience method that instantiates an instance with /// "good defaults" and uses it to test the CacheEntrys /// /// /// public static Insanity[] CheckSanity(params CacheEntry[] cacheEntries) { FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker(); // doesn't check for interned sanityChecker.SetRamUsageEstimator(new RamUsageEstimator(false)); return sanityChecker.Check(cacheEntries); } /// Tests a CacheEntry[] for indication of "insane" cache usage. ///

/// NOTE:FieldCache CreationPlaceholder objects are ignored. /// (:TODO: is this a bad idea? are we masking a real problem?) ///

///

public Insanity[] Check(params CacheEntry[] cacheEntries) { if (null == cacheEntries || 0 == cacheEntries.Length) return new Insanity[0]; if (null != ramCalc) { for (int i = 0; i < cacheEntries.Length; i++) { cacheEntries[i].EstimateSize(ramCalc); } } // the indirect mapping lets MapOfSet dedup identical valIds for us // // maps the (valId) identityhashCode of cache values to // sets of CacheEntry instances MapOfSets valIdToItems = new MapOfSets(new Dictionary>(17)); // maps ReaderField keys to Sets of ValueIds MapOfSets readerFieldToValIds = new MapOfSets(new Dictionary>(17)); // // any keys that we know result in more then one valId HashSet valMismatchKeys = new HashSet(); // iterate over all the cacheEntries to get the mappings we'll need for (int i = 0; i < cacheEntries.Length; i++) { CacheEntry item = cacheEntries[i]; System.Object val = item.Value; if (val is Lucene.Net.Search.CreationPlaceholder) continue; ReaderField rf = new ReaderField(item.ReaderKey, item.FieldName); System.Int32 valId = val.GetHashCode(); // indirect mapping, so the MapOfSet will dedup identical valIds for us valIdToItems.Put(valId, item); if (1 < readerFieldToValIds.Put(rf, valId)) { valMismatchKeys.Add(rf); } } List insanity = new List(valMismatchKeys.Count * 3); insanity.AddRange(CheckValueMismatch(valIdToItems, readerFieldToValIds, valMismatchKeys)); insanity.AddRange(CheckSubreaders(valIdToItems, readerFieldToValIds)); return insanity.ToArray(); } /// Internal helper method used by check that iterates over /// valMismatchKeys and generates a Collection of Insanity /// instances accordingly. The MapOfSets are used to populate /// the Insantiy objects. 
/// /// /// private List CheckValueMismatch(MapOfSets valIdToItems, MapOfSets readerFieldToValIds, HashSet valMismatchKeys) { List insanity = new List(valMismatchKeys.Count * 3); if (!(valMismatchKeys.Count == 0)) { // we have multiple values for some ReaderFields IDictionary> rfMap = readerFieldToValIds.Map; IDictionary> valMap = valIdToItems.Map; foreach (ReaderField rf in valMismatchKeys) { List badEntries = new List(valMismatchKeys.Count * 2); foreach (int val in rfMap[rf]) { foreach (CacheEntry entry in valMap[val]) { badEntries.Add(entry); } } insanity.Add(new Insanity(InsanityType.VALUEMISMATCH, "Multiple distinct value objects for " + rf.ToString(), badEntries.ToArray())); } } return insanity; } /// Internal helper method used by check that iterates over /// the keys of readerFieldToValIds and generates a Collection /// of Insanity instances whenever two (or more) ReaderField instances are /// found that have an ancestery relationships. /// /// /// /// private List CheckSubreaders(MapOfSets valIdToItems, MapOfSets readerFieldToValIds) { List insanity = new List(23); Dictionary> badChildren = new Dictionary>(17); MapOfSets badKids = new MapOfSets(badChildren); // wrapper IDictionary> viToItemSets = valIdToItems.Map; IDictionary> rfToValIdSets = readerFieldToValIds.Map; HashSet seen = new HashSet(); foreach (ReaderField rf in rfToValIdSets.Keys) { if (seen.Contains(rf)) continue; System.Collections.IList kids = GetAllDecendentReaderKeys(rf.readerKey); foreach (Object kidKey in kids) { ReaderField kid = new ReaderField(kidKey, rf.fieldName); if (badChildren.ContainsKey(kid)) { // we've already process this kid as RF and found other problems // track those problems as our own badKids.Put(rf, kid); badKids.PutAll(rf, badChildren[kid]); badChildren.Remove(kid); } else if (rfToValIdSets.ContainsKey(kid)) { // we have cache entries for the kid badKids.Put(rf, kid); } seen.Add(kid); } seen.Add(rf); } // every mapping in badKids represents an Insanity foreach 
(ReaderField parent in badChildren.Keys) { HashSet kids = badChildren[parent]; List badEntries = new List(kids.Count * 2); // put parent entr(ies) in first { foreach (int val in rfToValIdSets[parent]) { badEntries.AddRange(viToItemSets[val]); } } // now the entries for the descendants foreach (ReaderField kid in kids) { foreach (int val in rfToValIdSets[kid]) { badEntries.AddRange(viToItemSets[val]); } } insanity.Add(new Insanity(InsanityType.SUBREADER, "Found caches for decendents of " + parent.ToString(), badEntries.ToArray())); } return insanity; } /// Checks if the seed is an IndexReader, and if so will walk /// the hierarchy of subReaders building up a list of the objects /// returned by obj.getFieldCacheKey() /// private System.Collections.IList GetAllDecendentReaderKeys(System.Object seed) { List all = new List(17); // will grow as we iter all.Add(seed); for (int i = 0; i < all.Count; i++) { System.Object obj = all[i]; if (obj is IndexReader) { IndexReader[] subs = ((IndexReader) obj).GetSequentialSubReaders(); for (int j = 0; (null != subs) && (j < subs.Length); j++) { all.Add(subs[j].FieldCacheKey); } } } // need to skip the first, because it was the seed return all.GetRange(1, all.Count - 1); } /// Simple pair object for using "readerKey + fieldName" a Map key private sealed class ReaderField { public System.Object readerKey; public System.String fieldName; public ReaderField(System.Object readerKey, System.String fieldName) { this.readerKey = readerKey; this.fieldName = fieldName; } public override int GetHashCode() { return readerKey.GetHashCode() * fieldName.GetHashCode(); } public override bool Equals(System.Object that) { if (!(that is ReaderField)) return false; ReaderField other = (ReaderField) that; return (this.readerKey == other.readerKey && this.fieldName.Equals(other.fieldName)); } public override System.String ToString() { return readerKey.ToString() + "+" + fieldName; } } /// Simple container for a collection of related CacheEntry objects 
that /// in conjunction with eachother represent some "insane" usage of the /// FieldCache. /// public sealed class Insanity { private InsanityType type; private System.String msg; private CacheEntry[] entries; public Insanity(InsanityType type, System.String msg, params CacheEntry[] entries) { if (null == type) { throw new System.ArgumentException("Insanity requires non-null InsanityType"); } if (null == entries || 0 == entries.Length) { throw new System.ArgumentException("Insanity requires non-null/non-empty CacheEntry[]"); } this.type = type; this.msg = msg; this.entries = entries; } /// Type of insane behavior this object represents public InsanityType Type { get { return type; } } /// Description of hte insane behavior public string Msg { get { return msg; } } /// CacheEntry objects which suggest a problem public CacheEntry[] GetCacheEntries() { return entries; } /// Multi-Line representation of this Insanity object, starting with /// the Type and Msg, followed by each CacheEntry.toString() on it's /// own line prefaced by a tab character /// public override System.String ToString() { System.Text.StringBuilder buf = new System.Text.StringBuilder(); buf.Append(Type).Append(": "); System.String m = Msg; if (null != m) buf.Append(m); buf.Append('\n'); CacheEntry[] ce = GetCacheEntries(); for (int i = 0; i < ce.Length; i++) { buf.Append('\t').Append(ce[i].ToString()).Append('\n'); } return buf.ToString(); } } /// An Enumaration of the differnet types of "insane" behavior that /// may be detected in a FieldCache. /// /// /// /// /// /// /// /// public sealed class InsanityType { private System.String label; internal InsanityType(System.String label) { this.label = label; } public override System.String ToString() { return label; } /// Indicates an overlap in cache usage on a given field /// in sub/super readers. /// public static readonly InsanityType SUBREADER = new InsanityType("SUBREADER"); ///

/// Indicates entries have the same reader+fieldname but /// different cached values. This can happen if different datatypes, /// or parsers are used -- and while it's not necessarily a bug /// it's typically an indication of a possible problem. ///

///

/// PNOTE: Only the reader, fieldname, and cached value are actually /// tested -- if two cache entries have different parsers or datatypes but /// the cached values are the same Object (== not just equal()) this method /// does not consider that a red flag. This allows for subtle variations /// in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...) ///

///

public static readonly InsanityType VALUEMISMATCH = new InsanityType("VALUEMISMATCH"); /// Indicates an expected bit of "insanity". This may be useful for /// clients that wish to preserve/log information about insane usage /// but indicate that it was expected. /// public static readonly InsanityType EXPECTED = new InsanityType("EXPECTED"); } } }