/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using IndexReader = Lucene.Net.Index.IndexReader; using FieldCache = Lucene.Net.Search.FieldCache; using CacheEntry = Lucene.Net.Search.CacheEntry; namespace Lucene.Net.Util { /// Provides methods for sanity checking that entries in the FieldCache /// are not wasteful or inconsistent. ///

///
/// Lucene 2.9 introduced numerous enhancements into how the FieldCache
/// is used by the low levels of Lucene searching (for Sorting and
/// ValueSourceQueries) to improve both the speed for Sorting, as well
/// as reopening of IndexReaders. But these changes have shifted the
/// usage of FieldCache from "top level" IndexReaders (frequently a
/// MultiReader or DirectoryReader) down to the leaf-level SegmentReaders.
/// As a result, existing applications that directly access the FieldCache
/// may find RAM usage increase significantly when upgrading to 2.9 or
/// later. This class provides an API for these applications (or their
/// unit tests) to check at run time whether the FieldCache contains "insane"
/// usages of the FieldCache.
///
/// EXPERIMENTAL API: This API is considered extremely advanced and
/// experimental. It may be removed or altered without warning in future
/// releases of Lucene.
///
/// /// /// /// /// /// public sealed class FieldCacheSanityChecker { private RamUsageEstimator ramCalc = null; public FieldCacheSanityChecker() { /* NOOP */ } /// If set, will be used to estimate size for all CacheEntry objects /// dealt with. /// public void SetRamUsageEstimator(RamUsageEstimator r) { ramCalc = r; } /// Quick and dirty convenience method /// /// public static Insanity[] CheckSanity(FieldCache cache) { return CheckSanity(cache.GetCacheEntries()); } /// Quick and dirty convenience method that instantiates an instance with /// "good defaults" and uses it to test the CacheEntrys /// /// /// public static Insanity[] CheckSanity(params CacheEntry[] cacheEntries) { FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker(); // doesn't check for interned sanityChecker.SetRamUsageEstimator(new RamUsageEstimator(false)); return sanityChecker.Check(cacheEntries); } /// Tests a CacheEntry[] for indication of "insane" cache usage. ///

/// NOTE:FieldCache CreationPlaceholder objects are ignored. /// (:TODO: is this a bad idea? are we masking a real problem?) ///

///

public Insanity[] Check(params CacheEntry[] cacheEntries) { if (null == cacheEntries || 0 == cacheEntries.Length) return new Insanity[0]; if (null != ramCalc) { for (int i = 0; i < cacheEntries.Length; i++) { cacheEntries[i].EstimateSize(ramCalc); } } // the indirect mapping lets MapOfSet dedup identical valIds for us // // maps the (valId) identityhashCode of cache values to // sets of CacheEntry instances MapOfSets valIdToItems = new MapOfSets(new Dictionary>(17)); // maps ReaderField keys to Sets of ValueIds MapOfSets readerFieldToValIds = new MapOfSets(new Dictionary>(17)); // // any keys that we know result in more then one valId HashSet valMismatchKeys = new HashSet(); // iterate over all the cacheEntries to get the mappings we'll need for (int i = 0; i < cacheEntries.Length; i++) { CacheEntry item = cacheEntries[i]; System.Object val = item.Value; if (val is Lucene.Net.Search.CreationPlaceholder) continue; ReaderField rf = new ReaderField(item.ReaderKey, item.FieldName); System.Int32 valId = val.GetHashCode(); // indirect mapping, so the MapOfSet will dedup identical valIds for us valIdToItems.Put(valId, item); if (1 < readerFieldToValIds.Put(rf, valId)) { valMismatchKeys.Add(rf); } } List insanity = new List(valMismatchKeys.Count * 3); insanity.AddRange(CheckValueMismatch(valIdToItems, readerFieldToValIds, valMismatchKeys)); insanity.AddRange(CheckSubreaders(valIdToItems, readerFieldToValIds)); return insanity.ToArray(); } /// Internal helper method used by check that iterates over /// valMismatchKeys and generates a Collection of Insanity /// instances accordingly. The MapOfSets are used to populate /// the Insantiy objects. 
/// /// /// private List CheckValueMismatch(MapOfSets valIdToItems, MapOfSets readerFieldToValIds, HashSet valMismatchKeys) { List insanity = new List(valMismatchKeys.Count * 3); if (!(valMismatchKeys.Count == 0)) { // we have multiple values for some ReaderFields IDictionary> rfMap = readerFieldToValIds.Map; IDictionary> valMap = valIdToItems.Map; foreach (ReaderField rf in valMismatchKeys) { List badEntries = new List(valMismatchKeys.Count * 2); foreach (int val in rfMap[rf]) { foreach (CacheEntry entry in valMap[val]) { badEntries.Add(entry); } } insanity.Add(new Insanity(InsanityType.VALUEMISMATCH, "Multiple distinct value objects for " + rf.ToString(), badEntries.ToArray())); } } return insanity; } /// Internal helper method used by check that iterates over /// the keys of readerFieldToValIds and generates a Collection /// of Insanity instances whenever two (or more) ReaderField instances are /// found that have an ancestery relationships. /// /// /// /// private List CheckSubreaders(MapOfSets valIdToItems, MapOfSets readerFieldToValIds) { List insanity = new List(23); Dictionary> badChildren = new Dictionary>(17); MapOfSets badKids = new MapOfSets(badChildren); // wrapper IDictionary> viToItemSets = valIdToItems.Map; IDictionary> rfToValIdSets = readerFieldToValIds.Map; HashSet seen = new HashSet(); foreach (ReaderField rf in rfToValIdSets.Keys) { if (seen.Contains(rf)) continue; System.Collections.IList kids = GetAllDecendentReaderKeys(rf.readerKey); foreach (Object kidKey in kids) { ReaderField kid = new ReaderField(kidKey, rf.fieldName); if (badChildren.ContainsKey(kid)) { // we've already process this kid as RF and found other problems // track those problems as our own badKids.Put(rf, kid); badKids.PutAll(rf, badChildren[kid]); badChildren.Remove(kid); } else if (rfToValIdSets.ContainsKey(kid)) { // we have cache entries for the kid badKids.Put(rf, kid); } seen.Add(kid); } seen.Add(rf); } // every mapping in badKids represents an Insanity foreach 
(ReaderField parent in badChildren.Keys) { HashSet kids = badChildren[parent]; List badEntries = new List(kids.Count * 2); // put parent entr(ies) in first { foreach (int val in rfToValIdSets[parent]) { badEntries.AddRange(viToItemSets[val]); } } // now the entries for the descendants foreach (ReaderField kid in kids) { foreach (int val in rfToValIdSets[kid]) { badEntries.AddRange(viToItemSets[val]); } } insanity.Add(new Insanity(InsanityType.SUBREADER, "Found caches for decendents of " + parent.ToString(), badEntries.ToArray())); } return insanity; } /// Checks if the seed is an IndexReader, and if so will walk /// the hierarchy of subReaders building up a list of the objects /// returned by obj.getFieldCacheKey() /// private System.Collections.IList GetAllDecendentReaderKeys(System.Object seed) { List all = new List(17); // will grow as we iter all.Add(seed); for (int i = 0; i < all.Count; i++) { System.Object obj = all[i]; if (obj is IndexReader) { IndexReader[] subs = ((IndexReader) obj).GetSequentialSubReaders(); for (int j = 0; (null != subs) && (j < subs.Length); j++) { all.Add(subs[j].FieldCacheKey); } } } // need to skip the first, because it was the seed return all.GetRange(1, all.Count - 1); } /// Simple pair object for using "readerKey + fieldName" a Map key private sealed class ReaderField { public System.Object readerKey; public System.String fieldName; public ReaderField(System.Object readerKey, System.String fieldName) { this.readerKey = readerKey; this.fieldName = fieldName; } public override int GetHashCode() { return readerKey.GetHashCode() * fieldName.GetHashCode(); } public override bool Equals(System.Object that) { if (!(that is ReaderField)) return false; ReaderField other = (ReaderField) that; return (this.readerKey == other.readerKey && this.fieldName.Equals(other.fieldName)); } public override System.String ToString() { return readerKey.ToString() + "+" + fieldName; } } /// Simple container for a collection of related CacheEntry objects 
that /// in conjunction with eachother represent some "insane" usage of the /// FieldCache. /// public sealed class Insanity { private InsanityType type; private System.String msg; private CacheEntry[] entries; public Insanity(InsanityType type, System.String msg, params CacheEntry[] entries) { if (null == type) { throw new System.ArgumentException("Insanity requires non-null InsanityType"); } if (null == entries || 0 == entries.Length) { throw new System.ArgumentException("Insanity requires non-null/non-empty CacheEntry[]"); } this.type = type; this.msg = msg; this.entries = entries; } /// Type of insane behavior this object represents public InsanityType Type { get { return type; } } /// Description of hte insane behavior public string Msg { get { return msg; } } /// CacheEntry objects which suggest a problem public CacheEntry[] GetCacheEntries() { return entries; } /// Multi-Line representation of this Insanity object, starting with /// the Type and Msg, followed by each CacheEntry.toString() on it's /// own line prefaced by a tab character /// public override System.String ToString() { System.Text.StringBuilder buf = new System.Text.StringBuilder(); buf.Append(Type).Append(": "); System.String m = Msg; if (null != m) buf.Append(m); buf.Append('\n'); CacheEntry[] ce = GetCacheEntries(); for (int i = 0; i < ce.Length; i++) { buf.Append('\t').Append(ce[i].ToString()).Append('\n'); } return buf.ToString(); } } /// An Enumaration of the differnet types of "insane" behavior that /// may be detected in a FieldCache. /// /// /// /// /// /// /// /// public sealed class InsanityType { private System.String label; internal InsanityType(System.String label) { this.label = label; } public override System.String ToString() { return label; } /// Indicates an overlap in cache usage on a given field /// in sub/super readers. /// public static readonly InsanityType SUBREADER = new InsanityType("SUBREADER"); ///

/// Indicates entries have the same reader+fieldname but /// different cached values. This can happen if different datatypes, /// or parsers are used -- and while it's not necessarily a bug /// it's typically an indication of a possible problem. ///

///

/// PNOTE: Only the reader, fieldname, and cached value are actually /// tested -- if two cache entries have different parsers or datatypes but /// the cached values are the same Object (== not just equal()) this method /// does not consider that a red flag. This allows for subtle variations /// in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...) ///

///

public static readonly InsanityType VALUEMISMATCH = new InsanityType("VALUEMISMATCH"); /// Indicates an expected bit of "insanity". This may be useful for /// clients that wish to preserve/log information about insane usage /// but indicate that it was expected. /// public static readonly InsanityType EXPECTED = new InsanityType("EXPECTED"); } } }