/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.QueryParsers;
using Lucene.Net.Store;
using Lucene.Net.Util;

using System.Threading;
using System.Threading.Tasks;

/*
 Suppose we want a faceted search on fields f1, f2 and f3,
 and their values in the index are

            f1   f2   f3
            --   --   --
    doc1    A    I    1
    doc2    A    I    2
    doc3    A    I    3
    doc4    A    J    1
    doc5    A    J    2
    doc6    A    J    3
    doc7    B    I    1

 Algorithm:
 1- Find all possible values for f1 which are (A,B), for f2 which are (I,J) and for f3 which are (1,2,3)
 2- Find the Cartesian product of (A,B)X(I,J)X(1,2,3). (12 possible groups)
 3- Eliminate the ones that surely result in 0 hits. (for ex, B J 2, since they have no doc. in common)
*/

/*
 TODO: Support for pre-built queries defining groups can be added
*/

namespace Lucene.Net.Search
{
    /// <summary>
    /// Groups the documents of an index by every combination of values of one or
    /// more fields that actually occurs, and runs queries against each group.
    /// The groups are computed once, in the constructor; <see cref="Search"/>
    /// then evaluates a query per group.
    /// </summary>
    public partial class SimpleFacetedSearch : IDisposable
    {
        /// <summary>Default value of the <c>maxDocPerGroup</c> argument of <see cref="Search"/>.</summary>
        public const int DefaultMaxDocPerGroup = 25;

        /// <summary>
        /// Upper bound for the product of the distinct-value counts of the group-by
        /// fields. The constructor throws as soon as the running product exceeds it.
        /// </summary>
        public static int MAX_FACETS = 2048;

        IndexReader _Reader;

        // One entry per surviving value combination: the combination itself and the
        // bit set produced by AND-ing the per-value bit sets of that combination.
        // NOTE(review): the generic type arguments in this file were reconstructed
        // from usage; the string key type is an assumption - confirm against
        // FieldValuesBitSets.FieldValueBitSetPair and FacetName.
        List<KeyValuePair<List<string>, OpenBitSetDISI>> _Groups = new List<KeyValuePair<List<string>, OpenBitSetDISI>>();

        /// <summary>Creates a faceted search that groups by a single field.</summary>
        /// <param name="reader">Index reader the groups are built from.</param>
        /// <param name="groupByField">Name of the field to group by.</param>
        public SimpleFacetedSearch(IndexReader reader, string groupByField)
            : this(reader, new string[] { groupByField })
        {
        }

        /// <summary>
        /// Creates a faceted search that groups by the value combinations of
        /// several fields.
        /// </summary>
        /// <param name="reader">Index reader the groups are built from.</param>
        /// <param name="groupByFields">Names of the fields to group by.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="reader"/> or <paramref name="groupByFields"/> is null.
        /// </exception>
        /// <exception cref="Exception">
        /// The number of possible value combinations exceeds <see cref="MAX_FACETS"/>.
        /// </exception>
        public SimpleFacetedSearch(IndexReader reader, string[] groupByFields)
        {
            // Fail fast with a descriptive exception instead of a late
            // NullReferenceException (possibly raised inside the parallel loop).
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }
            if (groupByFields == null)
            {
                throw new ArgumentNullException("groupByFields");
            }

            this._Reader = reader;

            List<FieldValuesBitSets> fieldValuesBitSets = new List<FieldValuesBitSets>();

            //STEP 1
            //f1 = A, B
            //f2 = I, J
            //f3 = 1, 2, 3

            // Accumulated as a long: the previous product is at most MAX_FACETS and
            // Count is an int, so the multiplication cannot wrap around. With an int
            // accumulator a field with very many distinct values could overflow to a
            // negative/small number and slip past the limit check below.
            long maxFacets = 1;
            List<List<string>> inputToCP = new List<List<string>>();
            foreach (string field in groupByFields)
            {
                FieldValuesBitSets f = new FieldValuesBitSets(reader, field);
                maxFacets *= f.FieldValueBitSetPair.Count;
                if (maxFacets > MAX_FACETS)
                {
                    throw new Exception("Facet count exceeded " + MAX_FACETS);
                }
                fieldValuesBitSets.Add(f);
                inputToCP.Add(f.FieldValueBitSetPair.Keys.ToList());
            }

            //STEP 2
            // comb1: A I 1
            // comb2: A I 2 etc.
            var cp = inputToCP.CartesianProduct();

            //STEP 3
            //create a single BitSet for each combination
            //BitSet1: A AND I AND 1
            //BitSet2: A AND I AND 2 etc.
            //and remove impossible comb's (for ex, B J 3) from list.
            Parallel.ForEach(cp, combinations =>
            {
                // Start with the whole range [0, Size()) set, then narrow it down
                // by AND-ing in the bit set of each field value of the combination.
                OpenBitSetDISI bitSet = new OpenBitSetDISI(_Reader.MaxDoc());
                bitSet.Set(0, bitSet.Size());
                List<string> comb = combinations.ToList();

                // comb[j] is the value chosen for the j-th group-by field.
                for (int j = 0; j < comb.Count; j++)
                {
                    bitSet.And(fieldValuesBitSets[j].FieldValueBitSetPair[comb[j]]);
                }

                // Keep only the combinations that still have at least one bit set.
                if (bitSet.Cardinality() > 0)
                {
                    // _Groups is shared by all loop bodies, so additions are
                    // serialized; the resulting order of the groups therefore
                    // depends on which loop body gets here first.
                    lock (_Groups)
                    {
                        _Groups.Add(new KeyValuePair<List<string>, OpenBitSetDISI>(comb, bitSet));
                    }
                }
            });

            //Now _Groups has 7 rows (as <List<string>, BitSet> pairs)
        }

        /// <summary>
        /// Runs <paramref name="query"/> against every group built by the constructor.
        /// </summary>
        /// <param name="query">Query to evaluate.</param>
        /// <param name="maxDocPerGroup">
        /// Value handed to each <c>HitsPerFacet</c>; defaults to
        /// <see cref="DefaultMaxDocPerGroup"/>.
        /// </param>
        /// <returns>
        /// A <c>Hits</c> whose <c>HitsPerFacet</c> array holds one entry per group,
        /// in the same order as the internal group list.
        /// </returns>
        public Hits Search(Query query, int maxDocPerGroup = DefaultMaxDocPerGroup)
        {
            List<HitsPerFacet> hitsPerGroup = new List<HitsPerFacet>();

            // The query is turned into a doc-id set once and shared by all groups.
            DocIdSet queryDocidSet = new CachingWrapperFilter(new QueryWrapperFilter(query)).GetDocIdSet(_Reader);

            Action[] actions = new Action[_Groups.Count];
            for (int i = 0; i < _Groups.Count; i++)
            {
                // h is declared inside the loop body, so every lambda captures its
                // own HitsPerFacet instance.
                HitsPerFacet h = new HitsPerFacet(new FacetName(_Groups[i].Key.ToArray()), _Reader, queryDocidSet, _Groups[i].Value, maxDocPerGroup);
                hitsPerGroup.Add(h);
                actions[i] = () => h.Calculate();
            }

            // Run all per-group calculations; Parallel.Invoke returns once all of
            // them have finished.
            Parallel.Invoke(actions);

            Hits hits = new Hits();
            hits.HitsPerFacet = hitsPerGroup.ToArray();

            return hits;
        }

        /// <summary>Currently a no-op; nothing is released here.</summary>
        public void Dispose()
        {
        }
    }
}