/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using System.IO; using System.Linq; using Lucene.Net.Analysis; using Lucene.Net.Analysis.Tokenattributes; using Lucene.Net.Index; using Lucene.Net.Search; using Lucene.Net.Store; namespace WorldNet.Net { /// Test program to look up synonyms. 
public class SynLookup
{
    // Scratch state shared between Expand and CollectorImpl: Expand re-creates both on
    // every call and CollectorImpl appends to them while the synonym search is running.
    // Because this state is static, Expand must not be called concurrently.

    /// <summary>Terms already added to the query under construction (used to avoid duplicates).</summary>
    private static HashSet<String> already;

    /// <summary>The query currently being assembled by <see cref="Expand"/>.</summary>
    private static BooleanQuery tmp;

    /// <summary>
    /// Command-line entry point: prints every synonym stored in the index for a word.
    /// </summary>
    /// <param name="args">Exactly two values: the index directory path and the word to look up.</param>
    [STAThread]
    public static void Main(System.String[] args)
    {
        if (args == null || args.Length != 2)
        {
            System.Console.Out.WriteLine(typeof(SynLookup) + " <index path> <word>");
            return;
        }

        using (var directory = FSDirectory.Open(new DirectoryInfo(args[0])))
        using (var searcher = new IndexSearcher(directory, true))
        {
            String word = args[1];
            Query query = new TermQuery(new Term(Syns2Index.F_WORD, word));

            // First pass only counts the matches so the second pass can request all of them.
            var countingCollector = new CountingCollector();
            searcher.Search(query, countingCollector);

            if (countingCollector.numHits == 0)
            {
                Console.Out.WriteLine("No synonyms found for " + word);

                // Nothing to list, and a top-N search for zero documents would be
                // rejected by the searcher, so stop here.
                return;
            }

            Console.Out.WriteLine("Synonyms found for \"" + word + "\":");

            var hits = searcher.Search(query, countingCollector.numHits).ScoreDocs;
            foreach (var hit in hits)
            {
                foreach (var synonym in searcher.Doc(hit.Doc).GetValues(Syns2Index.F_SYN))
                {
                    Console.Out.WriteLine(synonym);
                }
            }
        }
    }

    /// <summary>
    /// Perform synonym expansion on a query.
    /// </summary>
    /// <param name="query">Text to expand; it is split into terms with <paramref name="a"/>.</param>
    /// <param name="syns">Searcher over the synonym index (queried on the word field, read from the syn field).</param>
    /// <param name="a">Analyzer used to tokenize <paramref name="query"/>.</param>
    /// <param name="field">Field that every generated term query targets.</param>
    /// <param name="boost">Boost applied to synonym terms; values &lt;= 0 leave the term query's boost untouched.</param>
    /// <returns>
    /// A boolean query with one optional (SHOULD) clause per distinct query term,
    /// each followed by optional clauses for that term's not-yet-seen synonyms.
    /// </returns>
    public static Query Expand(String query, Searcher syns, Analyzer a, String field, float boost)
    {
        already = new HashSet<String>(); // avoid dups
        var top = new List<String>(); // needs to be separately listed..

        // [1] Split the query into distinct words, remembering first-seen order.
        using (var ts = a.TokenStream(field, new StringReader(query)))
        {
            var termAtt = ts.AddAttribute<ITermAttribute>();
            while (ts.IncrementToken())
            {
                var word = termAtt.Term;

                // HashSet.Add returns false for a word that was already recorded.
                if (already.Add(word))
                {
                    top.Add(word);
                }
            }
        }

        tmp = new BooleanQuery();

        // [2] form query
        foreach (var word in top)
        {
            // [2a] add the top-level word itself
            tmp.Add(new TermQuery(new Term(field, word)), Occur.SHOULD);

            // [2b] let the collector append the word's synonyms to tmp
            syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)), new CollectorImpl(field, boost));
        }

        return tmp;
    }

    /// <summary>
    /// Collector that only counts how many documents were collected.
    /// </summary>
    internal sealed class CountingCollector : Collector
    {
        /// <summary>Number of documents passed to <see cref="Collect"/> so far.</summary>
        public int numHits;

        public override void SetScorer(Scorer scorer)
        {
            // Scores are not needed for counting.
        }

        public override void Collect(int doc)
        {
            numHits++;
        }

        public override void SetNextReader(IndexReader reader, int docBase)
        {
            // Document contents are never read, so the reader is not kept.
        }

        public override bool AcceptsDocsOutOfOrder
        {
            get { return true; }
        }
    }

    /// <summary>
    /// Collector used by <see cref="Expand"/>: for every matching synonym-index document it
    /// adds each not-yet-seen synonym to the query under construction as an optional term.
    /// </summary>
    internal sealed class CollectorImpl : Collector
    {
        private IndexReader reader;
        private readonly string field;
        private readonly float boost;

        /// <param name="field">Field the synonym term queries target.</param>
        /// <param name="boost">Boost for synonym terms; only applied when greater than zero.</param>
        public CollectorImpl(string field, float boost)
        {
            this.field = field;
            this.boost = boost;
        }

        public override void SetScorer(Scorer scorer)
        {
            // Ignore
        }

        public override void Collect(int doc)
        {
            // doc is looked up on the reader most recently supplied via SetNextReader.
            var d = reader.Document(doc);
            foreach (var syn in d.GetValues(Syns2Index.F_SYN))
            {
                if (!already.Add(syn))
                {
                    continue; // already part of the query
                }

                var tq = new TermQuery(new Term(field, syn));
                if (boost > 0) // else keep normal 1.0
                {
                    tq.Boost = boost;
                }

                tmp.Add(tq, Occur.SHOULD);
            }
        }

        public override void SetNextReader(IndexReader reader, int docBase)
        {
            this.reader = reader;
        }

        public override bool AcceptsDocsOutOfOrder
        {
            get { return true; }
        }
    }

    /// <summary>
    /// From project WordNet.Net.Syns2Index
    /// </summary>
    public class Syns2Index
    {
        /// <summary>Name of the index field read for synonym values.</summary>
        public const String F_SYN = "syn";

        /// <summary>Name of the index field queried with the word being looked up.</summary>
        public const String F_WORD = "word";
    }
}
} // namespace WorldNet.Net