/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
namespace WorldNet.Net
{
/// <summary>
/// Test program to look up synonyms, plus a helper that expands a query
/// string with the synonyms stored in a synonym index.
/// </summary>
public class SynLookup
{
    /// <summary>
    /// Command-line entry point: prints every value of the
    /// <see cref="Syns2Index.F_SYN"/> field for documents whose
    /// <see cref="Syns2Index.F_WORD"/> field matches the given word.
    /// </summary>
    /// <param name="args">
    /// Exactly two values: the path of the index directory and the word to look up.
    /// </param>
    [STAThread]
    public static void Main(System.String[] args)
    {
        if (args.Length != 2)
        {
            System.Console.Out.WriteLine(typeof(SynLookup) + " <index path> <word>");
            return;
        }

        using (var directory = FSDirectory.Open(new DirectoryInfo(args[0])))
        using (var searcher = new IndexSearcher(directory, true))
        {
            String word = args[1];
            Query query = new TermQuery(new Term(Syns2Index.F_WORD, word));

            // First pass only counts the matches so the second pass can ask
            // for exactly that many top documents.
            var countingCollector = new CountingCollector();
            searcher.Search(query, countingCollector);

            if (countingCollector.numHits == 0)
            {
                Console.Out.WriteLine("No synonyms found for " + word);
                // Stop here: requesting zero top hits is not a valid search
                // (Lucene.Net 3.0.x throws ArgumentException for nDocs <= 0).
                return;
            }

            Console.Out.WriteLine("Synonyms found for \"" + word + "\":");

            var hits = searcher.Search(query, countingCollector.numHits).ScoreDocs;
            foreach (var hit in hits)
            {
                foreach (var synonym in searcher.Doc(hit.Doc).GetValues(Syns2Index.F_SYN))
                {
                    Console.Out.WriteLine(synonym);
                }
            }
        }
    }

    /// <summary>
    /// Perform synonym expansion on a query.
    /// </summary>
    /// <remarks>
    /// The query text is tokenized with <paramref name="a"/>. Each distinct token
    /// becomes an optional (SHOULD) term clause, immediately followed by one
    /// optional clause per not-yet-seen synonym found for that token in
    /// <paramref name="syns"/>. All state is local to the call.
    /// </remarks>
    /// <param name="query">Text to tokenize and expand.</param>
    /// <param name="syns">Searcher over the synonym index.</param>
    /// <param name="a">Analyzer used to split <paramref name="query"/> into terms.</param>
    /// <param name="field">Field name used for every generated term clause.</param>
    /// <param name="boost">
    /// Boost assigned to synonym clauses when greater than zero; otherwise the
    /// clause keeps its default boost. Clauses for the original words are never boosted.
    /// </param>
    /// <returns>A <see cref="BooleanQuery"/> holding the original and synonym clauses.</returns>
    public static Query Expand(String query,
                               Searcher syns,
                               Analyzer a,
                               String field,
                               float boost)
    {
        var already = new HashSet<string>(); // avoid dups
        var top = new List<string>();        // original words, in first-seen order

        // [1] split the query into distinct words so expansion can avoid dups
        using (var ts = a.TokenStream(field, new StringReader(query)))
        {
            var termAtt = ts.AddAttribute<ITermAttribute>();
            while (ts.IncrementToken())
            {
                var word = termAtt.Term;
                if (already.Add(word))
                {
                    top.Add(word);
                }
            }
        }

        // [2] form query
        var expanded = new BooleanQuery();
        foreach (var word in top)
        {
            // [2a] add the top-level word itself
            expanded.Add(new TermQuery(new Term(field, word)), Occur.SHOULD);

            // [2b] add its unique synonyms; the collector appends to 'expanded'
            var collector = new CollectorImpl(field, boost, already, expanded);
            syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)), collector);
        }

        return expanded;
    }

    /// <summary>
    /// Collector that only counts how many documents were collected.
    /// </summary>
    internal sealed class CountingCollector : Collector
    {
        /// <summary>Number of documents passed to <see cref="Collect"/> so far.</summary>
        public int numHits;

        public override void SetScorer(Scorer scorer)
        {
            // Scores are not needed for counting.
        }

        public override void Collect(int doc)
        {
            numHits++;
        }

        public override void SetNextReader(IndexReader reader, int docBase)
        {
            // Document ids are never resolved, so the reader is not needed.
        }

        public override bool AcceptsDocsOutOfOrder
        {
            get { return true; }
        }
    }

    /// <summary>
    /// Collector that turns the synonyms stored on each collected document into
    /// optional term clauses of a target query, skipping terms already seen.
    /// </summary>
    internal sealed class CollectorImpl : Collector
    {
        private IndexReader reader;
        private readonly string field;
        private readonly float boost;
        private readonly HashSet<string> seen;
        private readonly BooleanQuery target;

        /// <param name="field">Field name for the generated term clauses.</param>
        /// <param name="boost">Boost applied to each clause when greater than zero.</param>
        /// <param name="seen">Terms already present in the query; updated as synonyms are added.</param>
        /// <param name="target">Query that receives the synonym clauses.</param>
        public CollectorImpl(string field, float boost, HashSet<string> seen, BooleanQuery target)
        {
            this.field = field;
            this.boost = boost;
            this.seen = seen;
            this.target = target;
        }

        public override void SetScorer(Scorer scorer)
        {
            // Ignore
        }

        public override void Collect(int doc)
        {
            // 'doc' is resolved against the reader most recently supplied
            // through SetNextReader.
            var d = reader.Document(doc);
            foreach (var syn in d.GetValues(Syns2Index.F_SYN))
            {
                if (!seen.Add(syn))
                {
                    continue; // already part of the query
                }

                var tq = new TermQuery(new Term(field, syn));
                if (boost > 0) // else keep normal 1.0
                {
                    tq.Boost = boost;
                }
                target.Add(tq, Occur.SHOULD);
            }
        }

        public override void SetNextReader(IndexReader reader, int docBase)
        {
            this.reader = reader;
        }

        public override bool AcceptsDocsOutOfOrder
        {
            get { return true; }
        }
    }

    /// <summary>
    /// From project WordNet.Net.Syns2Index
    /// </summary>
    public class Syns2Index
    {
        /// <summary>Name of the field read to obtain a document's synonyms.</summary>
        public const String F_SYN = "syn";

        /// <summary>Name of the field queried with the word being looked up.</summary>
        public const String F_WORD = "word";
    }
}
}