/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.IO;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Index;
using Lucene.Net.Search;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using Version = Lucene.Net.Util.Version;
namespace Lucene.Net.Demo
{
/// Simple command-line based search demo.
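///
/// Example invocation (hypothetical executable name and paths; assumes an index
/// previously built by the IndexFiles demo):
///   SearchFiles.exe -index index -field contents -queries queries.txt -paging 10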
public static class SearchFiles
{
private class AnonymousClassCollector : Collector
{
private Scorer scorer;
private int docBase;
// simply print docId and score of every matching document
public override void Collect(int doc)
{
Console.Out.WriteLine("doc=" + doc + docBase + " score=" + scorer.Score());
}
public override bool AcceptsDocsOutOfOrder
{
get { return true; }
}
public override void SetNextReader(IndexReader reader, int docBase)
{
this.docBase = docBase;
}
public override void SetScorer(Scorer scorer)
{
this.scorer = scorer;
}
}
///
/// Use the norms from one field for all fields. Norms are read into memory,
/// using a byte of memory per document per searched field. This can cause
/// search of large collections with a large number of fields to run out of
/// memory. If all of the fields contain only a single token, then the norms
/// are all identical and a single norm vector may be shared.
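/// For example, searching 20 fields of a 10-million-document index would read
/// roughly 20 * 10,000,000 bytes (about 200 MB) of norms, whereas a single
/// shared norm vector needs only about 10 MB.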
///
private class OneNormsReader : FilterIndexReader
{
private readonly String field;
public OneNormsReader(IndexReader in_Renamed, String field):base(in_Renamed)
{
this.field = field;
}
public override byte[] Norms(String field)
{
return in_Renamed.Norms(this.field);
}
}
/// Simple command-line based search demo.
[STAThread]
public static void Main(String[] args)
{
String usage = "Usage:\t" + typeof(SearchFiles) + "[-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
{
Console.Out.WriteLine(usage);
Environment.Exit(0);
}
String index = "index";
String field = "contents";
String queries = null;
int repeat = 0;
bool raw = false;
String normsField = null;
bool paging = true;
int hitsPerPage = 10;
for (int i = 0; i < args.Length; i++)
{
if ("-index".Equals(args[i]))
{
index = args[i + 1];
i++;
}
else if ("-field".Equals(args[i]))
{
field = args[i + 1];
i++;
}
else if ("-queries".Equals(args[i]))
{
queries = args[i + 1];
i++;
}
else if ("-repeat".Equals(args[i]))
{
repeat = Int32.Parse(args[i + 1]);
i++;
}
else if ("-raw".Equals(args[i]))
{
raw = true;
}
else if ("-norms".Equals(args[i]))
{
normsField = args[i + 1];
i++;
}
else if ("-paging".Equals(args[i]))
{
if (args[i + 1].Equals("false"))
{
paging = false;
}
else
{
hitsPerPage = Int32.Parse(args[i + 1]);
if (hitsPerPage == 0)
{
paging = false;
}
}
i++;
}
}
IndexReader indexReader = null;
try
{
// only searching, so read-only=true
indexReader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo(index)), true);
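// if requested, wrap the reader so that one field's norms are reused for every searched field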
if (normsField != null)
indexReader = new OneNormsReader(indexReader, normsField);
Searcher searcher = new IndexSearcher(indexReader);
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
StreamReader queryReader;
if (queries != null)
{
queryReader = new StreamReader(queries, Encoding.Default);
}
else
{
queryReader = new StreamReader(Console.OpenStandardInput(), Encoding.UTF8);
}
var parser = new QueryParser(Version.LUCENE_30, field, analyzer);
while (true)
{
if (queries == null)
// prompt the user
Console.Out.WriteLine("Enter query: ");
String line = queryReader.ReadLine();
if (line == null)
break;
line = line.Trim();
if (line.Length == 0)
break;
Query query = parser.Parse(line);
Console.Out.WriteLine("Searching for: " + query.ToString(field));
if (repeat > 0)
{
// repeat & time as benchmark
DateTime start = DateTime.Now;
for (int i = 0; i < repeat; i++)
{
searcher.Search(query, null, 100);
}
DateTime end = DateTime.Now;
Console.Out.WriteLine("Time: " + (end.Millisecond - start.Millisecond) + "ms");
}
if (paging)
{
DoPagingSearch(queryReader, searcher, query, hitsPerPage, raw, queries == null);
}
else
{
DoStreamingSearch(searcher, query);
}
}
queryReader.Close();
}
finally
{
if (indexReader != null)
{
indexReader.Dispose();
}
}
}
///
/// This method uses a custom Collector implementation which simply prints out
/// the docId and score of every matching document.
///
/// This simulates the streaming search use case, where all hits are supposed to
/// be processed, regardless of their relevance.
///
public static void DoStreamingSearch(Searcher searcher, Query query)
{
Collector streamingHitCollector = new AnonymousClassCollector();
searcher.Search(query, streamingHitCollector);
}
/// This demonstrates a typical paging search scenario, where the search engine presents
/// pages of size n to the user. The user can then go to the next page if interested in
/// the next hits.
///
/// When the query is executed for the first time, only enough results are collected
/// to fill 5 result pages. If the user wants to page beyond this limit, then the query
/// is executed another time and all hits are collected.
///
///
public static void DoPagingSearch(StreamReader input, Searcher searcher, Query query, int hitsPerPage, bool raw, bool interactive)
{
// Collect enough docs to show 5 pages
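// (the 'false' argument tells the collector that documents are not guaranteed to be scored in docId order)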
var collector = TopScoreDocCollector.Create(5 * hitsPerPage, false);
searcher.Search(query, collector);
var hits = collector.TopDocs().ScoreDocs;
int numTotalHits = collector.TotalHits;
Console.Out.WriteLine(numTotalHits + " total matching documents");
int start = 0;
int end = Math.Min(numTotalHits, hitsPerPage);
while (true)
{
if (end > hits.Length)
{
Console.Out.WriteLine("Only results 1 - " + hits.Length + " of " + numTotalHits + " total matching documents collected.");
Console.Out.WriteLine("Collect more (y/n) ?");
String line = input.ReadLine();
if (String.IsNullOrEmpty(line) || line[0] == 'n')
{
break;
}
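// the user wants results beyond what was collected: re-run the query, this time collecting all hits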
collector = TopScoreDocCollector.Create(numTotalHits, false);
searcher.Search(query, collector);
hits = collector.TopDocs().ScoreDocs;
}
end = Math.Min(hits.Length, start + hitsPerPage);
for (int i = start; i < end; i++)
{
if (raw)
{
// output raw format
Console.Out.WriteLine("doc=" + hits[i].Doc + " score=" + hits[i].Score);
continue;
}
Document doc = searcher.Doc(hits[i].Doc);
String path = doc.Get("path");
if (path != null)
{
Console.Out.WriteLine((i + 1) + ". " + path);
String title = doc.Get("title");
if (title != null)
{
Console.Out.WriteLine(" Title: " + doc.Get("title"));
}
}
else
{
Console.Out.WriteLine((i + 1) + ". " + "No path for this document");
}
}
if (!interactive)
{
break;
}
if (numTotalHits >= end)
{
bool quit = false;
while (true)
{
Console.Out.Write("Press ");
if (start - hitsPerPage >= 0)
{
Console.Out.Write("(p)revious page, ");
}
if (start + hitsPerPage < numTotalHits)
{
Console.Out.Write("(n)ext page, ");
}
Console.Out.WriteLine("(q)uit or enter number to jump to a page.");
String line = input.ReadLine();
if (String.IsNullOrEmpty(line) || line[0] == 'q')
{
quit = true;
break;
}
if (line[0] == 'p')
{
start = Math.Max(0, start - hitsPerPage);
break;
}
else if (line[0] == 'n')
{
if (start + hitsPerPage < numTotalHits)
{
start += hitsPerPage;
}
break;
}
else
{
int page;
if (Int32.TryParse(line, out page) && page > 0 && (page - 1) * hitsPerPage < numTotalHits)
{
start = (page - 1) * hitsPerPage;
break;
}
else
{
Console.Out.WriteLine("No such page");
}
}
}
if (quit)
break;
end = Math.Min(numTotalHits, start + hitsPerPage);
}
}
}
}
}