/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
using Document = Lucene.Net.Documents.Document;
using FilterIndexReader = Lucene.Net.Index.FilterIndexReader;
using IndexReader = Lucene.Net.Index.IndexReader;
using QueryParser = Lucene.Net.QueryParsers.QueryParser;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using Version = Lucene.Net.Util.Version;
using Collector = Lucene.Net.Search.Collector;
using IndexSearcher = Lucene.Net.Search.IndexSearcher;
using Query = Lucene.Net.Search.Query;
using ScoreDoc = Lucene.Net.Search.ScoreDoc;
using Scorer = Lucene.Net.Search.Scorer;
using Searcher = Lucene.Net.Search.Searcher;
using TopScoreDocCollector = Lucene.Net.Search.TopScoreDocCollector;

namespace Lucene.Net.Demo
{
    /// Simple command-line based search demo.
    public class SearchFiles
    {
        private class AnonymousClassCollector : Collector
        {
            private Scorer scorer;
            private int docBase;

            // simply print docId and score of every matching document
            public override void Collect(int doc)
            {
                // docBase must be added to the segment-relative doc id;
                // parenthesized so the ids are summed, not string-concatenated
                System.Console.Out.WriteLine("doc=" + (doc + docBase) + " score=" + scorer.Score());
            }

            public override bool AcceptsDocsOutOfOrder()
            {
                return true;
            }

            public override void SetNextReader(IndexReader reader, int docBase)
            {
                this.docBase = docBase;
            }

            public override void SetScorer(Scorer scorer)
            {
                this.scorer = scorer;
            }
        }

        /// Use the norms from one field for all fields. Norms are read into memory,
        /// using a byte of memory per document per searched field. This can cause
        /// search of large collections with a large number of fields to run out of
        /// memory. If all of the fields contain only a single token, then the norms
        /// are all identical, and a single norm vector may be shared.
        private class OneNormsReader : FilterIndexReader
        {
            private System.String field;

            public OneNormsReader(IndexReader in_Renamed, System.String field) : base(in_Renamed)
            {
                this.field = field;
            }

            public override byte[] Norms(System.String field)
            {
                return in_Renamed.Norms(this.field);
            }
        }

        private SearchFiles()
        {
        }

        /// Simple command-line based search demo.
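        ///
        /// A hypothetical invocation (the index path and option values are
        /// illustrative, not defaults beyond those set below):
        ///
        ///   SearchFiles -index ./index -field contents -paging 10
        ///
        /// With no -queries file, queries are read interactively from standard input.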
        [STAThread]
        public static void Main(System.String[] args)
        {
            System.String usage = "Usage:\t" + typeof(SearchFiles) + " [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
            usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
            if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
            {
                System.Console.Out.WriteLine(usage);
                System.Environment.Exit(0);
            }

            System.String index = "index";
            System.String field = "contents";
            System.String queries = null;
            int repeat = 0;
            bool raw = false;
            System.String normsField = null;
            bool paging = true;
            int hitsPerPage = 10;

            for (int i = 0; i < args.Length; i++)
            {
                if ("-index".Equals(args[i]))
                {
                    index = args[i + 1];
                    i++;
                }
                else if ("-field".Equals(args[i]))
                {
                    field = args[i + 1];
                    i++;
                }
                else if ("-queries".Equals(args[i]))
                {
                    queries = args[i + 1];
                    i++;
                }
                else if ("-repeat".Equals(args[i]))
                {
                    repeat = System.Int32.Parse(args[i + 1]);
                    i++;
                }
                else if ("-raw".Equals(args[i]))
                {
                    raw = true;
                }
                else if ("-norms".Equals(args[i]))
                {
                    normsField = args[i + 1];
                    i++;
                }
                else if ("-paging".Equals(args[i]))
                {
                    if (args[i + 1].Equals("false"))
                    {
                        paging = false;
                    }
                    else
                    {
                        hitsPerPage = System.Int32.Parse(args[i + 1]);
                        if (hitsPerPage == 0)
                        {
                            paging = false;
                        }
                    }
                    i++;
                }
            }

            IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.FileInfo(index)), true); // only searching, so read-only=true
            if (normsField != null)
                reader = new OneNormsReader(reader, normsField);

            Searcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

            System.IO.StreamReader in_Renamed = null;
            if (queries != null)
            {
                in_Renamed = new System.IO.StreamReader(queries, System.Text.Encoding.Default);
            }
            else
            {
                in_Renamed = new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.UTF8);
            }

            QueryParser parser = new QueryParser(field, analyzer);
            while (true)
            {
                if (queries == null)
                    // prompt the user
                    System.Console.Out.WriteLine("Enter query: ");

                System.String line = in_Renamed.ReadLine();
                if (line == null)
                    break;

                line = line.Trim();
                if (line.Length == 0)
                    break;

                Query query = parser.Parse(line);
                System.Console.Out.WriteLine("Searching for: " + query.ToString(field));

                if (repeat > 0)
                {
                    // repeat & time as benchmark
                    System.DateTime start = System.DateTime.Now;
                    for (int i = 0; i < repeat; i++)
                    {
                        searcher.Search(query, null, 100);
                    }
                    System.DateTime end = System.DateTime.Now;
                    // use the elapsed time span, not the Millisecond component of each timestamp
                    System.Console.Out.WriteLine("Time: " + (end - start).TotalMilliseconds + "ms");
                }

                if (paging)
                {
                    DoPagingSearch(in_Renamed, searcher, query, hitsPerPage, raw, queries == null);
                }
                else
                {
                    DoStreamingSearch(searcher, query);
                }
            }
            reader.Close();
        }

        /// This method uses a custom HitCollector implementation which simply prints out
        /// the docId and score of every matching document.
        ///
        /// This simulates the streaming search use case, where all hits are supposed to
        /// be processed, regardless of their relevance.
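        ///
        /// Any Collector can be plugged in here. As a hypothetical sketch (not part
        /// of this demo), a collector that only counts hits instead of printing them
        /// would override the same four members as AnonymousClassCollector above:
        ///
        ///   class CountingCollector : Collector
        ///   {
        ///       public int Count;
        ///       public override void Collect(int doc) { Count++; }
        ///       public override bool AcceptsDocsOutOfOrder() { return true; }
        ///       public override void SetNextReader(IndexReader reader, int docBase) { }
        ///       public override void SetScorer(Scorer scorer) { }
        ///   }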
        public static void DoStreamingSearch(Searcher searcher, Query query)
        {
            Collector streamingHitCollector = new AnonymousClassCollector();
            searcher.Search(query, streamingHitCollector);
        }

        /// This demonstrates a typical paging search scenario, where the search engine presents
        /// pages of size n to the user. The user can then go to the next page if interested in
        /// the next hits.
        ///
        /// When the query is executed for the first time, only enough results are collected
        /// to fill 5 result pages. If the user wants to page beyond this limit, the query
        /// is executed another time and all hits are collected.
        public static void DoPagingSearch(System.IO.StreamReader in_Renamed, Searcher searcher, Query query, int hitsPerPage, bool raw, bool interactive)
        {
            // Collect enough docs to show 5 pages
            TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
            searcher.Search(query, collector);
            ScoreDoc[] hits = collector.TopDocs().scoreDocs;

            int numTotalHits = collector.GetTotalHits();
            System.Console.Out.WriteLine(numTotalHits + " total matching documents");

            int start = 0;
            int end = System.Math.Min(numTotalHits, hitsPerPage);

            while (true)
            {
                if (end > hits.Length)
                {
                    System.Console.Out.WriteLine("Only results 1 - " + hits.Length + " of " + numTotalHits + " total matching documents collected.");
                    System.Console.Out.WriteLine("Collect more (y/n) ?");
                    System.String line = in_Renamed.ReadLine();
                    // treat end-of-input like 'n'
                    if (line == null || line.Length == 0 || line[0] == 'n')
                    {
                        break;
                    }

                    collector = TopScoreDocCollector.create(numTotalHits, false);
                    searcher.Search(query, collector);
                    hits = collector.TopDocs().scoreDocs;
                }

                end = System.Math.Min(hits.Length, start + hitsPerPage);

                for (int i = start; i < end; i++)
                {
                    if (raw)
                    {
                        // output raw format
                        System.Console.Out.WriteLine("doc=" + hits[i].doc + " score=" + hits[i].score);
                        continue;
                    }

                    Document doc = searcher.Doc(hits[i].doc);
                    System.String path = doc.Get("path");
                    if (path != null)
                    {
                        System.Console.Out.WriteLine((i + 1) + ". " + path);
                        System.String title = doc.Get("title");
                        if (title != null)
                        {
                            System.Console.Out.WriteLine(" Title: " + doc.Get("title"));
                        }
                    }
                    else
                    {
                        System.Console.Out.WriteLine((i + 1) + ". " + "No path for this document");
                    }
                }

                if (!interactive)
                {
                    break;
                }

                if (numTotalHits >= end)
                {
                    bool quit = false;
                    while (true)
                    {
                        System.Console.Out.Write("Press ");
                        if (start - hitsPerPage >= 0)
                        {
                            System.Console.Out.Write("(p)revious page, ");
                        }
                        if (start + hitsPerPage < numTotalHits)
                        {
                            System.Console.Out.Write("(n)ext page, ");
                        }
                        System.Console.Out.WriteLine("(q)uit or enter number to jump to a page.");

                        System.String line = in_Renamed.ReadLine();
                        // treat end-of-input like 'q'
                        if (line == null || line.Length == 0 || line[0] == 'q')
                        {
                            quit = true;
                            break;
                        }
                        if (line[0] == 'p')
                        {
                            start = System.Math.Max(0, start - hitsPerPage);
                            break;
                        }
                        else if (line[0] == 'n')
                        {
                            if (start + hitsPerPage < numTotalHits)
                            {
                                start += hitsPerPage;
                            }
                            break;
                        }
                        else
                        {
                            int page = System.Int32.Parse(line);
                            if ((page - 1) * hitsPerPage < numTotalHits)
                            {
                                start = (page - 1) * hitsPerPage;
                                break;
                            }
                            else
                            {
                                System.Console.Out.WriteLine("No such page");
                            }
                        }
                    }
                    if (quit)
                        break;
                    end = System.Math.Min(numTotalHits, start + hitsPerPage);
                }
            }
        }
    }
}
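// A minimal, hypothetical sketch of driving the demo from code rather than the
// command line (the "index" path and the query text "apache" are assumptions):
//
//   IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.FileInfo("index")), true);
//   Searcher searcher = new IndexSearcher(reader);
//   QueryParser parser = new QueryParser("contents", new StandardAnalyzer(Version.LUCENE_CURRENT));
//   Query query = parser.Parse("apache");
//   SearchFiles.DoStreamingSearch(searcher, query); // prints every hit's doc id and score
//   reader.Close();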