# ====================================================================
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
# ====================================================================
#
# Author: Erik Hatcher
#
# to query the index generated with manindex.py
#   python mansearch.py [--format=<format string>] [--index=<index dir>] [--stats] <query...>
# by default, the index is stored in 'pages', which can be overridden with
# the MANDEX environment variable
# ====================================================================

# print() as a function keeps the script runnable on Python 2.6+ and 3.
from __future__ import print_function

# NOTE: 'lucene' is imported before the java.* / org.apache.* modules,
# exactly as in the original import order.
import sys, os, lucene

from string import Template
from datetime import datetime
from getopt import getopt, GetoptError

from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.store import SimpleFSDirectory

# Upper bound on the number of hits fetched (and printed) per query.
MAX_HITS = 50


class CustomTemplate(Template):
    """string.Template variant whose placeholders start with '#' (e.g. #name)."""

    delimiter = '#'


def usage():
    """Print a short command-line synopsis to stdout."""
    print(sys.argv[0],
          "[--format=<format string>] [--index=<index dir>] [--stats] <query...>")
    print("default index is found from MANDEX environment variable")


def main(argv):
    """Parse *argv*, run the query against the index and print the hits.

    argv is the argument list without the program name.  Recognised
    options are --format (output template, default "#name"), --index
    (index directory, default $MANDEX or 'pages') and --stats (report hit
    count and search time on stderr).  All remaining arguments are joined
    with spaces and parsed as a query on the "keywords" field, with AND as
    the default operator.

    Returns the process exit status: 0 on success, 2 on a usage error
    (unknown option or no query terms).
    """
    try:
        options, args = getopt(argv, '', ['format=', 'index=', 'stats'])
    except GetoptError:
        usage()
        return 2

    # Without query terms there is nothing to search for; show the
    # synopsis instead of handing an empty string to the query parser.
    if not args:
        usage()
        return 2

    template_text = "#name"
    index_dir = os.environ.get('MANDEX') or 'pages'
    stats = False
    for opt, value in options:
        if opt == "--format":
            template_text = value
        elif opt == "--index":
            index_dir = value
        elif opt == "--stats":
            stats = True

    template = CustomTemplate(template_text)

    directory = SimpleFSDirectory(Paths.get(index_dir))
    try:
        reader = DirectoryReader.open(directory)
        try:
            searcher = IndexSearcher(reader)

            parser = QueryParser("keywords", StandardAnalyzer())
            parser.setDefaultOperator(QueryParser.Operator.AND)
            query = parser.parse(' '.join(args))

            # Only the search call itself is timed.
            start = datetime.now()
            score_docs = searcher.search(query, MAX_HITS).scoreDocs
            duration = datetime.now() - start
            if stats:
                print("Found %d document(s) (in %s) that matched query '%s':"
                      % (len(score_docs), duration, query), file=sys.stderr)

            for score_doc in score_docs:
                doc = searcher.doc(score_doc.doc)
                # Map every field of the hit to its string value so the
                # template can reference any of them by name.
                table = {field.name(): field.stringValue()
                         for field in doc.getFields()}
                print(template.substitute(table))
        finally:
            # Release the index reader even if parsing/searching failed.
            reader.close()
    finally:
        directory.close()

    return 0


if __name__ == '__main__':
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    sys.exit(main(sys.argv[1:]))