# Author: Erik Hatcher
#
# to query the index generated with manindex.py
# python mansearch.py [--format=<template>] [--index=<dir>] [--stats] <query>
# by default, the index is stored in 'pages', which can be overridden with
# the MANDEX environment variable or the --index option
# ====================================================================

import sys, os, lucene
from string import Template
from datetime import datetime
from getopt import getopt, GetoptError
from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.store import SimpleFSDirectory

if __name__ == '__main__':
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])

    def usage():
        print sys.argv[0], "[--format=<template>] [--index=<dir>] [--stats] <query>"
        print "default index is found from MANDEX environment variable"

    # Parse the command line.  Only long options are accepted; anything
    # getopt does not recognise is treated as a usage error (exit status 2).
    try:
        options, args = getopt(sys.argv[1:], '', ['format=', 'index=', 'stats'])
    except GetoptError:
        usage()
        sys.exit(2)

    # The query is mandatory.  Without this check an empty (or all-blank)
    # query string would be handed to the query parser, which rejects it
    # with a Java exception traceback instead of a helpful message.
    if not ' '.join(args).strip():
        usage()
        sys.exit(2)

    # Defaults: print each hit's "name" field, read the index from $MANDEX
    # (falling back to 'pages' when unset or empty), no statistics line.
    format = "#name"
    indexDir = os.environ.get('MANDEX') or 'pages'
    stats = False

    # Command-line options override the defaults above.
    for o, a in options:
        if o == "--format":
            format = a
        elif o == "--index":
            indexDir = a
        elif o == "--stats":
            stats = True

    class CustomTemplate(Template):
        """string.Template whose placeholders are introduced by '#' rather than '$'.

        With this delimiter a format such as "#name" substitutes the value
        stored under the key "name"; "##" yields a literal '#'.
        """

        delimiter = "#"

    template = CustomTemplate(format)

    fsDir = SimpleFSDirectory(Paths.get(indexDir))
    searcher = IndexSearcher(DirectoryReader.open(fsDir))
    analyzer = StandardAnalyzer()

    parser = QueryParser("keywords", analyzer)
    parser.setDefaultOperator(QueryParser.Operator.AND)
    query = parser.parse(' '.join(args))

    start = datetime.now()
    scoreDocs = searcher.search(query, 50).scoreDocs
    duration = datetime.now() - start

    if stats:
        print >>sys.stderr, "Found %d document(s) (in %s) that matched query '%s':" %(len(scoreDocs), duration, query)

    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        table = dict((field.name(), field.stringValue())
                     for field in doc.getFields())
        print template.substitute(table)