DefaultIteratorResultSet

package org.apache.maven.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.maven.index.context.IndexUtils;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.creator.JarFileContentsIndexCreator;

/**
 * Default implementation of IteratorResultSet. TODO: there is too much of logic, refactor this!
 *
 * @author cstamas
 */
public class DefaultIteratorResultSet
    implements IteratorResultSet
{
    private final IteratorSearchRequest searchRequest;

    private final IndexSearcher indexSearcher;

    private final List<IndexingContext> contexts;

    private final int[] starts;

    private final ArtifactInfoFilter filter;

    private final ArtifactInfoPostprocessor postprocessor;

    private final List<MatchHighlightRequest> matchHighlightRequests;

    private final TopDocs hits;

    private final int from;

    private final int count;

    private final int maxRecPointer;

    private int pointer;

    private int processedArtifactInfoCount;

    private ArtifactInfo ai;

    protected DefaultIteratorResultSet( final IteratorSearchRequest request, final IndexSearcher indexSearcher,
                                        final List<IndexingContext> contexts, final TopDocs hits )
        throws IOException
    {
        this.searchRequest = request;

        this.indexSearcher = indexSearcher;

        this.contexts = contexts;

        {
            int maxDoc = 0;
            this.starts = new int[contexts.size() + 1]; // build starts array
            for ( int i = 0; i < contexts.size(); i++ )
            {
                starts[i] = maxDoc;
                maxDoc += contexts.get( i ).getIndexReader().maxDoc(); // compute maxDocs
            }
            starts[contexts.size()] = maxDoc;
        }

        this.filter = request.getArtifactInfoFilter();

        this.postprocessor = request.getArtifactInfoPostprocessor();

        this.matchHighlightRequests = request.getMatchHighlightRequests();

        this.hits = hits;

        this.from = request.getStart();

        this.count =
            ( request.getCount() == AbstractSearchRequest.UNDEFINED ? hits.scoreDocs.length : Math.min(
                request.getCount(), hits.scoreDocs.length ) );

        this.pointer = from;

        this.processedArtifactInfoCount = 0;

        this.maxRecPointer = from + count;

        ai = createNextAi();

        if ( ai == null )
        {
            cleanUp();
        }
    }

    public boolean hasNext()
    {
        return ai != null;
    }

    public ArtifactInfo next()
    {
        ArtifactInfo result = ai;

        try
        {
            ai = createNextAi();
        }
        catch ( IOException e )
        {
            ai = null;

            throw new IllegalStateException( "Cannot fetch next ArtifactInfo!", e );
        }
        finally
        {
            if ( ai == null )
            {
                cleanUp();
            }
        }

        return result;
    }

    public void remove()
    {
        throw new UnsupportedOperationException( "Method not supported on " + getClass().getName() );
    }

    public Iterator<ArtifactInfo> iterator()
    {
        return this;
    }

    public void close()
    {
        cleanUp();
    }

    public int getTotalProcessedArtifactInfoCount()
    {
        return processedArtifactInfoCount;
    }

    @Override
    public void finalize()
        throws Throwable
    {
        super.finalize();

        if ( !cleanedUp )
        {
            System.err.println( "#WARNING: Lock leaking from " + getClass().getName() + " for query "
                + searchRequest.getQuery().toString() );

            cleanUp();
        }
    }

    // ==

    protected ArtifactInfo createNextAi()
        throws IOException
    {
        ArtifactInfo result = null;

        // we should stop if:
        // a) we found what we want
        // b) pointer advanced over more documents that user requested
        // c) pointer advanced over more documents that hits has
        // or we found what we need
        while ( ( result == null ) && ( pointer < maxRecPointer ) && ( pointer < hits.scoreDocs.length ) )
        {
            Document doc = indexSearcher.doc( hits.scoreDocs[pointer].doc );

            IndexingContext context = getIndexingContextForPointer( doc, hits.scoreDocs[pointer].doc );

            result = IndexUtils.constructArtifactInfo( doc, context );

            if ( result != null )
            {
                // WARNING: NOT FOR PRODUCTION SYSTEMS, THIS IS VERY COSTLY OPERATION
                // For debugging only!!!
                if ( searchRequest.isLuceneExplain() )
                {
                    result.getAttributes().put( Explanation.class.getName(),
                        indexSearcher.explain( searchRequest.getQuery(), hits.scoreDocs[pointer].doc ).toString() );
                }

                result.setLuceneScore( hits.scoreDocs[pointer].score );

                result.repository = context.getRepositoryId();

                result.context = context.getId();

                if ( filter != null )
                {
                    if ( !filter.accepts( context, result ) )
                    {
                        result = null;
                    }
                }

                if ( result != null && postprocessor != null )
                {
                    postprocessor.postprocess( context, result );
                }

                if ( result != null && matchHighlightRequests.size() > 0 )
                {
                    calculateHighlights( context, doc, result );
                }
            }

            pointer++;
            processedArtifactInfoCount++;
        }

        return result;
    }

    private volatile boolean cleanedUp = false;

    protected synchronized void cleanUp()
    {
        if ( cleanedUp )
        {
            return;
        }

        for ( IndexingContext ctx : contexts )
        {
            ctx.unlock();
        }

        this.cleanedUp = true;
    }

    /**
     * Creates the MatchHighlights and adds them to ArtifactInfo if found/can.
     *
     * @param context
     * @param d
     * @param ai
     */
    protected void calculateHighlights( IndexingContext context, Document d, ArtifactInfo ai )
        throws IOException
    {
        IndexerField field = null;

        String text = null;

        List<String> highlightFragment = null;

        for ( MatchHighlightRequest hr : matchHighlightRequests )
        {
            field = selectStoredIndexerField( hr.getField() );

            if ( field != null )
            {
                text = ai.getFieldValue( field.getOntology() );

                if ( text != null )
                {
                    highlightFragment = highlightField( context, hr, field, text );

                    if ( highlightFragment != null && highlightFragment.size() > 0 )
                    {
                        MatchHighlight matchHighlight = new MatchHighlight( hr.getField(), highlightFragment );

                        ai.getMatchHighlights().add( matchHighlight );
                    }
                }
            }
        }
    }

    /**
     * Select a STORED IndexerField assigned to passed in Field.
     *
     * @param field
     * @return
     */
    protected IndexerField selectStoredIndexerField( Field field )
    {
        // hack here
        if ( MAVEN.CLASSNAMES.equals( field ) )
        {
            return JarFileContentsIndexCreator.FLD_CLASSNAMES;
        }
        else
        {
            return field.getIndexerFields().isEmpty() ? null : field.getIndexerFields().iterator().next();
        }
    }

    /**
     * Returns a string that contains match fragment highlighted in style as user requested.
     *
     * @param context
     * @param hr
     * @param field
     * @param doc
     * @return
     * @throws IOException
     */
    protected List<String> highlightField( IndexingContext context, MatchHighlightRequest hr, IndexerField field,
                                           String text )
        throws IOException
    {
        // exception with classnames
        if ( MAVEN.CLASSNAMES.equals( field.getOntology() ) )
        {
            text = text.replace( '/', '.' ).replaceAll( "^\\.", "" ).replaceAll( "\n\\.", "\n" );
        }

        Query rewrittenQuery = hr.getQuery().rewrite( context.getIndexReader() );

        CachingTokenFilter tokenStream =
            new CachingTokenFilter( context.getAnalyzer().tokenStream( field.getKey(), new StringReader( text ) ) );

        Formatter formatter = null;

        if ( MatchHighlightMode.HTML.equals( hr.getHighlightMode() ) )
        {
            formatter = new SimpleHTMLFormatter();
        }
        else
        {
            throw new UnsupportedOperationException( "Hightlight more \"" + hr.getHighlightMode().toString()
                + "\" is not supported!" );
        }

        return getBestFragments( rewrittenQuery, formatter, tokenStream, text, 3 );
    }

    protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream,
                                                   String text, int maxNumFragments )
        throws IOException
    {
        Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) );

        highlighter.setTextFragmenter( new OneLineFragmenter() );

        tokenStream.reset();

        maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check

        TextFragment[] frag;
        // Get text
        ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments );

        try
        {
            frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments );

            for ( int i = 0; i < frag.length; i++ )
            {
                if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) )
                {
                    fragTexts.add( frag[i].toString() );
                }
            }
        }
        catch ( InvalidTokenOffsetsException e )
        {
            // empty?
        }

        return fragTexts;
    }

    protected IndexingContext getIndexingContextForPointer( Document doc, int docPtr )
    {
        return contexts.get( readerIndex( docPtr, this.starts, this.contexts.size() ) );
    }

    private static int readerIndex( int n, int[] starts, int numSubReaders )
    { // find reader for doc n:
        int lo = 0; // search starts array
        int hi = numSubReaders - 1; // for first element less

        while ( hi >= lo )
        {
            int mid = ( lo + hi ) >>> 1;
            int midValue = starts[mid];
            if ( n < midValue )
            {
                hi = mid - 1;
            }
            else if ( n > midValue )
            {
                lo = mid + 1;
            }
            else
            { // found a match
                while ( mid + 1 < numSubReaders && starts[mid + 1] == midValue )
                {
                    mid++; // scan to last match
                }
                return mid;
            }
        }
        return hi;
    }
}