Coverage Report - org.apache.maven.index.context.NexusAnalyzer
 
Classes in this File Line Coverage Branch Coverage Complexity
NexusAnalyzer
100 %
6/6
100 %
2/2
1,2
NexusAnalyzer$DeprecatedClassnamesTokenizer
100 %
4/4
100 %
2/2
1,2
NexusAnalyzer$LetterOrDigitTokenizer
100 %
4/4
N/A
1,2
NexusAnalyzer$NoopTokenizer
0 %
0/3
N/A
1,2
 
 1  
 package org.apache.maven.index.context;
 2  
 
 3  
 /*
 4  
  * Licensed to the Apache Software Foundation (ASF) under one
 5  
  * or more contributor license agreements.  See the NOTICE file
 6  
  * distributed with this work for additional information
 7  
  * regarding copyright ownership.  The ASF licenses this file
 8  
  * to you under the Apache License, Version 2.0 (the
 9  
  * "License"); you may not use this file except in compliance
 10  
  * with the License.  You may obtain a copy of the License at
 11  
  *
 12  
  *   http://www.apache.org/licenses/LICENSE-2.0    
 13  
  *
 14  
  * Unless required by applicable law or agreed to in writing,
 15  
  * software distributed under the License is distributed on an
 16  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 17  
  * KIND, either express or implied.  See the License for the
 18  
  * specific language governing permissions and limitations
 19  
  * under the License.
 20  
  */
 21  
 
 22  
 import java.io.Reader;
 23  
 
 24  
 import org.apache.lucene.analysis.Analyzer;
 25  
 import org.apache.lucene.analysis.CharTokenizer;
 26  
 import org.apache.lucene.analysis.TokenStream;
 27  
 import org.apache.lucene.analysis.Tokenizer;
 28  
 import org.apache.maven.index.creator.JarFileContentsIndexCreator;
 29  
 
 30  
 /**
 31  
  * A Nexus-specific analyzer. The only difference from Lucene's SimpleAnalyzer is that we use LetterOrDigitTokenizer instead
 32  
  * of LowerCaseTokenizer. LetterOrDigitTokenizer does pretty much the same as LowerCaseTokenizer, it normalizes to lower
 33  
  * case, but it accepts digits as well as letters (as opposed to LowerCaseTokenizer) as token chars.
 34  
  * 
 35  
  * @author Eugene Kuleshov
 36  
  * @author cstamas
 37  
  */
 38  3387
 public final class NexusAnalyzer
 39  
     extends Analyzer
 40  
 {
 41  
     public TokenStream tokenStream( String fieldName, Reader reader )
 42  
     {
 43  63344
         return getTokenizer( fieldName, reader );
 44  
     }
 45  
 
 46  
     protected Tokenizer getTokenizer( String fieldName, Reader reader )
 47  
     {
 48  63344
         if ( JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals( fieldName ) )
 49  
         {
 50  
             // To keep "backward" compatibility, we have to use old flawed tokenizer.
 51  1091
             return new DeprecatedClassnamesTokenizer( reader );
 52  
         }
 53  
         else
 54  
         {
 55  62253
             return new LetterOrDigitTokenizer( reader );
 56  
         }
 57  
     }
 58  
 
 59  
     // ==
 60  
 
 61  
     public static class NoopTokenizer
 62  
         extends CharTokenizer
 63  
     {
 64  
         public NoopTokenizer( Reader in )
 65  
         {
 66  0
             super( in );
 67  0
         }
 68  
 
 69  
         @Override
 70  
         protected boolean isTokenChar( char c )
 71  
         {
 72  0
             return true;
 73  
         }
 74  
     }
 75  
 
 76  
     @Deprecated
 77  
     public static class DeprecatedClassnamesTokenizer
 78  
         extends CharTokenizer
 79  
     {
 80  
         public DeprecatedClassnamesTokenizer( Reader in )
 81  
         {
 82  1091
             super( in );
 83  1091
         }
 84  
 
 85  
         @Override
 86  
         protected boolean isTokenChar( char c )
 87  
         {
 88  2653689
             return c != '\n';
 89  
         }
 90  
 
 91  
         @Override
 92  
         protected char normalize( char c )
 93  
         {
 94  2581259
             return Character.toLowerCase( c );
 95  
         }
 96  
     }
 97  
 
 98  3387
     public static class LetterOrDigitTokenizer
 99  
         extends CharTokenizer
 100  
     {
 101  
         public LetterOrDigitTokenizer( Reader in )
 102  
         {
 103  62253
             super( in );
 104  62253
         }
 105  
 
 106  
         @Override
 107  
         protected boolean isTokenChar( char c )
 108  
         {
 109  3597645
             return Character.isLetterOrDigit( c );
 110  
         }
 111  
 
 112  
         @Override
 113  
         protected char normalize( char c )
 114  
         {
 115  3099787
             return Character.toLowerCase( c );
 116  
         }
 117  
     }
 118  
 
 119  
 }