View Javadoc
1   package org.apache.maven.index.context;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.apache.lucene.analysis.Analyzer;
23  import org.apache.lucene.analysis.AnalyzerWrapper;
24  import org.apache.lucene.analysis.util.CharTokenizer;
25  import org.apache.maven.index.creator.JarFileContentsIndexCreator;
26  
27  /**
28   * A Nexus specific analyzer. Only difference from Lucene's SimpleAnalyzer is that we use LetterOrDigitTokenizer instead
29   * of LowerCaseTokenizer. LetterOrDigitTokenizer does pretty much the same as LowerCaseTokenizer, it normalizes to lower
30   * case letter, but it takes letters and numbers too (as opposed to LowerCaseTokenizer) as token chars.
31   * 
32   * @author Eugene Kuleshov
33   * @author cstamas
34   */
35  public final class NexusAnalyzer
36      extends AnalyzerWrapper
37  {
38      private static final Analyzer CLASS_NAMES_ANALYZER = new Analyzer()
39      {
40          @Override
41          protected TokenStreamComponents createComponents( String fieldName )
42          {
43              return new TokenStreamComponents( new DeprecatedClassnamesTokenizer() );
44          }
45      };
46  
47      private static final Analyzer LETTER_OR_DIGIT_ANALYZER = new Analyzer()
48      {
49          @Override
50          protected TokenStreamComponents createComponents( String filedName )
51          {
52              return new TokenStreamComponents( new LetterOrDigitTokenizer() );
53          }
54      };
55  
56      public NexusAnalyzer()
57      {
58          super( PER_FIELD_REUSE_STRATEGY );
59      }
60  
61      @Override
62      protected Analyzer getWrappedAnalyzer( String fieldName )
63      {
64          if ( JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals( fieldName ) )
65          {
66              // To keep "backward" compatibility, we have to use old flawed tokenizer.
67              return CLASS_NAMES_ANALYZER;
68          }
69          else
70          {
71              return LETTER_OR_DIGIT_ANALYZER;
72          }
73      }
74  
75      // ==
76  
77      public static class NoopTokenizer
78          extends CharTokenizer
79      {
80          public NoopTokenizer()
81          {
82              super();
83          }
84  
85          @Override
86          protected boolean isTokenChar( int i )
87          {
88              return true;
89          }
90      }
91  
92      @Deprecated
93      public static class DeprecatedClassnamesTokenizer
94          extends CharTokenizer
95      {
96          public DeprecatedClassnamesTokenizer()
97          {
98              super();
99          }
100         
101         @Override
102         protected boolean isTokenChar( int i )
103         {
104             return i != '\n';
105         }
106         
107         @Override
108         protected int normalize( int c )
109         {
110             return Character.toLowerCase( c );
111         }
112     }
113 
114     public static class LetterOrDigitTokenizer
115         extends CharTokenizer
116     {
117         public LetterOrDigitTokenizer()
118         {
119             super();
120         }
121 
122         @Override
123         protected boolean isTokenChar( int c )
124         {
125             return Character.isLetterOrDigit( c );
126         }
127 
128         @Override
129         protected int normalize( int c )
130         {
131             return Character.toLowerCase( c );
132         }
133     }
134 
135 }