Coverage Report - org.apache.maven.jxr.util.SimpleWordTokenizer
 
Classes in this File Line Coverage Branch Coverage Complexity
SimpleWordTokenizer
100%
47/47
N/A
4
 
 1  
 package org.apache.maven.jxr.util;
 2  
 
 3  
 /*
 4  
  * Licensed to the Apache Software Foundation (ASF) under one
 5  
  * or more contributor license agreements.  See the NOTICE file
 6  
  * distributed with this work for additional information
 7  
  * regarding copyright ownership.  The ASF licenses this file
 8  
  * to you under the Apache License, Version 2.0 (the
 9  
  * "License"); you may not use this file except in compliance
 10  
  * with the License.  You may obtain a copy of the License at
 11  
  *
 12  
  *   http://www.apache.org/licenses/LICENSE-2.0
 13  
  *
 14  
  * Unless required by applicable law or agreed to in writing,
 15  
  * software distributed under the License is distributed on an
 16  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 17  
  * KIND, either express or implied.  See the License for the
 18  
  * specific language governing permissions and limitations
 19  
  * under the License.
 20  
  */
 21  
 
 22  
 import java.util.Collections;
 23  
 import java.util.Vector;
 24  
 
 25  
 /**
 26  
  * This is a small and fast word tokenizer. It has different characteristics
 27  
  * from the normal Java tokenizer. It only considers clear words that are only
 28  
  * ended with spaces as strings. EX: "Flight" would be a word but "Flight()"
 29  
  * would not.
 30  
  */
 31  
 public class SimpleWordTokenizer
 32  
 {
 33  
 
 34  
     /**
 35  
      * Description of the Field
 36  
      */
 37  1
     public static final char[] BREAKERS = {'(', ')', '[', ' ', '{', '}'};
 38  
 
 39  
     /**
 40  
      * Break the given line into multiple StringUtils
 41  
      */
 42  
     public static StringEntry[] tokenize( String line )
 43  
     {
 44  
 
 45  
         /*
 46  
         determine where to start processing this String... this could
 47  
         either be the start of the line or just keep going until the first
 48  
         */
 49  246
         int start = getStart( line );
 50  
 
 51  
         //find the first non-BREAKER char and assume that is where you want to start
 52  
 
 53  246
         if ( line == null || line.length() == 0 || start == -1 )
 54  
         {
 55  48
             return new StringEntry[0];
 56  
         }
 57  
 
 58  198
         return tokenize( line, start );
 59  
     }
 60  
 
 61  
 
 62  
     /**
 63  
      * Tokenize the given line but only return StringUtils that match the parameter
 64  
      * find.
 65  
      *
 66  
      * @param line String to search in
 67  
      * @param find String to match.
 68  
      */
 69  
     public static StringEntry[] tokenize( String line, String find )
 70  
     {
 71  
 
 72  5
         Vector v = new Vector();
 73  
 
 74  5
         StringEntry[] se = tokenize( line );
 75  
 
 76  11
         for ( int i = 0; i < se.length; ++i )
 77  
         {
 78  
 
 79  6
             if ( se[i].toString().equals( find ) )
 80  
             {
 81  5
                 v.addElement( se[i] );
 82  
             }
 83  
 
 84  
         }
 85  
 
 86  5
         StringEntry[] found = new StringEntry[v.size()];
 87  5
         Collections.sort( v );
 88  5
         v.copyInto( found );
 89  5
         return found;
 90  
     }
 91  
 
 92  
     /**
 93  
      * Internal impl. Specify the start and end.
 94  
      */
 95  
     private static StringEntry[] tokenize( String line, int start )
 96  
     {
 97  
 
 98  198
         Vector words = new Vector();
 99  
 
 100  
         //algorithm works like this... break the line out into segments
 101  
         //that are separated by spaces, and if the entire String doesn't contain
 102  
         //a non-Alpha char then assume it is a word.
 103  
         while ( true )
 104  
         {
 105  
 
 106  680
             int next = getNextBreak( line, start );
 107  
 
 108  680
             if ( next < 0 || next <= start )
 109  
             {
 110  198
                 break;
 111  
             }
 112  
 
 113  482
             String word = line.substring( start, next );
 114  
 
 115  482
             if ( isWord( word ) )
 116  
             {
 117  122
                 words.addElement( new StringEntry( word, start ) );
 118  
             }
 119  
 
 120  482
             start = next + 1;
 121  
         }
 122  
 
 123  198
         StringEntry[] found = new StringEntry[words.size()];
 124  198
         words.copyInto( found );
 125  198
         return found;
 126  
     }
 127  
 
 128  
 
 129  
     /**
 130  
      * Go through the entire String and if any character is not a Letter( a, b,
 131  
      * c, d, etc) then return false.
 132  
      */
 133  
     private static boolean isWord( String string )
 134  
     {
 135  
 
 136  482
         if ( string == null || string.length() == 0 )
 137  
         {
 138  
 
 139  
             return false;
 140  
         }
 141  
 
 142  3056
         for ( int i = 0; i < string.length(); ++i )
 143  
         {
 144  
 
 145  2934
             char c = string.charAt( i );
 146  
 
 147  2934
             if ( Character.isLetter( c ) == false && c != '.' )
 148  
             {
 149  360
                 return false;
 150  
             }
 151  
 
 152  
         }
 153  
 
 154  122
         return true;
 155  
     }
 156  
 
 157  
     /**
 158  
      * Go through the list of BREAKERS and find the closes one.
 159  
      */
 160  
     private static int getNextBreak( String string, int start )
 161  
     {
 162  
 
 163  680
         int breakPoint = -1;
 164  
 
 165  4760
         for ( int i = 0; i < BREAKERS.length; ++i )
 166  
         {
 167  
 
 168  4080
             int next = string.indexOf( BREAKERS[i], start );
 169  
 
 170  4080
             if ( breakPoint == -1 || next < breakPoint && next != -1 )
 171  
             {
 172  
 
 173  2594
                 breakPoint = next;
 174  
 
 175  
             }
 176  
 
 177  
         }
 178  
 
 179  
         //if the breakPoint is still -1 go to the end of the string
 180  680
         if ( breakPoint == -1 )
 181  
         {
 182  214
             breakPoint = string.length();
 183  
         }
 184  
 
 185  680
         return breakPoint;
 186  
     }
 187  
 
 188  
     /**
 189  
      * Go through the list of BREAKERS and find the closes one.
 190  
      */
 191  
     private static int getStart( String string )
 192  
     {
 193  
 
 194  1205
         for ( int i = 0; i < string.length(); ++i )
 195  
         {
 196  
 
 197  1157
             if ( isBreaker( string.charAt( i ) ) == false )
 198  
             {
 199  198
                 return i;
 200  
             }
 201  
 
 202  
         }
 203  
 
 204  48
         return -1;
 205  
     }
 206  
 
 207  
 
 208  
     /**
 209  
      * Return true if the given char is considered a breaker.
 210  
      */
 211  
     private static boolean isBreaker( char c )
 212  
     {
 213  
 
 214  5194
         for ( int i = 0; i < BREAKERS.length; ++i )
 215  
         {
 216  
 
 217  4996
             if ( BREAKERS[i] == c )
 218  
             {
 219  959
                 return true;
 220  
             }
 221  
 
 222  
         }
 223  
 
 224  198
         return false;
 225  
     }
 226  
 
 227  
 }
 228