1 | |
package org.apache.maven.jxr.util; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import java.util.Collections; |
23 | |
import java.util.Vector; |
24 | |
|
25 | |
|
26 | |
|
27 | |
|
28 | |
|
29 | |
|
30 | |
|
31 | |
public class SimpleWordTokenizer |
32 | |
{ |
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | 1 | public static final char[] BREAKERS = {'(', ')', '[', ' ', '{', '}'}; |
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
public static StringEntry[] tokenize( String line ) |
43 | |
{ |
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | 246 | int start = getStart( line ); |
50 | |
|
51 | |
|
52 | |
|
53 | 246 | if ( line == null || line.length() == 0 || start == -1 ) |
54 | |
{ |
55 | 48 | return new StringEntry[0]; |
56 | |
} |
57 | |
|
58 | 198 | return tokenize( line, start ); |
59 | |
} |
60 | |
|
61 | |
|
62 | |
|
63 | |
|
64 | |
|
65 | |
|
66 | |
|
67 | |
|
68 | |
|
69 | |
public static StringEntry[] tokenize( String line, String find ) |
70 | |
{ |
71 | |
|
72 | 5 | Vector v = new Vector(); |
73 | |
|
74 | 5 | StringEntry[] se = tokenize( line ); |
75 | |
|
76 | 11 | for ( int i = 0; i < se.length; ++i ) |
77 | |
{ |
78 | |
|
79 | 6 | if ( se[i].toString().equals( find ) ) |
80 | |
{ |
81 | 5 | v.addElement( se[i] ); |
82 | |
} |
83 | |
|
84 | |
} |
85 | |
|
86 | 5 | StringEntry[] found = new StringEntry[v.size()]; |
87 | 5 | Collections.sort( v ); |
88 | 5 | v.copyInto( found ); |
89 | 5 | return found; |
90 | |
} |
91 | |
|
92 | |
|
93 | |
|
94 | |
|
95 | |
private static StringEntry[] tokenize( String line, int start ) |
96 | |
{ |
97 | |
|
98 | 198 | Vector words = new Vector(); |
99 | |
|
100 | |
|
101 | |
|
102 | |
|
103 | |
while ( true ) |
104 | |
{ |
105 | |
|
106 | 680 | int next = getNextBreak( line, start ); |
107 | |
|
108 | 680 | if ( next < 0 || next <= start ) |
109 | |
{ |
110 | 198 | break; |
111 | |
} |
112 | |
|
113 | 482 | String word = line.substring( start, next ); |
114 | |
|
115 | 482 | if ( isWord( word ) ) |
116 | |
{ |
117 | 122 | words.addElement( new StringEntry( word, start ) ); |
118 | |
} |
119 | |
|
120 | 482 | start = next + 1; |
121 | |
} |
122 | |
|
123 | 198 | StringEntry[] found = new StringEntry[words.size()]; |
124 | 198 | words.copyInto( found ); |
125 | 198 | return found; |
126 | |
} |
127 | |
|
128 | |
|
129 | |
|
130 | |
|
131 | |
|
132 | |
|
133 | |
private static boolean isWord( String string ) |
134 | |
{ |
135 | |
|
136 | 482 | if ( string == null || string.length() == 0 ) |
137 | |
{ |
138 | |
|
139 | |
return false; |
140 | |
} |
141 | |
|
142 | 3056 | for ( int i = 0; i < string.length(); ++i ) |
143 | |
{ |
144 | |
|
145 | 2934 | char c = string.charAt( i ); |
146 | |
|
147 | 2934 | if ( Character.isLetter( c ) == false && c != '.' ) |
148 | |
{ |
149 | 360 | return false; |
150 | |
} |
151 | |
|
152 | |
} |
153 | |
|
154 | 122 | return true; |
155 | |
} |
156 | |
|
157 | |
|
158 | |
|
159 | |
|
160 | |
private static int getNextBreak( String string, int start ) |
161 | |
{ |
162 | |
|
163 | 680 | int breakPoint = -1; |
164 | |
|
165 | 4760 | for ( int i = 0; i < BREAKERS.length; ++i ) |
166 | |
{ |
167 | |
|
168 | 4080 | int next = string.indexOf( BREAKERS[i], start ); |
169 | |
|
170 | 4080 | if ( breakPoint == -1 || next < breakPoint && next != -1 ) |
171 | |
{ |
172 | |
|
173 | 2594 | breakPoint = next; |
174 | |
|
175 | |
} |
176 | |
|
177 | |
} |
178 | |
|
179 | |
|
180 | 680 | if ( breakPoint == -1 ) |
181 | |
{ |
182 | 214 | breakPoint = string.length(); |
183 | |
} |
184 | |
|
185 | 680 | return breakPoint; |
186 | |
} |
187 | |
|
188 | |
|
189 | |
|
190 | |
|
191 | |
private static int getStart( String string ) |
192 | |
{ |
193 | |
|
194 | 1205 | for ( int i = 0; i < string.length(); ++i ) |
195 | |
{ |
196 | |
|
197 | 1157 | if ( isBreaker( string.charAt( i ) ) == false ) |
198 | |
{ |
199 | 198 | return i; |
200 | |
} |
201 | |
|
202 | |
} |
203 | |
|
204 | 48 | return -1; |
205 | |
} |
206 | |
|
207 | |
|
208 | |
|
209 | |
|
210 | |
|
211 | |
private static boolean isBreaker( char c ) |
212 | |
{ |
213 | |
|
214 | 5194 | for ( int i = 0; i < BREAKERS.length; ++i ) |
215 | |
{ |
216 | |
|
217 | 4996 | if ( BREAKERS[i] == c ) |
218 | |
{ |
219 | 959 | return true; |
220 | |
} |
221 | |
|
222 | |
} |
223 | |
|
224 | 198 | return false; |
225 | |
} |
226 | |
|
227 | |
} |
228 | |
|