1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
package org.apache.commons.feedparser.sax; |
18 | |
|
19 | |
import java.util.HashMap; |
20 | |
import java.util.HashSet; |
21 | |
|
22 | |
import org.apache.commons.feedparser.FeedParserException; |
23 | |
import org.apache.commons.feedparser.FeedParserListener; |
24 | |
import org.apache.commons.feedparser.FeedParserState; |
25 | |
import org.apache.commons.feedparser.FeedVersion; |
26 | |
import org.xml.sax.Attributes; |
27 | |
import org.xml.sax.SAXException; |
28 | |
import org.xml.sax.helpers.DefaultHandler; |
29 | |
|
30 | |
|
31 | |
|
32 | |
|
33 | |
|
34 | |
public class RSSFeedParser extends BaseDefaultHandler { |
35 | |
|
36 | 0 | public FeedParserListener listener = null; |
37 | |
|
38 | 0 | boolean onItem = false; |
39 | |
|
40 | 0 | HashMap properties = new HashMap(); |
41 | |
|
42 | 0 | FeedParserState state = new FeedParserState(); |
43 | |
|
44 | 0 | static HashSet RSS_NAMESPACES = new HashSet(); |
45 | |
|
46 | 0 | static HashSet RDF_NAMESPACES = new HashSet(); |
47 | |
|
48 | 0 | static HashSet MOD_CONTENT_NAMESPACES = new HashSet(); |
49 | |
|
50 | |
static { |
51 | |
|
52 | 0 | RSS_NAMESPACES.add( "http://purl.org/rss/1.0/" ); |
53 | |
|
54 | 0 | RDF_NAMESPACES.add( "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ); |
55 | |
|
56 | 0 | MOD_CONTENT_NAMESPACES.add( "http://purl.org/rss/1.0/modules/content/" ); |
57 | |
|
58 | 0 | } |
59 | |
|
60 | |
|
61 | |
|
62 | |
|
63 | |
|
64 | |
|
65 | |
|
66 | |
public RSSFeedParser() { |
67 | |
|
68 | 0 | super( "FIXME" ); |
69 | |
|
70 | 0 | this.parser = this; |
71 | |
|
72 | 0 | setNext( new ChannelTemplate( this ) ); |
73 | |
|
74 | 0 | } |
75 | |
|
76 | |
public void startDocument() throws SAXException { |
77 | |
|
78 | |
try { |
79 | |
|
80 | 0 | FeedVersion v = new FeedVersion(); |
81 | 0 | v.isRSS = true; |
82 | 0 | listener.onFeedVersion( v ); |
83 | |
|
84 | 0 | listener.init(); |
85 | |
|
86 | 0 | } catch ( FeedParserException f ) { |
87 | 0 | throw new SAXException( f ); |
88 | 0 | } |
89 | |
|
90 | 0 | } |
91 | |
|
92 | |
public void endDocument() throws SAXException { |
93 | |
|
94 | |
try { |
95 | |
|
96 | 0 | listener.finished(); |
97 | |
|
98 | 0 | } catch ( FeedParserException f ) { |
99 | 0 | throw new SAXException( f ); |
100 | 0 | } |
101 | |
|
102 | 0 | } |
103 | |
|
104 | |
|
105 | |
|
106 | |
|
107 | |
class ChannelTemplate extends BaseDefaultHandler { |
108 | |
|
109 | 0 | public ChannelTemplate( RSSFeedParser parser ) { |
110 | |
|
111 | 0 | super( "channel", parser.RSS_NAMESPACES, parser ); |
112 | |
|
113 | 0 | setNext( new URLTemplate( parser ) ); |
114 | |
|
115 | 0 | } |
116 | |
|
117 | |
public void beginFeedElement() throws FeedParserException { |
118 | |
|
119 | 0 | parser.listener.onChannel( parser.state, |
120 | |
getProperty( "title" ), |
121 | |
getProperty( "link" ), |
122 | |
getProperty( "description" ) ); |
123 | |
|
124 | 0 | } |
125 | |
|
126 | |
public void endFeedElement() throws FeedParserException { |
127 | 0 | parser.listener.onChannelEnd(); |
128 | 0 | } |
129 | |
|
130 | |
} |
131 | |
|
132 | |
|
133 | |
|
134 | |
|
135 | |
class URLTemplate extends BaseDefaultHandler { |
136 | |
|
137 | 0 | public URLTemplate( RSSFeedParser parser ) { |
138 | |
|
139 | 0 | super( "url", parser.RSS_NAMESPACES, parser ); |
140 | |
|
141 | 0 | setNext( new ModContentTemplate( parser ) ); |
142 | |
|
143 | |
|
144 | 0 | } |
145 | |
|
146 | |
} |
147 | |
|
148 | |
|
149 | |
|
150 | |
|
151 | |
|
152 | |
|
153 | |
class ModContentTemplate extends BaseDefaultHandler { |
154 | |
|
155 | 0 | public ModContentTemplate( RSSFeedParser parser ) { |
156 | |
|
157 | 0 | super( "items", parser.MOD_CONTENT_NAMESPACES, parser ); |
158 | |
|
159 | 0 | this.setNext( new RDFValueTemplate( parser ) ); |
160 | |
|
161 | 0 | } |
162 | |
|
163 | |
} |
164 | |
|
165 | |
|
166 | |
|
167 | |
|
168 | |
|
169 | |
|
170 | |
class RDFValueTemplate extends BaseDefaultHandler { |
171 | |
|
172 | 0 | public RDFValueTemplate( RSSFeedParser parser ) { |
173 | |
|
174 | 0 | super( "value", parser.RDF_NAMESPACES, parser ); |
175 | |
|
176 | 0 | this.setIncludeContent( true ); |
177 | 0 | this.setNext( new RSSImageFeedParser( parser ) ); |
178 | |
|
179 | 0 | } |
180 | |
|
181 | |
public void endFeedElement() throws FeedParserException { |
182 | |
|
183 | 0 | } |
184 | |
|
185 | |
} |
186 | |
|
187 | |
} |
188 | |
|
189 | |
class RSSImageFeedParser extends BaseDefaultHandler { |
190 | |
|
191 | |
public RSSImageFeedParser( RSSFeedParser parser ) { |
192 | |
|
193 | 0 | super( "image", parser.RSS_NAMESPACES, parser ); |
194 | |
|
195 | 0 | setNext( new RSSItemFeedParser( parser ) ); |
196 | |
|
197 | 0 | } |
198 | |
|
199 | |
public void beginFeedElement() throws FeedParserException { |
200 | |
|
201 | 0 | parser.listener.onImage( parser.state, |
202 | |
getProperty( "title" ), |
203 | |
getProperty( "link" ), |
204 | |
getProperty( "url" ) ); |
205 | |
|
206 | 0 | } |
207 | |
|
208 | |
public void endFeedElement() throws FeedParserException { |
209 | 0 | parser.listener.onImageEnd(); |
210 | 0 | } |
211 | |
|
212 | |
} |
213 | |
|
214 | |
class RSSItemFeedParser extends BaseDefaultHandler { |
215 | |
|
216 | |
public RSSItemFeedParser( RSSFeedParser parser ) { |
217 | |
|
218 | 0 | super( "item", parser ); |
219 | 0 | this.namespaces = parser.RSS_NAMESPACES; |
220 | |
|
221 | 0 | setNext( new RSSTitleFeedParser( parser ) ); |
222 | |
|
223 | 0 | } |
224 | |
|
225 | |
public void beginFeedElement() throws FeedParserException { |
226 | |
|
227 | 0 | parser.listener.onItem( parser.state, |
228 | |
getProperty( "title" ), |
229 | |
getProperty( "link" ), |
230 | |
getProperty( "description" ), |
231 | |
null ); |
232 | |
|
233 | 0 | } |
234 | |
|
235 | |
public void endFeedElement() throws FeedParserException { |
236 | 0 | parser.listener.onItemEnd(); |
237 | 0 | } |
238 | |
|
239 | |
} |
240 | |
|
241 | |
class RSSTitleFeedParser extends BaseDefaultHandler { |
242 | |
|
243 | |
public RSSTitleFeedParser( RSSFeedParser parser ) { |
244 | |
|
245 | 0 | super( "title", parser ); |
246 | |
|
247 | 0 | setNext( new RSSLinkFeedParser( parser ) ); |
248 | |
|
249 | 0 | } |
250 | |
|
251 | |
} |
252 | |
|
253 | |
class RSSLinkFeedParser extends BaseDefaultHandler { |
254 | |
|
255 | |
public RSSLinkFeedParser( RSSFeedParser parser ) { |
256 | 0 | super( "link", parser ); |
257 | |
|
258 | 0 | setNext( new RSSDescriptionFeedParser( parser ) ); |
259 | 0 | } |
260 | |
|
261 | |
} |
262 | |
|
263 | |
class RSSDescriptionFeedParser extends BaseDefaultHandler { |
264 | |
|
265 | |
public RSSDescriptionFeedParser( RSSFeedParser parser ) { |
266 | 0 | super( "description", parser ); |
267 | 0 | } |
268 | |
|
269 | |
} |
270 | |
|
271 | |
|
272 | |
|
273 | |
|
274 | |
class RSSDcSubjectFeedParser extends BaseDefaultHandler { |
275 | |
|
276 | |
|
277 | |
|
278 | |
public RSSDcSubjectFeedParser( RSSFeedParser parser ) { |
279 | 0 | super( "subject", parser ); |
280 | 0 | } |
281 | |
|
282 | |
public void beginFeedElement() { |
283 | |
|
284 | |
|
285 | |
|
286 | |
|
287 | 0 | } |
288 | |
|
289 | |
public void endFeedElement() { |
290 | |
|
291 | 0 | } |
292 | |
|
293 | |
} |
294 | |
|
295 | |
class BaseDefaultHandler extends DefaultHandler { |
296 | |
|
297 | 0 | public static int STRING_BUFFER_CAPACITY = 100000; |
298 | |
|
299 | |
|
300 | |
|
301 | |
|
302 | |
|
303 | |
|
304 | |
|
305 | |
|
306 | |
|
307 | |
|
308 | |
|
309 | |
|
310 | |
|
311 | |
|
312 | |
|
313 | 0 | private String local = null; |
314 | |
|
315 | |
|
316 | 0 | private StringBuffer buff = null; |
317 | |
|
318 | 0 | private boolean onElement = false; |
319 | |
|
320 | 0 | private boolean includeContent = false; |
321 | |
|
322 | 0 | BaseDefaultHandler next = null; |
323 | |
|
324 | 0 | FeedParserListener listener = null; |
325 | |
|
326 | 0 | RSSFeedParser parser = null; |
327 | |
|
328 | 0 | static HashMap nsPrefixMapping = new HashMap(); |
329 | |
|
330 | |
|
331 | |
|
332 | |
|
333 | |
|
334 | 0 | HashSet namespaces = null; |
335 | |
|
336 | 0 | public BaseDefaultHandler( String local ) { |
337 | 0 | this.local = local; |
338 | 0 | } |
339 | |
|
340 | 0 | public BaseDefaultHandler( String local, RSSFeedParser parser ) { |
341 | |
|
342 | 0 | this.local = local; |
343 | 0 | this.parser = parser; |
344 | |
|
345 | 0 | } |
346 | |
|
347 | |
public BaseDefaultHandler( String local, |
348 | |
HashSet namespaces, |
349 | 0 | RSSFeedParser parser ) { |
350 | |
|
351 | 0 | this.local = local; |
352 | 0 | this.namespaces = namespaces; |
353 | 0 | this.parser = parser; |
354 | |
|
355 | 0 | } |
356 | |
|
357 | |
|
358 | |
|
359 | |
|
360 | |
|
361 | |
|
362 | |
public void setIncludeContent( boolean includeContent ) { |
363 | 0 | this.includeContent = includeContent; |
364 | 0 | } |
365 | |
|
366 | |
|
367 | |
|
368 | |
|
369 | |
|
370 | |
|
371 | |
public void setNext( BaseDefaultHandler next ) { |
372 | 0 | this.next = next; |
373 | 0 | } |
374 | |
|
375 | |
|
376 | |
|
377 | |
|
378 | |
|
379 | |
|
380 | |
public String toString() { |
381 | |
|
382 | 0 | if ( buff == null ) |
383 | 0 | return null; |
384 | |
|
385 | 0 | if ( buff.length() == 0 ) |
386 | 0 | return null; |
387 | |
|
388 | 0 | return buff.toString(); |
389 | |
} |
390 | |
|
391 | |
|
392 | |
|
393 | |
|
394 | |
|
395 | |
|
396 | |
|
397 | |
boolean isLocal( String namespace, String local ) { |
398 | |
|
399 | |
|
400 | 0 | if ( namespace != null && namespaces != null && ! namespaces.contains( namespace ) ) |
401 | 0 | return false; |
402 | |
|
403 | 0 | return this.local.equals( local ); |
404 | |
} |
405 | |
|
406 | |
|
407 | |
|
408 | |
|
409 | |
|
410 | |
|
411 | |
public String getProperty( String name ) { |
412 | 0 | return (String)parser.properties.get( name ); |
413 | |
} |
414 | |
|
415 | |
public boolean getBoolean( String name ) { |
416 | |
|
417 | 0 | return "true".equals( getProperty( name ) ); |
418 | |
|
419 | |
} |
420 | |
|
421 | |
|
422 | |
|
423 | |
|
424 | |
|
425 | |
|
426 | |
|
427 | 0 | public void beginFeedElement() throws FeedParserException {} |
428 | |
|
429 | |
|
430 | |
|
431 | |
|
432 | |
|
433 | |
|
434 | |
|
435 | 0 | public void endFeedElement() throws FeedParserException {} |
436 | |
|
437 | |
private boolean includeContentPrefix( String namespace ) { |
438 | |
|
439 | 0 | if ( namespace != null ) { |
440 | |
|
441 | 0 | String prefix = (String)nsPrefixMapping.get( namespace ); |
442 | |
|
443 | 0 | if ( prefix != null ) { |
444 | |
|
445 | 0 | buff.append( prefix ); |
446 | 0 | buff.append( ":" ); |
447 | 0 | return true; |
448 | |
} |
449 | |
|
450 | |
} |
451 | |
|
452 | 0 | return false; |
453 | |
|
454 | |
} |
455 | |
|
456 | |
|
457 | |
|
458 | |
|
459 | |
|
460 | |
|
461 | |
|
462 | |
|
463 | |
public void startPrefixMapping( String prefix, |
464 | |
String namespace ) throws SAXException { |
465 | |
|
466 | 0 | if ( prefix != null && ! "".equals( prefix ) ) { |
467 | |
|
468 | |
|
469 | 0 | nsPrefixMapping.put( namespace, prefix ); |
470 | |
|
471 | |
} |
472 | |
|
473 | 0 | } |
474 | |
|
475 | |
|
476 | |
|
477 | |
|
478 | |
|
479 | |
public void startElement( String namespace, |
480 | |
String local, |
481 | |
String qName, |
482 | |
Attributes attributes ) throws SAXException { |
483 | |
|
484 | 0 | if ( isLocal( namespace, local ) ) { |
485 | |
|
486 | |
|
487 | |
|
488 | |
|
489 | |
|
490 | |
|
491 | |
|
492 | |
|
493 | |
|
494 | |
|
495 | 0 | if ( buff == null ) { |
496 | 0 | buff = new StringBuffer( 1000 ); |
497 | |
} else { |
498 | 0 | buff.setLength( 0 ); |
499 | |
} |
500 | |
|
501 | 0 | onElement = true; |
502 | |
} |
503 | |
|
504 | 0 | if ( next != null ) |
505 | 0 | next.startElement( namespace, local, qName, attributes ); |
506 | |
|
507 | 0 | if ( includeContent && onElement ) { |
508 | 0 | buff.append( "<" ); |
509 | |
|
510 | 0 | boolean hasPrefix = includeContentPrefix( namespace ); |
511 | |
|
512 | 0 | buff.append( local ); |
513 | |
|
514 | 0 | if ( ! hasPrefix && namespace != null ) { |
515 | 0 | buff.append( " xmlns=\"" ); |
516 | 0 | buff.append( namespace ); |
517 | 0 | buff.append( "\"" ); |
518 | |
} |
519 | |
|
520 | |
|
521 | |
|
522 | 0 | int length = attributes.getLength(); |
523 | |
|
524 | 0 | for ( int i = 0; i < length; ++i ) { |
525 | |
|
526 | 0 | buff.append( " " ); |
527 | 0 | buff.append( attributes.getQName( i ) ); |
528 | 0 | buff.append( "=" ); |
529 | 0 | buff.append( "\"" ); |
530 | 0 | buff.append( attributes.getValue( i ) ); |
531 | 0 | buff.append( "\"" ); |
532 | |
|
533 | |
} |
534 | |
|
535 | 0 | buff.append( ">" ); |
536 | |
} |
537 | |
|
538 | 0 | } |
539 | |
|
540 | |
public void characters( char[] ch, |
541 | |
int start, |
542 | |
int length ) throws SAXException { |
543 | |
|
544 | 0 | if ( onElement ) { |
545 | 0 | buff.append( ch, start, length ); |
546 | |
} |
547 | |
|
548 | 0 | if ( next != null ) |
549 | 0 | next.characters( ch, start, length ); |
550 | |
|
551 | 0 | } |
552 | |
|
553 | |
public void endElement( String namespace, |
554 | |
String local, |
555 | |
String qName ) throws SAXException { |
556 | |
|
557 | |
try { |
558 | |
|
559 | 0 | if ( isLocal( namespace, local ) ) { |
560 | |
|
561 | 0 | onElement = false; |
562 | 0 | parser.properties.put( local, toString() ); |
563 | |
|
564 | 0 | beginFeedElement(); |
565 | |
|
566 | |
} |
567 | |
|
568 | 0 | if ( next != null ) |
569 | 0 | next.endElement( namespace, local, qName ); |
570 | |
|
571 | 0 | if ( isLocal( namespace, local ) ) |
572 | 0 | endFeedElement(); |
573 | |
|
574 | 0 | if ( includeContent && onElement ) { |
575 | 0 | buff.append( "</" ); |
576 | |
|
577 | 0 | includeContentPrefix( namespace ); |
578 | |
|
579 | 0 | buff.append( local ); |
580 | |
|
581 | 0 | buff.append( ">" ); |
582 | |
} |
583 | |
|
584 | 0 | } catch ( FeedParserException fpe ) { |
585 | |
|
586 | 0 | throw new SAXException( fpe ); |
587 | |
|
588 | 0 | } |
589 | |
|
590 | 0 | } |
591 | |
|
592 | |
} |
593 | |
|