1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
package org.apache.commons.feedparser; |
18 | |
|
19 | |
import java.util.Iterator; |
20 | |
import java.util.List; |
21 | |
|
22 | |
import org.apache.commons.feedparser.locate.EntityDecoder; |
23 | |
import org.jaxen.jdom.JDOMXPath; |
24 | |
import org.jdom.Attribute; |
25 | |
import org.jdom.CDATA; |
26 | |
import org.jdom.Comment; |
27 | |
import org.jdom.Element; |
28 | |
import org.jdom.Text; |
29 | |
import org.jdom.output.XMLOutputter; |
30 | |
|
31 | |
|
32 | |
|
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | 0 | public class AtomFeedParser extends BaseParser { |
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
public static void parse( FeedParserListener listener, |
55 | |
org.jdom.Document doc ) throws Exception { |
56 | |
|
57 | 0 | FeedParserState state = new FeedParserState( listener ); |
58 | |
|
59 | 0 | FeedVersion v = new FeedVersion(); |
60 | 0 | v.isAtom = true; |
61 | 0 | listener.onFeedVersion( v ); |
62 | |
|
63 | 0 | listener.init(); |
64 | |
|
65 | 0 | Element root = doc.getRootElement(); |
66 | |
|
67 | 0 | doLocale( state, listener, root ); |
68 | |
|
69 | 0 | doChannel( state, listener, doc ); |
70 | 0 | doEntry( state, listener, doc ); |
71 | |
|
72 | 0 | doLocaleEnd( state, listener, root ); |
73 | |
|
74 | 0 | listener.finished(); |
75 | |
|
76 | 0 | } |
77 | |
|
78 | |
private static void doChannel( FeedParserState state, |
79 | |
FeedParserListener listener, |
80 | |
org.jdom.Document doc ) throws Exception { |
81 | |
|
82 | 0 | Element root = doc.getRootElement(); |
83 | |
|
84 | |
|
85 | 0 | String title = selectText( "/atom:feed/atom:title", root ); |
86 | |
|
87 | |
|
88 | |
|
89 | |
|
90 | 0 | String link = selectSingleAttribute( "/atom:feed/atom:link[@rel='alternate'][@type='text/html']/@href", root ); |
91 | |
|
92 | |
|
93 | |
|
94 | 0 | String tagline = selectText( "/atom:feed/atom:tagline", root ); |
95 | |
|
96 | |
|
97 | 0 | listener.onChannel( state, title, link, tagline ); |
98 | |
|
99 | 0 | listener.onChannelEnd(); |
100 | |
|
101 | 0 | } |
102 | |
|
103 | |
private static void doEntry( FeedParserState state, |
104 | |
FeedParserListener listener, |
105 | |
org.jdom.Document doc ) throws Exception { |
106 | |
|
107 | 0 | JDOMXPath xpath = new JDOMXPath( "/atom:feed/atom:entry" ); |
108 | 0 | xpath.setNamespaceContext( NS.context ); |
109 | |
|
110 | 0 | List items = xpath.selectNodes( doc ); |
111 | |
|
112 | 0 | Iterator i = items.iterator(); |
113 | |
|
114 | |
|
115 | 0 | while ( i.hasNext() ) { |
116 | |
|
117 | 0 | Element child = (Element)i.next(); |
118 | |
|
119 | 0 | doLocale( state, listener, child ); |
120 | |
|
121 | 0 | String title = selectText( "atom:title", child ); |
122 | |
|
123 | |
|
124 | |
|
125 | |
|
126 | |
|
127 | |
|
128 | |
|
129 | |
|
130 | |
|
131 | |
|
132 | |
|
133 | |
|
134 | |
|
135 | |
|
136 | |
|
137 | 0 | String link = selectSingleAttribute( "atom:link[@rel='alternate'][@type='text/html']/@href", |
138 | |
child ); |
139 | |
|
140 | |
|
141 | |
|
142 | |
|
143 | |
|
144 | |
|
145 | |
|
146 | |
|
147 | |
|
148 | |
|
149 | |
|
150 | |
|
151 | |
|
152 | |
|
153 | 0 | String description = null; |
154 | |
|
155 | 0 | Element summary = child.getChild( "summary", NS.ATOM ); |
156 | |
|
157 | 0 | if ( summary != null ) { |
158 | |
|
159 | 0 | String type = summary.getAttributeValue( "type", NS.ATOM ); |
160 | |
|
161 | 0 | if ( type == null || "text/plain".equals( type ) ) |
162 | 0 | description = summary.getText(); |
163 | |
|
164 | |
} |
165 | |
|
166 | 0 | state.current = child; |
167 | |
|
168 | 0 | listener.onItem( state, title, link, description, link ); |
169 | |
|
170 | 0 | doLink( state, listener, child ); |
171 | |
|
172 | 0 | doMeta( state, listener, child ); |
173 | |
|
174 | 0 | doContent( state, listener, child ); |
175 | |
|
176 | 0 | MetaFeedParser.parse( listener, state ); |
177 | 0 | TagFeedParser.parse( listener, state ); |
178 | |
|
179 | 0 | listener.onItemEnd(); |
180 | 0 | doLocale( state, listener, child ); |
181 | |
|
182 | 0 | } |
183 | |
|
184 | 0 | } |
185 | |
|
186 | |
private static void doLink( FeedParserState state, |
187 | |
FeedParserListener listener, |
188 | |
Element current ) throws Exception { |
189 | |
|
190 | 0 | if ( listener instanceof LinkFeedParserListener == false ) |
191 | 0 | return; |
192 | |
|
193 | 0 | LinkFeedParserListener lfpl = (LinkFeedParserListener)listener; |
194 | |
|
195 | 0 | JDOMXPath xpath = new JDOMXPath( "atom:link" ); |
196 | 0 | xpath.setNamespaceContext( NS.context ); |
197 | |
|
198 | 0 | List items = xpath.selectNodes( current ); |
199 | |
|
200 | 0 | Iterator it = items.iterator(); |
201 | |
|
202 | |
|
203 | 0 | while ( it.hasNext() ) { |
204 | |
|
205 | 0 | Element link = (Element)it.next(); |
206 | |
|
207 | 0 | String href = link.getAttributeValue( "href" ); |
208 | 0 | String rel = link.getAttributeValue( "rel" ); |
209 | 0 | String type = link.getAttributeValue( "type" ); |
210 | |
|
211 | 0 | String title = null; |
212 | 0 | long length = -1; |
213 | |
|
214 | 0 | lfpl.onLink( state, rel, type, href, title, length ); |
215 | |
|
216 | 0 | } |
217 | |
|
218 | 0 | } |
219 | |
|
220 | |
private static void doContent( FeedParserState state, |
221 | |
FeedParserListener listener, |
222 | |
Element current ) throws Exception { |
223 | |
|
224 | 0 | if ( ! (listener instanceof ContentFeedParserListener) ) |
225 | 0 | return; |
226 | |
|
227 | 0 | ContentFeedParserListener clistener = (ContentFeedParserListener)listener; |
228 | |
|
229 | 0 | JDOMXPath xpath = new JDOMXPath( "atom:content" ); |
230 | 0 | xpath.setNamespaceContext( NS.context ); |
231 | |
|
232 | 0 | List items = xpath.selectNodes( current ); |
233 | |
|
234 | 0 | Iterator i = items.iterator(); |
235 | |
|
236 | |
|
237 | 0 | while ( i.hasNext() ) { |
238 | |
|
239 | 0 | Element content = (Element)i.next(); |
240 | |
|
241 | 0 | doLocale( state, listener, content ); |
242 | |
|
243 | 0 | String type = content.getAttributeValue( "type", "text/plain" ); |
244 | 0 | String mode = content.getAttributeValue( "mode" ); |
245 | |
|
246 | 0 | String format = null; |
247 | 0 | String encoding = null; |
248 | |
|
249 | 0 | String value = null; |
250 | |
|
251 | |
|
252 | 0 | if ( "xml".equals( mode ) ) { |
253 | 0 | value = content.getText(); |
254 | 0 | } else if ( "escaped".equals( mode ) ) { |
255 | |
|
256 | |
|
257 | 0 | value = getXMLOfContent( content.getContent() ); |
258 | 0 | value = EntityDecoder.decode( value ); |
259 | |
} else { |
260 | 0 | mode = "xml"; |
261 | 0 | value = getXMLOfContent( content.getContent() ); |
262 | |
} |
263 | |
|
264 | 0 | boolean isSummary = false; |
265 | |
|
266 | 0 | clistener.onContent( state, type, format, encoding, mode, value, isSummary ); |
267 | |
|
268 | 0 | doLocaleEnd( state, listener, content ); |
269 | |
|
270 | 0 | } |
271 | |
|
272 | 0 | xpath = new JDOMXPath( "atom:summary[@type='application/xhtml+xml']" ); |
273 | 0 | xpath.setNamespaceContext( NS.context ); |
274 | 0 | Element e = (Element)xpath.selectSingleNode( current ); |
275 | |
|
276 | 0 | if ( e != null ) { |
277 | |
|
278 | 0 | String type = "text/html"; |
279 | 0 | String format = "application/xhtml+xml"; |
280 | 0 | String encoding = null; |
281 | 0 | String mode = "xml"; |
282 | |
|
283 | |
|
284 | |
|
285 | 0 | String value = getXMLOfContent( e ); |
286 | 0 | boolean isSummary = true; |
287 | |
|
288 | 0 | clistener.onContent( state, type, format, encoding, mode, value, isSummary ); |
289 | |
|
290 | |
} |
291 | |
|
292 | 0 | } |
293 | |
|
294 | |
private static String getXMLOfContent( Element element ) { |
295 | 0 | return getXMLOfContent( element.getContent() ); |
296 | |
} |
297 | |
|
298 | |
|
299 | |
|
300 | |
|
301 | |
|
302 | |
|
303 | |
private static String getXMLOfContent( List content ) { |
304 | |
|
305 | |
|
306 | |
|
307 | |
|
308 | |
|
309 | 0 | StringBuffer buff = new StringBuffer( 10000 ); |
310 | |
|
311 | |
|
312 | |
|
313 | 0 | XMLOutputter outputter = new XMLOutputter(); |
314 | |
|
315 | 0 | Iterator it = content.iterator(); |
316 | |
|
317 | 0 | while ( it.hasNext() ) { |
318 | |
|
319 | 0 | Object next = it.next(); |
320 | |
|
321 | 0 | if ( next instanceof String ) { |
322 | 0 | buff.append( (String)next ); |
323 | 0 | } else if ( next instanceof Element ) { |
324 | 0 | buff.append( outputter.outputString( (Element)next ) ); |
325 | 0 | } else if ( next instanceof CDATA ) { |
326 | 0 | buff.append( ((CDATA)next).getText() ); |
327 | 0 | } else if ( next instanceof Comment ) { |
328 | 0 | buff.append( outputter.outputString( (Comment)next ) ); |
329 | 0 | } else if ( next instanceof Text ) { |
330 | 0 | buff.append( outputter.outputString( (Text)next ) ); |
331 | |
} |
332 | |
|
333 | 0 | } |
334 | |
|
335 | 0 | return buff.toString(); |
336 | |
|
337 | |
} |
338 | |
|
339 | |
private static void doMeta( FeedParserState state, |
340 | |
FeedParserListener listener, |
341 | |
Element element ) throws Exception { |
342 | |
|
343 | |
|
344 | |
|
345 | 0 | if ( ! (listener instanceof MetaFeedParserListener) ) |
346 | 0 | return; |
347 | |
|
348 | 0 | MetaFeedParserListener mlistener = (MetaFeedParserListener)listener; |
349 | |
|
350 | |
|
351 | 0 | String subject = selectText( "dc:subject", element); |
352 | |
|
353 | 0 | if ( subject != null ) { |
354 | 0 | mlistener.onSubject( state, subject ); |
355 | 0 | mlistener.onSubjectEnd(); |
356 | |
} |
357 | |
|
358 | 0 | } |
359 | |
|
360 | |
private static Element selectSingleElement( String query, org.jdom.Document doc ) throws Exception { |
361 | |
|
362 | 0 | JDOMXPath xpath = new JDOMXPath( query ); |
363 | 0 | xpath.setNamespaceContext( NS.context ); |
364 | |
|
365 | |
|
366 | 0 | return (Element)xpath.selectSingleNode( doc ); |
367 | |
|
368 | |
} |
369 | |
|
370 | |
private static String selectSingleAttribute( String query, Element element ) throws Exception { |
371 | |
|
372 | 0 | JDOMXPath xpath = new JDOMXPath( query ); |
373 | 0 | xpath.setNamespaceContext( NS.context ); |
374 | |
|
375 | |
|
376 | 0 | Attribute a = (Attribute)xpath.selectSingleNode( element ); |
377 | 0 | if ( a == null ) |
378 | 0 | return null; |
379 | |
|
380 | 0 | return a.getValue(); |
381 | |
|
382 | |
} |
383 | |
|
384 | |
} |
385 | |
|