Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
AnchorParser |
|
| 1.8333333333333333;1.833 | ||||
AnchorParser$1 |
|
| 1.8333333333333333;1.833 |
1 | /* | |
2 | * Copyright 1999,2004 The Apache Software Foundation. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | package org.apache.commons.feedparser.locate; | |
17 | ||
18 | import java.util.HashMap; | |
19 | ||
20 | /** | |
21 | * | |
22 | * Given a string of HTML content, parse out anchors and fire events with all | |
23 | * the data when they are found. | |
24 | * | |
25 | * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a> | |
26 | */ | |
27 | 0 | public class AnchorParser { |
28 | ||
29 | public static void parse( String content, | |
30 | AnchorParserListener listener ) | |
31 | throws AnchorParserException { | |
32 | ||
33 | //FIXME: we do NOT obey base right now and this is a BIG problem! | |
34 | ||
35 | 0 | parseAnchors( content, listener ); |
36 | ||
37 | 0 | } |
38 | ||
39 | /** | |
40 | * Get links from the given html with included titles and other metainfo. | |
41 | * | |
42 | * @deprecated use HTParser | |
43 | * | |
44 | */ | |
45 | public static void parseAnchors( String content, | |
46 | AnchorParserListener listener ) | |
47 | throws AnchorParserException { | |
48 | ||
49 | 0 | int index = 0; |
50 | 0 | int begin = 0; |
51 | 0 | int end = 0; |
52 | ||
53 | //FIXME: what if there are HTML comments here? We would parse links | |
54 | //within comments which isn't what we want. | |
55 | ||
56 | // FIXME: how do we pass back the content of the href? | |
57 | // | |
58 | // <a href=''> this is the content </a> | |
59 | // | |
60 | // which would pass a string "this is the content" | |
61 | ||
62 | //Matcher m = pattern.matcher( content ); | |
63 | ||
64 | 0 | while ( (begin = content.indexOf( "<a", index )) != -1 ) { |
65 | ||
66 | 0 | index = begin; |
67 | ||
68 | 0 | end = content.indexOf( "</a>", index ); |
69 | 0 | if ( end == -1 ) |
70 | 0 | break; |
71 | 0 | index = end + 1; |
72 | ||
73 | 0 | String match = content.substring( begin, end ); |
74 | ||
75 | 0 | HashMap map = DiscoveryLocator.getAttributes( match ); |
76 | //String resource = EntityDecoder.decode( m.group( 1 ) ); | |
77 | ||
78 | //FIXME: we SHOULD be using this but its not working right now. | |
79 | 0 | String resource = (String)map.get( "href" ); |
80 | ||
81 | 0 | if ( resource == null || resource.equals( "" ) ) { |
82 | 0 | continue; |
83 | } | |
84 | ||
85 | 0 | String title = (String)map.get( "title" ); |
86 | ||
87 | 0 | if ( title != null ) |
88 | 0 | title = EntityDecoder.decode( title ); |
89 | ||
90 | 0 | String rel = (String)map.get( "rel" ); |
91 | ||
92 | 0 | if ( ! listener.onAnchor( resource, rel, title ) ) |
93 | 0 | return; |
94 | ||
95 | 0 | } |
96 | ||
97 | 0 | } |
98 | ||
99 | public static void main( String[] args ) throws Exception { | |
100 | ||
101 | 0 | AnchorParserListener listener = new AnchorParserListener() { |
102 | ||
103 | public boolean onAnchor( String href, String rel, String title ) { | |
104 | ||
105 | 0 | System.out.println( "href: " + href ); |
106 | 0 | System.out.println( "rel: " + rel ); |
107 | 0 | System.out.println( "title: " + title ); |
108 | 0 | return true; |
109 | } | |
110 | ||
111 | public Object getResult() { | |
112 | 0 | return null; |
113 | } | |
114 | 0 | public void setContext( Object context ) {} |
115 | ||
116 | }; | |
117 | ||
118 | //FIXME: won't work with single quotes | |
119 | //FIXME: won't work with <a /> | |
120 | //parse( "<a href=\"http://peerfear.org\" rel=\"linux\" title=\"linux\" >adf</a>", listener ); | |
121 | ||
122 | //parse( "<a rel=\"linux\" href=\"http://peerfear.org\" title=\"linux\" >adf</a>", listener ); | |
123 | //parse( "<a title=\"linux\" rel=\"linux\" href=\"http://peerfear.org\" >adf</a>", listener ); | |
124 | ||
125 | //parse( "<a href='http://peerfear.org' rel='linux' title='linux' >adf</a>", listener ); | |
126 | ||
127 | 0 | parse( "<a href='mailto:burton@rojo.com' rel='linux' title='linux' ><img src='' /></a>", listener ); |
128 | ||
129 | 0 | } |
130 | ||
131 | } |