Coverage Report - org.apache.commons.feedparser.locate.DiscoveryLocator
Classes in this File Line Coverage Branch Coverage Complexity
 /*
  * Copyright 1999,2004 The Apache Software Foundation.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.commons.feedparser.locate;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.apache.commons.feedparser.FeedList;
 import org.apache.log4j.Logger;
 /**
  * @author Kevin A. Burton
  */
 34  0
 public class DiscoveryLocator {
 36  0
     private static Logger log = Logger.getLogger( DiscoveryLocator.class );
      * Get a FULL link within the content. We then pull the attributes out of
      * this.
 42  0
     static Pattern element_pattern =
         Pattern.compile( "<link[^>]+",
                          Pattern.CASE_INSENSITIVE );
      * Regex to match on attributes.
      * Implementation: Mon Mar 14 2005 01:59 PM ( this is a
      * pretty difficult regexp to grok.
      * There's are two regexps here.  One for attributes with quotes and one
      * without. Each regexp has two groups - 1 is the name and 2 is the value.
      * You can split the regexp on | to better understand each individual
      * regexp.
     // > Attribute values MUST be one of the following: enclosed in double
     // > quotes, enclosed in single quotes, or not enclosed in quotes at all.
 62  0
     static String ATTR_REGEXP = "([a-zA-Z]+)=[\"']([^\"']+)[\"']|([a-zA-Z]+)=([^\"'>\r\n\t ]+)";
 64  0
     static Pattern ATTR_PATTERN = Pattern.compile( ATTR_REGEXP,
                                                    Pattern.CASE_INSENSITIVE );
 67  0
     static HashSet mediatypes = new HashSet();
     static {
 71  0
         mediatypes.add( FeedReference.ATOM_MEDIA_TYPE );
 72  0
         mediatypes.add( FeedReference.RSS_MEDIA_TYPE );
 73  0
         mediatypes.add( FeedReference.XML_MEDIA_TYPE );
 75  0
      * Locate a feed via RSS/Atom auto-discovery.  If both Atom and RSS are
      * listed we return both.  Actually we return all Atom/RSS or XML feeds
      * including FOAF.  It's up to the caller to use the correct feed.
     public static final List locate( String resource,
                                      String content,
                                      FeedList list )
         throws Exception {
         //this mechanism is easier but it isn't efficient.  I should just parse
         //elements forward until I discover </head>.  Also note that this isn't
         //doing all feed URLs just the first ones it finds.  
 93  0
         Matcher m = element_pattern.matcher( content );
 95  0
         while( m.find() ) {
             //the value of the link element XML... example:
             // <link rel="alternate" 
             //      href=""
             //      type="application/atom+xml" 
             //      title="ATOM" />
 103  0
             String element = 0 );
 105  0
             HashMap attributes = getAttributes( element );
 107  0
             String type = (String)attributes.get( "type" );
 108  0
             if (type != null)
 109  0
                 type = type.toLowerCase();
 111  0
             if ( mediatypes.contains( type )  ) {
                 //expand the href
 114  0
                 String href = (String)attributes.get( "href" );
 115  0
                 // > The href attribute MUST be present in an Atom autodiscovery element,
                 // > and its value MUST be the URI [RFC2396] of an Atom feed.  The value
                 // > MAY be a relative URI, and if so, clients MUST resolve it to a full
                 // > URI (section 5 of [RFC2396]) using the document's base URI (section
                 // > 12.4 of HTML 4 [W3C.REC-html401-19991224]).
 125  0
                 href = ResourceExpander.expand( resource, href );
 127  0
                 FeedReference feedReference = new FeedReference( href, type );
 129  0
                 feedReference.title = (String)attributes.get( "title" );
 131  0
                 list.add( feedReference );
 133  0
                 if ( type.equals( FeedReference.ATOM_MEDIA_TYPE ) )
 134  0
                     list.setFirstAdAtomFeed( feedReference );
 136  0
                 if ( type.equals( FeedReference.RSS_MEDIA_TYPE ) )
 137  0
                     list.setFirstAdRSSFeed( feedReference );
 141  0
 143  0
         return list;
      * Parse attributes within elements into a hashmap.
     public static HashMap getAttributes( String content ) {
 154  0
         HashMap map = new HashMap();
 156  0
         Matcher m = ATTR_PATTERN.matcher( content );
 158  0
         int index = 0;
 160  0
         while ( m.find( index ) ) {
 162  0
             String name = 1 );
 163  0
             String value = null;
             //Since we use an OR regexp the first match will be 1/2 and the
             //second will be 3/4
 167  0
             if ( name != null ) {
 168  0
                 value = 2 );
             } else {
 170  0
                 name = 3 );
 171  0
                 value = 4 );
             //String value = 2 ).toLowerCase().trim();
 175  0
             name = name.toLowerCase().trim();
             // Some services, such as AOL LiveJournal, are case sensitive
             // on their resource names; can't do a toLowerCase.
             // Brad Neuberg,
             // String value = 2 ).toLowerCase().trim();
 180  0
             value = value.trim();
 182  0
             if ( "".equals( value ) ) 
 183  0
                 value = null; 
 185  0
             map.put( name, value );
 187  0
             index =  m.end();
 189  0
 191  0
         return map;