Coverage Report - org.apache.maven.doxia.module.twiki.parser.TextParser
 
Classes in this File Line Coverage Branch Coverage Complexity
TextParser
90%
111/122
82%
56/68
3,357
 
 1  
 package org.apache.maven.doxia.module.twiki.parser;
 2  
 
 3  
 /*
 4  
  * Licensed to the Apache Software Foundation (ASF) under one
 5  
  * or more contributor license agreements.  See the NOTICE file
 6  
  * distributed with this work for additional information
 7  
  * regarding copyright ownership.  The ASF licenses this file
 8  
  * to you under the Apache License, Version 2.0 (the
 9  
  * "License"); you may not use this file except in compliance
 10  
  * with the License.  You may obtain a copy of the License at
 11  
  *
 12  
  *   http://www.apache.org/licenses/LICENSE-2.0
 13  
  *
 14  
  * Unless required by applicable law or agreed to in writing,
 15  
  * software distributed under the License is distributed on an
 16  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 17  
  * KIND, either express or implied.  See the License for the
 18  
  * specific language governing permissions and limitations
 19  
  * under the License.
 20  
  */
 21  
 
 22  
 import java.util.ArrayList;
 23  
 import java.util.List;
 24  
 import java.util.StringTokenizer;
 25  
 import java.util.regex.Matcher;
 26  
 import java.util.regex.Pattern;
 27  
 
 28  
 /**
 29  
  * Parse almost plain text in search of WikiWords, links, ...
 30  
  *
 31  
  * @author Juan F. Codagnone
 32  
  * @version $Id: TextParser.java 1090706 2011-04-09 23:15:28Z hboutemy $
 33  
  */
 34  
 public class TextParser
 35  
 {
 36  
     /**
 37  
      * pattern to detect WikiWords
 38  
      */
 39  1
     private static final Pattern WIKIWORD_PATTERN =
 40  
         Pattern.compile( "(!?([A-Z]\\w*[.])?([A-Z][a-z]+){2,}(#\\w*)?)" );
 41  
 
 42  
     /**
 43  
      * pattern to detect SpecificLinks links [[reference][text]]
 44  
      */
 45  1
     private static final Pattern SPECIFICLINK_PATTERN = Pattern.compile( "!?\\[\\[([^\\]]+)\\]\\[([^\\]]+)\\]\\]" );
 46  
 
 47  
     /**
 48  
      * pattern to detect ForcedLinks links [[reference asd]]
 49  
      */
 50  1
     private static final Pattern FORCEDLINK_PATTERN = Pattern.compile( "(!)?(\\[\\[(.+)\\]\\])" );
 51  
 
 52  
     /**
 53  
      * anchor name
 54  
      */
 55  1
     private static final Pattern ANCHOR_PATTERN = Pattern.compile( "#(([A-Z][A-Za-z]*){2,})" );
 56  
 
 57  
     /**
 58  
      * url word
 59  
      */
 60  1
     private static final Pattern URL_PATTERN = Pattern.compile( "(\\w+):[/][/][^\\s]*" );
 61  
 
 62  
     /**
 63  
      *  image pattern specification
 64  
      */
 65  1
     private static final Pattern IMAGE_PATTERN = Pattern.compile( "(.*)\\.(png|jpg|gif|bmp)" );
 66  
 
 67  
     /**
 68  
      *  image tag pattern specification (used for images at relative URLs)
 69  
      */
 70  1
     private static final Pattern IMAGE_TAG_PATTERN =
 71  
         Pattern.compile( "<img\\b.*?\\bsrc=([\"'])(.*?)\\1.*>", Pattern.CASE_INSENSITIVE );
 72  
 
 73  
     /** HTML tag pattern */
 74  1
     private static final Pattern HTML_TAG_PATTERN = Pattern.compile( "<(/?)([\\w]*)(.*?)(/?)>", Pattern.DOTALL );
 75  
 
 76  
     /**
 77  
      * resolves wikiWordLinks
 78  
      */
 79  
     private final WikiWordLinkResolver wikiWordLinkResolver;
 80  
 
 81  
     /** resolves noautolink tag */
 82  
     private boolean noautolink;
 83  
 
 84  
     /**
 85  
      * Creates the TextParser.
 86  
      *
 87  
      * @param resolver resolver for wikiWord links
 88  
      */
 89  
     public TextParser( final WikiWordLinkResolver resolver )
 90  95
     {
 91  95
         this.wikiWordLinkResolver = resolver;
 92  95
     }
 93  
 
 94  
     /**
 95  
      * <p>parse.</p>
 96  
      *
 97  
      * @param line line to parse
 98  
      * @return a list of block that represents the input
 99  
      */
 100  
     public final List<Block> parse( final String line )
 101  
     {
 102  203
         final List<Block> ret = new ArrayList<Block>();
 103  
 
 104  203
         final Matcher linkMatcher = SPECIFICLINK_PATTERN.matcher( line );
 105  203
         final Matcher wikiMatcher = WIKIWORD_PATTERN.matcher( line );
 106  203
         final Matcher forcedLinkMatcher = FORCEDLINK_PATTERN.matcher( line );
 107  203
         final Matcher anchorMatcher = ANCHOR_PATTERN.matcher( line );
 108  203
         final Matcher urlMatcher = URL_PATTERN.matcher( line );
 109  203
         final Matcher imageTagMatcher = IMAGE_TAG_PATTERN.matcher( line );
 110  
 
 111  203
         final Matcher tagMatcher = HTML_TAG_PATTERN.matcher( line );
 112  203
         Matcher xhtmlMatcher = null;
 113  203
         if ( tagMatcher.find() )
 114  
         {
 115  1
             String tag = tagMatcher.group( 2 );
 116  
 
 117  1
             Pattern pattern =
 118  
                 Pattern.compile( "(\\<" + tag + ".*\\>)(.*)?(\\<\\/" + tag + "\\>)(.*)?", Pattern.DOTALL );
 119  1
             xhtmlMatcher = pattern.matcher( line );
 120  
         }
 121  
 
 122  203
         if ( xhtmlMatcher != null && xhtmlMatcher.find() )
 123  
         {
 124  0
             parseXHTML( line, ret, xhtmlMatcher );
 125  
         }
 126  203
         else if ( linkMatcher.find() )
 127  
         {
 128  10
             parseLink( line, ret, linkMatcher );
 129  
         }
 130  193
         else if ( wikiMatcher.find() && startLikeWord( wikiMatcher, line ) && !noautolink )
 131  
         {
 132  7
             parseWiki( line, ret, wikiMatcher );
 133  
         }
 134  186
         else if ( forcedLinkMatcher.find() )
 135  
         {
 136  5
             parseForcedLink( line, ret, forcedLinkMatcher );
 137  
         }
 138  181
         else if ( anchorMatcher.find() && isAWord( anchorMatcher, line ) )
 139  
         {
 140  1
             parseAnchor( line, ret, anchorMatcher );
 141  
         }
 142  180
         else if ( urlMatcher.find() && isAWord( urlMatcher, line ) )
 143  
         {
 144  2
             parseUrl( line, ret, urlMatcher );
 145  
         }
 146  178
         else if ( imageTagMatcher.find() )
 147  
         {
 148  1
             parseImage( line, ret, imageTagMatcher );
 149  
         }
 150  
         else
 151  
         {
 152  177
             if ( line.length() != 0 )
 153  
             {
 154  142
                 ret.add( new TextBlock( line ) );
 155  
             }
 156  
         }
 157  
 
 158  203
         return ret;
 159  
     }
 160  
 
 161  
     /**
 162  
      * Parses the image tag
 163  
      * @param line the line to parse
 164  
      * @param ret where the results live
 165  
      * @param imageTagMatcher image tag matcher
 166  
      */
 167  
     private void parseImage( final String line, final List<Block> ret, final Matcher imageTagMatcher )
 168  
     {
 169  1
         ret.addAll( parse( line.substring( 0, imageTagMatcher.start() ) ) );
 170  1
         final String src = imageTagMatcher.group( 2 );
 171  1
         ret.add( new ImageBlock( src ) );
 172  1
         ret.addAll( parse( line.substring( imageTagMatcher.end(), line.length() ) ) );
 173  1
     }
 174  
 
 175  
     /**
 176  
      * Parses the url
 177  
      * @param line the line to parse
 178  
      * @param ret where the results live
 179  
      * @param urlMatcher url matcher
 180  
      */
 181  
     private void parseUrl( final String line, final List<Block> ret, final Matcher urlMatcher )
 182  
     {
 183  2
         ret.addAll( parse( line.substring( 0, urlMatcher.start() ) ) );
 184  2
         final String url = urlMatcher.group( 0 );
 185  2
         final Matcher imageMatcher = IMAGE_PATTERN.matcher( url );
 186  2
         if ( imageMatcher.matches() )
 187  
         {
 188  1
             ret.add( new ImageBlock( url ) );
 189  
         }
 190  
         else
 191  
         {
 192  1
             ret.add( new LinkBlock( url, new TextBlock( url ) ) );
 193  
         }
 194  2
         ret.addAll( parse( line.substring( urlMatcher.end(), line.length() ) ) );
 195  2
     }
 196  
 
 197  
     /**
 198  
      * Parses the anchor
 199  
      * @param line the line to parse
 200  
      * @param ret where the results live
 201  
      * @param anchorMatcher anchor matcher
 202  
      */
 203  
     private void parseAnchor( final String line, final List<Block> ret, final Matcher anchorMatcher )
 204  
     {
 205  1
         ret.addAll( parse( line.substring( 0, anchorMatcher.start() ) ) );
 206  1
         ret.add( new AnchorBlock( anchorMatcher.group( 1 ) ) );
 207  1
         ret.addAll( parse( line.substring( anchorMatcher.end(), line.length() ) ) );
 208  1
     }
 209  
 
 210  
     /**
 211  
      * Parses the link
 212  
      * @param line line to parse
 213  
      * @param ret where the results live
 214  
      * @param forcedLinkMatcher forced link matcher
 215  
      */
 216  
     private void parseForcedLink( final String line, final List<Block> ret, final Matcher forcedLinkMatcher )
 217  
     {
 218  5
         if ( forcedLinkMatcher.group( 1 ) != null )
 219  
         {
 220  1
             ret.add( new TextBlock( forcedLinkMatcher.group( 2 ) ) );
 221  
         }
 222  
         else
 223  
         {
 224  4
             final String showText = forcedLinkMatcher.group( 3 );
 225  
             // mailto link:
 226  4
             if ( showText.trim().startsWith( "mailto:" ) )
 227  
             {
 228  1
                 String s = showText.trim();
 229  1
                 int i = s.indexOf( ' ' );
 230  1
                 if ( i == -1 )
 231  
                 {
 232  0
                     ret.add( new TextBlock( s ) );
 233  
                 }
 234  
                 else
 235  
                 {
 236  1
                     ret.add( new LinkBlock( s.substring( 0, i ), new TextBlock( s.substring( i ).trim() ) ) );
 237  
                 }
 238  1
             }
 239  
             else
 240  
             {
 241  3
                 ret.addAll( parse( line.substring( 0, forcedLinkMatcher.start() ) ) );
 242  3
                 ret.add( createLink( showText, showText ) );
 243  3
                 ret.addAll( parse( line.substring( forcedLinkMatcher.end(), line.length() ) ) );
 244  
             }
 245  
         }
 246  5
     }
 247  
 
 248  
     /**
 249  
      * Decides between a WikiWordBlock or a a LinkBlock
 250  
      * @param link the link text
 251  
      * @param showText the show text.
 252  
      * @return either a WikiWordBlock or a LinkBlock
 253  
      */
 254  
     private Block createLink( final String link, final String showText )
 255  
     {
 256  
         final Block content;
 257  12
         if ( URL_PATTERN.matcher( showText ).matches() && IMAGE_PATTERN.matcher( showText ).matches() )
 258  
         {
 259  1
             content = new ImageBlock( showText );
 260  
         }
 261  
         else
 262  
         {
 263  11
             content = new TextBlock( showText );
 264  
         }
 265  
 
 266  12
         if ( URL_PATTERN.matcher( link ).matches() )
 267  
         {
 268  6
             return new LinkBlock( link, content );
 269  
         }
 270  
 
 271  6
         final StringTokenizer tokenizer = new StringTokenizer( link );
 272  6
         final StringBuffer sb = new StringBuffer();
 273  
 
 274  15
         while ( tokenizer.hasMoreElements() )
 275  
         {
 276  9
             final String s = tokenizer.nextToken();
 277  9
             sb.append( s.substring( 0, 1 ).toUpperCase() );
 278  9
             sb.append( s.substring( 1 ) );
 279  9
         }
 280  6
         return new WikiWordBlock( sb.toString(), content, wikiWordLinkResolver );
 281  
     }
 282  
 
 283  
     /**
 284  
      * Parses a wiki word
 285  
      * @param line the line to parse
 286  
      * @param ret where the results live
 287  
      * @param wikiMatcher wiki matcher
 288  
      */
 289  
     private void parseWiki( final String line, final List<Block> ret, final Matcher wikiMatcher )
 290  
     {
 291  7
         final String wikiWord = wikiMatcher.group();
 292  7
         ret.addAll( parse( line.substring( 0, wikiMatcher.start() ) ) );
 293  7
         if ( wikiWord.startsWith( "!" ) )
 294  
         { // link prevention
 295  1
             ret.add( new TextBlock( wikiWord.substring( 1 ) ) );
 296  
         }
 297  
         else
 298  
         {
 299  6
             ret.add( new WikiWordBlock( wikiWord, wikiWordLinkResolver ) );
 300  
         }
 301  7
         ret.addAll( parse( line.substring( wikiMatcher.end(), line.length() ) ) );
 302  7
     }
 303  
 
 304  
     /**
 305  
      * Parses a link
 306  
      * @param line the line to parse
 307  
      * @param ret where the results live
 308  
      * @param linkMatcher link matcher
 309  
      */
 310  
     private void parseLink( final String line, final List<Block> ret, final Matcher linkMatcher )
 311  
     {
 312  10
         ret.addAll( parse( line.substring( 0, linkMatcher.start() ) ) );
 313  10
         if ( line.charAt( linkMatcher.start() ) == '!' )
 314  
         {
 315  1
             ret.add( new TextBlock( line.substring( linkMatcher.start() + 1, linkMatcher.end() ) ) );
 316  
         }
 317  
         else
 318  
         {
 319  9
             ret.add( createLink( linkMatcher.group( 1 ), linkMatcher.group( 2 ) ) );
 320  
         }
 321  10
         ret.addAll( parse( line.substring( linkMatcher.end(), line.length() ) ) );
 322  10
     }
 323  
 
 324  
     /**
 325  
      * Parses xhtml.
 326  
      *
 327  
      * @param line the line to parse
 328  
      * @param ret where the results live
 329  
      * @param xhtmlMatcher xhtml matcher
 330  
      */
 331  
     private void parseXHTML( final String line, final List<Block> ret, final Matcher xhtmlMatcher )
 332  
     {
 333  0
         if ( xhtmlMatcher.group( 1 ).indexOf( "noautolink" ) != -1 )
 334  
         {
 335  0
             noautolink = true;
 336  
         }
 337  
         else
 338  
         {
 339  0
             ret.add( new XHTMLBlock( xhtmlMatcher.group( 1 ) ) );
 340  
         }
 341  
 
 342  0
         ret.addAll( parse( xhtmlMatcher.group( 2 ) ) );
 343  
 
 344  0
         if ( xhtmlMatcher.group( 1 ).indexOf( "noautolink" ) != -1 )
 345  
         {
 346  0
             noautolink = false;
 347  
         }
 348  
         else
 349  
         {
 350  0
             ret.add( new XHTMLBlock( xhtmlMatcher.group( 3 ) ) );
 351  
         }
 352  
 
 353  0
         ret.addAll( parse( xhtmlMatcher.group( 4 ) ) );
 354  0
     }
 355  
 
 356  
     /**
 357  
      * @param m    matcher to test
 358  
      * @param line line to test
 359  
      * @return <code>true</code> if the match on m represent a word (must be
 360  
      *         a space before the word or must be the beginning of the line)
 361  
      */
 362  
     private boolean isAWord( final Matcher m, final String line )
 363  
     {
 364  4
         return startLikeWord( m, line ) && endLikeWord( m, line );
 365  
     }
 366  
 
 367  
     /**
 368  
      * @param m matcher to test
 369  
      * @param line line to test
 370  
      * @return true if it is the beginning of a word
 371  
      */
 372  
     private boolean startLikeWord( final Matcher m, final String line )
 373  
     {
 374  15
         final int start = m.start();
 375  
 
 376  15
         boolean ret = false;
 377  15
         if ( start == 0 )
 378  
         {
 379  6
             ret = true;
 380  
         }
 381  9
         else if ( start > 0 )
 382  
         {
 383  9
             if ( isSpace( line.charAt( start - 1 ) ) )
 384  
             {
 385  5
                 ret = true;
 386  
             }
 387  
         }
 388  
 
 389  15
         return ret;
 390  
     }
 391  
 
 392  
     /**
 393  
      * @param m matcher to test
 394  
      * @param line line to test
 395  
      * @return true if it is the end of a word
 396  
      */
 397  
     private boolean endLikeWord( final Matcher m, final String line )
 398  
     {
 399  4
         final int end = m.end();
 400  
 
 401  4
         boolean ret = true;
 402  4
         if ( end < line.length() )
 403  
         {
 404  4
             ret = isSpace( line.charAt( end ) );
 405  
         }
 406  
 
 407  4
         return ret;
 408  
     }
 409  
 
 410  
     /**
 411  
      * @param c char to test
 412  
      * @return <code>true</code> if c is a space char
 413  
      */
 414  
     private boolean isSpace( final char c )
 415  
     {
 416  13
         return c == ' ' || c == '\t';
 417  
     }
 418  
 }