package org.apache.maven.doxia.module.confluence.parser;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.util.ArrayList;
import java.util.List;

import org.codehaus.plexus.util.StringUtils;

/**
 * Re-usable builder that can be used to generate paragraph and list item text from a string containing all the content
 * and wiki formatting. This class is intentionally stateful, but cheap to create, so create one as needed and keep it
 * on the stack to preserve stateless behaviour in the caller.
 * <p>
 * The parsed blocks are accumulated in an instance field, so {@link #getBlocks()} is meant to be called once per
 * instance: a second call scans the input again and appends to the list returned by the first call.
 * </p>
 *
 * @author Dave Syer
 * @since 1.1
 */
public class ChildBlocksBuilder
{
    private boolean insideBold = false;

    private boolean insideItalic = false;

    private boolean insideLink = false;

    private boolean insideLinethrough = false;

    private boolean insideUnderline = false;

    private boolean insideSub = false;

    private boolean insideSup = false;

    /** Top-level result list; filled by {@link #getBlocks()} and returned from it. */
    private final List<Block> blocks = new ArrayList<>();

    /** Characters collected since the last block was emitted. */
    private StringBuilder text = new StringBuilder();

    private final String input;

    private boolean insideMonospaced;

    /**
     * <p>Constructor for ChildBlocksBuilder.</p>
     *
     * @param input the input.
     */
    public ChildBlocksBuilder( String input )
    {
        this.input = input;
    }

    /**
     * Utility method to convert marked up content into blocks for rendering.
     * <p>
     * The input is scanned one character at a time. Markup characters toggle the corresponding <code>inside*</code>
     * flag; the text collected between an opening and a closing marker becomes the content of the matching block.
     * Blocks produced while bold, italic or monospaced markup is still open are parked in a temporary list so that
     * they can become children of the enclosing block once it is closed.
     * </p>
     *
     * @return a list of Blocks that can be used to render it
     */
    public List<Block> getBlocks()
    {
        // blocks waiting to be nested into a still-open bold/italic/monospaced block
        List<Block> specialBlocks = new ArrayList<>();

        for ( int i = 0; i < input.length(); i++ )
        {
            char c = input.charAt( i );

            switch ( c )
            {
                case '*':
                    if ( insideBold )
                    {
                        // the flag is reset first: getChildren/getList look at the remaining open styles
                        insideBold = false;
                        specialBlocks = getList( new BoldBlock( getChildren( text, specialBlocks ) ), specialBlocks );
                        text = new StringBuilder();
                    }
                    else if ( insideMonospaced )
                    {
                        text.append( c );
                    }
                    else
                    {
                        text = addTextBlockIfNecessary( blocks, specialBlocks, text );
                        insideBold = true;
                    }

                    break;
                case '_':
                    if ( insideItalic )
                    {
                        insideItalic = false;
                        specialBlocks = getList( new ItalicBlock( getChildren( text, specialBlocks ) ), specialBlocks );
                        text = new StringBuilder();
                    }
                    else if ( insideLink || insideMonospaced )
                    {
                        text.append( c );
                    }
                    else
                    {
                        text = addTextBlockIfNecessary( blocks, specialBlocks, text );
                        insideItalic = true;
                    }

                    break;
                case '-':
                    if ( insideLinethrough )
                    {
                        insideLinethrough = false;
                        blocks.add( new LinethroughBlock( text.toString() ) );
                        text = new StringBuilder();
                    }
                    else if ( insideLink || insideMonospaced )
                    {
                        text.append( c );
                    }
                    else
                    {
                        text = addTextBlockIfNecessary( blocks, specialBlocks, text );
                        insideLinethrough = true;
                    }
                    break;
                case '+':
                    if ( insideUnderline )
                    {
                        insideUnderline = false;
                        blocks.add( new UnderlineBlock( text.toString() ) );
                        text = new StringBuilder();
                    }
                    else if ( insideLink || insideMonospaced )
                    {
                        text.append( c );
                    }
                    else
                    {
                        text = addTextBlockIfNecessary( blocks, specialBlocks, text );
                        insideUnderline = true;
                    }
                    break;
                case '~':
                    if ( insideSub )
                    {
                        insideSub = false;
                        blocks.add( new SubBlock( text.toString() ) );
                        text = new StringBuilder();
                    }
                    else if ( insideLink || insideMonospaced )
                    {
                        text.append( c );
                    }
                    else
                    {
                        text = addTextBlockIfNecessary( blocks, specialBlocks, text );
                        insideSub = true;
                    }
                    break;
                case '^':
                    if ( insideSup )
                    {
                        insideSup = false;
                        blocks.add( new SupBlock( text.toString() ) );
                        text = new StringBuilder();
                    }
                    else if ( insideLink || insideMonospaced )
                    {
                        text.append( c );
                    }
                    else
                    {
                        text = addTextBlockIfNecessary( blocks, specialBlocks, text );
                        insideSup = true;
                    }
                    break;
                case '[':
                    if ( insideMonospaced )
                    {
                        text.append( c );
                    }
                    else
                    {
                        insideLink = true;
                        text = addTextBlockIfNecessary( blocks, specialBlocks, text );
                    }
                    break;
                case ']':
                    if ( insideLink )
                    {
                        blocks.add( createLinkBlock( text.toString() ) );
                        text = new StringBuilder();
                        insideLink = false;
                    }
                    else if ( insideMonospaced )
                    {
                        text.append( c );
                    }
                    // a ']' outside of a link and outside of monospaced text is dropped

                    break;
                case '{':
                    if ( insideMonospaced )
                    {
                        text.append( c );
                    }
                    else
                    {
                        text = addTextBlockIfNecessary( blocks, specialBlocks, text );

                        if ( nextChar( input, i ) == '{' ) // it's monospaced
                        {
                            i++;
                            insideMonospaced = true;
                        }
                    }
                    // else it's a confluence macro...

                    break;
                case '}':
                    if ( nextChar( input, i ) == '}' )
                    {
                        i++;
                        insideMonospaced = false;
                        specialBlocks = getList( new MonospaceBlock( getChildren( text, specialBlocks ) ),
                                                 specialBlocks );
                        text = new StringBuilder();
                    }
                    else if ( insideMonospaced )
                    {
                        text.append( c );
                    }
                    else
                    {
                        // end of a single-brace macro: only "anchor:" is understood, anything else is
                        // written back as literal text with its braces restored
                        String name = text.toString();
                        if ( name.startsWith( "anchor:" ) )
                        {
                            blocks.add( new AnchorBlock( name.substring( "anchor:".length() ) ) );
                        }
                        else
                        {
                            blocks.add( new TextBlock( "{" + name + "}" ) );
                        }
                        text = new StringBuilder();
                    }

                    break;
                case '\\':
                    if ( insideMonospaced )
                    {
                        text.append( c );
                    }
                    else if ( nextChar( input, i ) == '\\' )
                    {
                        i++;
                        text = addTextBlockIfNecessary( blocks, specialBlocks, text );
                        blocks.add( new LinebreakBlock() );
                    }
                    else
                    {
                        // DOXIA-467 single trailing backward slash, double is considered linebreak
                        if ( i == input.length() - 1 )
                        {
                            text.append( '\\' );
                        }
                        else
                        {
                            // escape: take the following character literally and skip over it
                            text.append( input.charAt( ++i ) );
                        }
                    }

                    break;
                default:
                    text.append( c );
            }

            // once no bold/italic/monospaced markup is open any more, the parked blocks are final
            if ( !specialBlocks.isEmpty() && !insideItalic && !insideBold && !insideMonospaced )
            {
                blocks.addAll( specialBlocks );
                specialBlocks.clear();
            }

        }

        if ( text.length() > 0 )
        {
            blocks.add( new TextBlock( text.toString() ) );
        }

        return blocks;
    }

    /**
     * Builds the link block for the text found between <code>[</code> and <code>]</code>.
     * <p>
     * Supported forms are <code>target</code>, <code>#anchor</code>, <code>^attachment</code> and
     * <code>alias|target</code>. A relative target (one that neither ends with <code>.html</code> nor contains
     * <code>http</code>) gets a <code>.html</code> suffix, see DOXIA-298, unless it is prefixed with <code>^</code>.
     * </p>
     * <p>
     * The <code>|</code> split drops empty pieces, so content such as <code>alias|</code> or <code>|target</code>
     * yields fewer than two pieces. Such content is handled like a plain link made of the single remaining piece
     * (or of the empty string when nothing remains) instead of failing on a missing array element.
     * </p>
     *
     * @param content the raw text between the brackets
     * @return the block representing the link
     */
    private static Block createLinkBlock( String content )
    {
        boolean aliased = content.contains( "|" );
        String[] pieces = aliased ? StringUtils.split( content, "|" ) : new String[0];

        if ( pieces.length >= 2 )
        {
            // as before, the suffix decision is taken on the complete content, alias included
            boolean addHTMLSuffix = needsHtmlSuffix( content );
            String target = pieces[1];

            if ( target.startsWith( "^" ) )
            {
                // use the "file attachment" ^ syntax to force verbatim link: needed to allow actually
                // linking to some non-html resources
                target = target.substring( 1 ); // now just get rid of the lead ^
                addHTMLSuffix = false;
            }

            if ( addHTMLSuffix )
            {
                target = appendHtmlSuffix( target );
            }

            return new LinkBlock( target, pieces[0] );
        }

        String link = content;
        if ( aliased )
        {
            // degenerate "alias|", "|target" or "|": keep whatever piece is left
            link = pieces.length == 1 ? pieces[0] : "";
        }

        boolean addHTMLSuffix = needsHtmlSuffix( link );
        String value = link;

        if ( link.startsWith( "#" ) )
        {
            // same-page anchor: the displayed text omits the leading '#'
            value = link.substring( 1 );
        }
        else if ( link.startsWith( "^" ) )
        {
            link = link.substring( 1 ); // chop off the lead ^ from link and from value
            value = link;
            addHTMLSuffix = false; // force verbatim link to support attaching files/resources (not just .html files)
        }

        if ( addHTMLSuffix )
        {
            link = appendHtmlSuffix( link );
        }

        return new LinkBlock( link, value );
    }

    /**
     * Tells whether a link is a relative path that still lacks the <code>.html</code> extension.
     *
     * @param link the link text to inspect
     * @return <code>true</code> if the text neither ends with <code>.html</code> nor contains <code>http</code>
     */
    private static boolean needsHtmlSuffix( String link )
    {
        return !link.endsWith( ".html" ) && !link.contains( "http" );
    }

    /**
     * Adds the <code>.html</code> extension to a link target, in front of its first <code>#</code> if it has one.
     * A target starting with <code>#</code> is returned unchanged. Everything from the first <code>#</code> on is
     * kept verbatim, so a target ending with <code>#</code> is handled as well.
     *
     * @param target the link target
     * @return the target with the extension inserted
     */
    private static String appendHtmlSuffix( String target )
    {
        int hash = target.indexOf( '#' );

        if ( hash < 0 )
        {
            return target.concat( ".html" );
        }

        if ( hash == 0 )
        {
            return target;
        }

        return target.substring( 0, hash ) + ".html" + target.substring( hash );
    }

    /**
     * Creates the new list of pending blocks after a bold, italic or monospaced block has been closed.
     * The flag of the closed style has already been reset when this method is called.
     *
     * @param block the block that was just closed
     * @param currentBlocks the blocks that were pending before the close
     * @return a new list ending with <code>block</code>; it starts with <code>currentBlocks</code> only if another
     *         bold, italic or monospaced style is still open
     */
    private List<Block> getList( Block block, List<Block> currentBlocks )
    {
        List<Block> list = new ArrayList<>();

        if ( insideBold || insideItalic || insideMonospaced )
        {
            list.addAll( currentBlocks );
        }

        list.add( block );

        return list;
    }

    /**
     * Collects the children of a bold, italic or monospaced block that is being closed.
     * The flag of the closed style has already been reset when this method is called.
     *
     * @param buffer the text collected since the last emitted block; it is trimmed before use
     * @param currentBlocks the pending blocks; they become children only if no bold, italic or monospaced style
     *            remains open
     * @return a new list with the children, possibly empty
     */
    private List<Block> getChildren( StringBuilder buffer, List<Block> currentBlocks )
    {
        String txt = buffer.toString().trim();

        if ( currentBlocks.isEmpty() && StringUtils.isEmpty( txt ) )
        {
            return new ArrayList<>();
        }

        List<Block> list = new ArrayList<>();

        if ( !insideBold && !insideItalic && !insideMonospaced )
        {
            list.addAll( currentBlocks );
        }

        if ( StringUtils.isEmpty( txt ) )
        {
            return list;
        }

        list.add( new TextBlock( txt ) );

        return list;
    }

    /**
     * Looks one character ahead without moving the scan position.
     *
     * @param input the string being scanned
     * @param i the current index
     * @return the character at <code>i + 1</code>, or <code>'\0'</code> if there is none
     */
    private static char nextChar( String input, int i )
    {
        return input.length() > i + 1 ? input.charAt( i + 1 ) : '\0';
    }

    /**
     * Emits the collected text, if any, as a {@link TextBlock}.
     *
     * @param blcks the top-level list, used when no bold, italic or monospaced style is open
     * @param specialBlocks the pending list, used otherwise
     * @param txt the collected text
     * @return <code>txt</code> itself if it is empty, otherwise a fresh empty buffer
     */
    private StringBuilder addTextBlockIfNecessary( List<Block> blcks, List<Block> specialBlocks, StringBuilder txt )
    {
        if ( txt.length() == 0 )
        {
            return txt;
        }

        TextBlock textBlock = new TextBlock( txt.toString() );

        if ( !insideBold && !insideItalic && !insideMonospaced )
        {
            blcks.add( textBlock );
        }
        else
        {
            specialBlocks.add( textBlock );
        }

        return new StringBuilder();
    }

}