001package org.apache.maven.doxia.module.twiki.parser; 002 003/* 004 * Licensed to the Apache Software Foundation (ASF) under one 005 * or more contributor license agreements. See the NOTICE file 006 * distributed with this work for additional information 007 * regarding copyright ownership. The ASF licenses this file 008 * to you under the Apache License, Version 2.0 (the 009 * "License"); you may not use this file except in compliance 010 * with the License. You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, 015 * software distributed under the License is distributed on an 016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 * KIND, either express or implied. See the License for the 018 * specific language governing permissions and limitations 019 * under the License. 020 */ 021 022import java.util.ArrayList; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Map; 026 027/** 028 * Parse looking for formated text (bold, italic, ...) 029 * 030 * @author Juan F. Codagnone 031 * @version $Id$ 032 */ 033public class FormatedTextParser 034{ 035 /** 036 * parser used to parse text... 037 */ 038 private TextParser textParser; 039 040 /** 041 * map used to create blocks dependening on the text format 042 */ 043 private static final Map<String, FormatBlockFactory> FACTORY_MAP = new HashMap<String, FormatBlockFactory>(); 044 045 /** 046 * creates bold blocks 047 */ 048 private static final FormatBlockFactory BOLD_FACTORY = new FormatBlockFactory() 049 { 050 /** {@inheritDoc} */ 051 public Block createBlock( final Block[] childrens ) 052 { 053 return new BoldBlock( childrens ); 054 } 055 }; 056 057 /** 058 * creates italic blocks 059 */ 060 private static final FormatBlockFactory ITALIC_FACTORY = new FormatBlockFactory() 061 { 062 /** {@inheritDoc} */ 063 public Block createBlock( final Block[] childrens ) 064 { 065 return new ItalicBlock( childrens ); 066 } 067 }; 068 069 /** 070 * creates monospaced blocks 071 */ 072 private static final FormatBlockFactory MONOSPACED_FACTORY = new FormatBlockFactory() 073 { 074 /** {@inheritDoc} */ 075 public Block createBlock( final Block[] childrens ) 076 { 077 return new MonospaceBlock( childrens ); 078 } 079 }; 080 081 /** 082 * creates bold italic blocks 083 */ 084 private static final FormatBlockFactory BOLDITALIC_FACTORY = new FormatBlockFactory() 085 { 086 /** {@inheritDoc} */ 087 public Block createBlock( final Block[] childrens ) 088 { 089 return new BoldBlock( new Block[] { new ItalicBlock( childrens ) } ); 090 } 091 }; 092 093 /** 094 * creates bold monospace blocks 095 */ 096 private static final FormatBlockFactory BOLDMONO_FACTORY = new FormatBlockFactory() 097 { 098 /** {@inheritDoc} */ 099 public Block createBlock( final Block[] childrens ) 100 { 101 return new BoldBlock( new Block[] { new MonospaceBlock( childrens ) } ); 102 } 103 }; 104 105 /** 106 * format characters 107 */ 108 private static final String[] SPECIAL_CHAR = new String[] { "__", "==", "*", "_", "=" }; 109 110 static 111 { 112 FACTORY_MAP.put( "*", BOLD_FACTORY ); 113 FACTORY_MAP.put( "_", ITALIC_FACTORY ); 114 FACTORY_MAP.put( "=", MONOSPACED_FACTORY ); 115 FACTORY_MAP.put( "__", BOLDITALIC_FACTORY ); 116 FACTORY_MAP.put( "==", BOLDMONO_FACTORY ); 117 } 118 119 /** 120 * @param line line to parse 121 * @return TextBlock, ItalicBlock, BoldBlock, MonospacedBlock, ... 122 */ 123 final Block[] parse( final String line ) 124 { 125 return parseFormat( line ).toArray( new Block[] {} ); 126 } 127 128 /** 129 * @param c character to test 130 * @return <code>true</code> if c is a space character 131 */ 132 static boolean isSpace( final char c ) 133 { 134 return c == ' ' || c == '\t'; 135 } 136 137 /** 138 * @param c character to test 139 * @return <code>true</code> if c is a character that limits the formats 140 */ 141 static boolean isSpecial( final char c ) 142 { 143 boolean ret = false; 144 145 for ( int i = 0; !ret && i < SPECIAL_CHAR.length; i++ ) 146 { 147 if ( SPECIAL_CHAR[i].charAt( 0 ) == c ) 148 { 149 ret = true; 150 } 151 } 152 153 return ret; 154 } 155 156 /** 157 * Parse text format (bold, italic...) 158 * <p/> 159 * TODO too many lines!! 160 * 161 * @param line line to parse 162 * @return list of blocks 163 */ 164 private List<Block> parseFormat( final String line ) 165 { 166 final List<Block> ret = new ArrayList<Block>(); 167 final int[] lhOffsets = new int[SPECIAL_CHAR.length]; 168 final int[] rhOffsets = new int[SPECIAL_CHAR.length]; 169 170 // for each text format markers... 171 for ( int i = 0; i < SPECIAL_CHAR.length; i++ ) 172 { 173 final int specialLen = SPECIAL_CHAR[i].length(); 174 int t = 0; 175 // search the nearset instance of this marker... 176 while ( t != -1 && ( t = line.indexOf( SPECIAL_CHAR[i], t ) ) != -1 ) 177 { 178 // and check if it at the begining of a word. 179 if ( t == 0 || isSpace( line.charAt( t - 1 ) ) || isParenthesis( line.charAt( t - 1 ) ) ) 180 { 181 // if it is, and if, check to avoid going beyond the string 182 if ( t + specialLen < line.length() ) 183 { 184 // and if character after the format marker is another 185 // marker, is an error, and should be ignored 186 if ( isSpecial( line.charAt( t + specialLen ) ) ) 187 { 188 t += specialLen; 189 } 190 else 191 { 192 // else we find a starter! 193 break; 194 } 195 } 196 else 197 { 198 t = -1; 199 } 200 } 201 else 202 { 203 t += specialLen; 204 } 205 } 206 lhOffsets[i] = t; 207 } 208 209 // for each text format markers... 210 for ( int i = 0; i < lhOffsets.length; i++ ) 211 { 212 final int specialLen = SPECIAL_CHAR[i].length(); 213 // if we found a text format beginning 214 if ( lhOffsets[i] != -1 ) 215 { 216 int t = lhOffsets[i] + specialLen; 217 // search for a text format ending 218 while ( ( t = line.indexOf( SPECIAL_CHAR[i], t ) ) != -1 ) 219 { 220 // must be side by side to a word 221 final char c = line.charAt( t - 1 ); 222 if ( t > 0 && !isSpace( c ) && !isSpecial( c ) ) 223 { 224 break; 225 } 226 else 227 { 228 t += specialLen; 229 } 230 } 231 rhOffsets[i] = t; 232 } 233 } 234 235 // find the nearest index 236 int minIndex = -1; 237 int charType = 0; 238 for ( int i = 0; i < lhOffsets.length; i++ ) 239 { 240 if ( lhOffsets[i] != -1 && rhOffsets[i] != 1 ) 241 { 242 if ( minIndex == -1 || lhOffsets[i] < minIndex ) 243 { 244 if ( rhOffsets[i] > lhOffsets[i] ) 245 { 246 // ej: "mary *has a little lamb" 247 minIndex = lhOffsets[i]; 248 charType = i; 249 } 250 } 251 } 252 } 253 254 if ( minIndex == -1 ) 255 { 256 ret.addAll( textParser.parse( line ) ); 257 } 258 else 259 { 260 int len = SPECIAL_CHAR[charType].length(); 261 ret.addAll( parseFormat( line.substring( 0, minIndex ) ) ); 262 ret.add( FACTORY_MAP.get( SPECIAL_CHAR[charType] ) 263 .createBlock( parseFormat( line.substring( minIndex + len, rhOffsets[charType] ) ) 264 .toArray( new Block[] {} ) ) ); 265 ret.addAll( parseFormat( line.substring( rhOffsets[charType] + len ) ) ); 266 } 267 268 // profit 269 return ret; 270 } 271 272 /** 273 * @param c character to test 274 * @return <code>true</code> if c is a parenthesis 275 */ 276 private boolean isParenthesis( final char c ) 277 { 278 return c == '(' || c == ')'; 279 } 280 281 /** 282 * Sets the formatTextParser. 283 * 284 * @param textParser text parser to use 285 * <code>TextParser</code> with the formatTextParser. 286 */ 287 public final void setTextParser( final TextParser textParser ) 288 { 289 if ( textParser == null ) 290 { 291 throw new IllegalArgumentException( "argument can't be null" ); 292 } 293 294 this.textParser = textParser; 295 } 296} 297 298/** 299 * @author Juan F. Codagnone 300 * @version $Id$ 301 */ 302interface FormatBlockFactory 303{ 304 /** 305 * factory method of format <code>Block</code> 306 * 307 * @param childrens children of the format block 308 * @return a format block 309 */ 310 Block createBlock( final Block[] childrens ); 311}