View Javadoc
1   package org.apache.maven.doxia.module.twiki.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.List;
25  import java.util.Map;
26  
27  /**
28   * Parse looking for formated text (bold, italic, ...)
29   *
30   * @author Juan F. Codagnone
31   */
32  public class FormatedTextParser
33  {
34      /**
35       * parser used to parse text...
36       */
37      private TextParser textParser;
38  
39      /**
40       * map used to create blocks dependening on the text format
41       */
42      private static final Map<String, FormatBlockFactory> FACTORY_MAP = new HashMap<>();
43  
44      /**
45       * creates bold blocks
46       */
47      private static final FormatBlockFactory BOLD_FACTORY = new FormatBlockFactory()
48      {
49          /** {@inheritDoc} */
50          public Block createBlock( final Block[] childrens )
51          {
52              return new BoldBlock( childrens );
53          }
54      };
55  
56      /**
57       * creates italic blocks
58       */
59      private static final FormatBlockFactory ITALIC_FACTORY = new FormatBlockFactory()
60      {
61          /** {@inheritDoc} */
62          public Block createBlock( final Block[] childrens )
63          {
64              return new ItalicBlock( childrens );
65          }
66      };
67  
68      /**
69       * creates monospaced blocks
70       */
71      private static final FormatBlockFactory MONOSPACED_FACTORY = new FormatBlockFactory()
72      {
73          /** {@inheritDoc} */
74          public Block createBlock( final Block[] childrens )
75          {
76              return new MonospaceBlock( childrens );
77          }
78      };
79  
80      /**
81       * creates bold italic blocks
82       */
83      private static final FormatBlockFactory BOLDITALIC_FACTORY = new FormatBlockFactory()
84      {
85          /** {@inheritDoc} */
86          public Block createBlock( final Block[] childrens )
87          {
88              return new BoldBlock( new Block[] { new ItalicBlock( childrens ) } );
89          }
90      };
91  
92      /**
93       * creates bold monospace blocks
94       */
95      private static final FormatBlockFactory BOLDMONO_FACTORY = new FormatBlockFactory()
96      {
97          /** {@inheritDoc} */
98          public Block createBlock( final Block[] childrens )
99          {
100             return new BoldBlock( new Block[] { new MonospaceBlock( childrens ) } );
101         }
102     };
103 
104     /**
105      * format characters
106      */
107     private static final String[] SPECIAL_CHAR = new String[] { "__", "==", "*", "_", "=" };
108 
109     static
110     {
111         FACTORY_MAP.put( "*", BOLD_FACTORY );
112         FACTORY_MAP.put( "_", ITALIC_FACTORY );
113         FACTORY_MAP.put( "=", MONOSPACED_FACTORY );
114         FACTORY_MAP.put( "__", BOLDITALIC_FACTORY );
115         FACTORY_MAP.put( "==", BOLDMONO_FACTORY );
116     }
117 
118     /**
119      * @param line line to parse
120      * @return TextBlock, ItalicBlock, BoldBlock, MonospacedBlock, ...
121      */
122     final Block[] parse( final String line )
123     {
124         return parseFormat( line ).toArray( new Block[] {} );
125     }
126 
127     /**
128      * @param c character to test
129      * @return <code>true</code> if c is a space character
130      */
131     static boolean isSpace( final char c )
132     {
133         return c == ' ' || c == '\t';
134     }
135 
136     /**
137      * @param c character to test
138      * @return <code>true</code> if c is a character that limits the formats
139      */
140     static boolean isSpecial( final char c )
141     {
142         boolean ret = false;
143 
144         for ( int i = 0; !ret && i < SPECIAL_CHAR.length; i++ )
145         {
146             if ( SPECIAL_CHAR[i].charAt( 0 ) == c )
147             {
148                 ret = true;
149             }
150         }
151 
152         return ret;
153     }
154 
155     /**
156      * Parse text format (bold, italic...)
157      * <p/>
158      * TODO too many lines!!
159      *
160      * @param line line to parse
161      * @return list of blocks
162      */
163     private List<Block> parseFormat( final String line )
164     {
165         final List<Block> ret = new ArrayList<>();
166         final int[] lhOffsets = new int[SPECIAL_CHAR.length];
167         final int[] rhOffsets = new int[SPECIAL_CHAR.length];
168 
169         // for each text format markers...
170         for ( int i = 0; i < SPECIAL_CHAR.length; i++ )
171         {
172             final int specialLen = SPECIAL_CHAR[i].length();
173             int t = 0;
174             // search the nearset instance of this marker...
175             while ( t != -1 && ( t = line.indexOf( SPECIAL_CHAR[i], t ) ) != -1 )
176             {
177                 // and check if it at the begining of a word.
178                 if ( t == 0 || isSpace( line.charAt( t - 1 ) ) || isParenthesis( line.charAt( t - 1 ) ) )
179                 {
180                     // if it is, and if, check to avoid going beyond the string
181                     if ( t + specialLen < line.length() )
182                     {
183                         // and if character after the format marker is another
184                         // marker, is an error, and should be ignored
185                         if ( isSpecial( line.charAt( t + specialLen ) ) )
186                         {
187                             t += specialLen;
188                         }
189                         else
190                         {
191                             // else we find a starter!
192                             break;
193                         }
194                     }
195                     else
196                     {
197                         t = -1;
198                     }
199                 }
200                 else
201                 {
202                     t += specialLen;
203                 }
204             }
205             lhOffsets[i] = t;
206         }
207 
208         // for each text format markers...
209         for ( int i = 0; i < lhOffsets.length; i++ )
210         {
211             final int specialLen = SPECIAL_CHAR[i].length();
212             // if we found a text format beginning
213             if ( lhOffsets[i] != -1 )
214             {
215                 int t = lhOffsets[i] + specialLen;
216                 // search for a text format ending
217                 while ( ( t = line.indexOf( SPECIAL_CHAR[i], t ) ) != -1 )
218                 {
219                     // must be side by side to a word
220                     final char c = line.charAt( t - 1 );
221                     if ( t > 0 && !isSpace( c ) && !isSpecial( c ) )
222                     {
223                         break;
224                     }
225                     else
226                     {
227                         t += specialLen;
228                     }
229                 }
230                 rhOffsets[i] = t;
231             }
232         }
233 
234         // find the nearest index
235         int minIndex = -1;
236         int charType = 0;
237         for ( int i = 0; i < lhOffsets.length; i++ )
238         {
239             if ( lhOffsets[i] != -1 && rhOffsets[i] != 1 )
240             {
241                 if ( minIndex == -1 || lhOffsets[i] < minIndex )
242                 {
243                     if ( rhOffsets[i] > lhOffsets[i] )
244                     {
245                         // ej: "mary *has a little lamb"
246                         minIndex = lhOffsets[i];
247                         charType = i;
248                     }
249                 }
250             }
251         }
252 
253         if ( minIndex == -1 )
254         {
255             ret.addAll( textParser.parse( line ) );
256         }
257         else
258         {
259             int len = SPECIAL_CHAR[charType].length();
260             ret.addAll( parseFormat( line.substring( 0, minIndex ) ) );
261             ret.add( FACTORY_MAP.get( SPECIAL_CHAR[charType] )
262                      .createBlock( parseFormat( line.substring( minIndex + len, rhOffsets[charType] ) )
263                                    .toArray( new Block[] {} ) ) );
264             ret.addAll( parseFormat( line.substring( rhOffsets[charType] + len ) ) );
265         }
266 
267         // profit
268         return ret;
269     }
270 
271     /**
272      * @param c character to test
273      * @return <code>true</code> if c is a parenthesis
274      */
275     private boolean isParenthesis( final char c )
276     {
277         return c == '(' || c == ')';
278     }
279 
280     /**
281      * Sets the formatTextParser.
282      *
283      * @param textParser text parser to use
284      *                   <code>TextParser</code> with the formatTextParser.
285      */
286     public final void setTextParser( final TextParser textParser )
287     {
288         if ( textParser == null )
289         {
290             throw new IllegalArgumentException( "argument can't be null" );
291         }
292 
293         this.textParser = textParser;
294     }
295 }
296 
297 /**
298  * @author Juan F. Codagnone
299  */
300 interface FormatBlockFactory
301 {
302     /**
303      * factory method of format <code>Block</code>
304      *
305      * @param childrens children of the format block
306      * @return a format block
307      */
308     Block createBlock( final Block[] childrens );
309 }