001package org.apache.maven.doxia.module.twiki.parser;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *   http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import java.util.ArrayList;
023import java.util.HashMap;
024import java.util.List;
025import java.util.Map;
026
027/**
028 * Parse looking for formated text (bold, italic, ...)
029 *
030 * @author Juan F. Codagnone
031 * @version $Id$
032 */
033public class FormatedTextParser
034{
035    /**
036     * parser used to parse text...
037     */
038    private TextParser textParser;
039
040    /**
041     * map used to create blocks dependening on the text format
042     */
043    private static final Map<String, FormatBlockFactory> FACTORY_MAP = new HashMap<String, FormatBlockFactory>();
044
045    /**
046     * creates bold blocks
047     */
048    private static final FormatBlockFactory BOLD_FACTORY = new FormatBlockFactory()
049    {
050        /** {@inheritDoc} */
051        public Block createBlock( final Block[] childrens )
052        {
053            return new BoldBlock( childrens );
054        }
055    };
056
057    /**
058     * creates italic blocks
059     */
060    private static final FormatBlockFactory ITALIC_FACTORY = new FormatBlockFactory()
061    {
062        /** {@inheritDoc} */
063        public Block createBlock( final Block[] childrens )
064        {
065            return new ItalicBlock( childrens );
066        }
067    };
068
069    /**
070     * creates monospaced blocks
071     */
072    private static final FormatBlockFactory MONOSPACED_FACTORY = new FormatBlockFactory()
073    {
074        /** {@inheritDoc} */
075        public Block createBlock( final Block[] childrens )
076        {
077            return new MonospaceBlock( childrens );
078        }
079    };
080
081    /**
082     * creates bold italic blocks
083     */
084    private static final FormatBlockFactory BOLDITALIC_FACTORY = new FormatBlockFactory()
085    {
086        /** {@inheritDoc} */
087        public Block createBlock( final Block[] childrens )
088        {
089            return new BoldBlock( new Block[] { new ItalicBlock( childrens ) } );
090        }
091    };
092
093    /**
094     * creates bold monospace blocks
095     */
096    private static final FormatBlockFactory BOLDMONO_FACTORY = new FormatBlockFactory()
097    {
098        /** {@inheritDoc} */
099        public Block createBlock( final Block[] childrens )
100        {
101            return new BoldBlock( new Block[] { new MonospaceBlock( childrens ) } );
102        }
103    };
104
105    /**
106     * format characters
107     */
108    private static final String[] SPECIAL_CHAR = new String[] { "__", "==", "*", "_", "=" };
109
110    static
111    {
112        FACTORY_MAP.put( "*", BOLD_FACTORY );
113        FACTORY_MAP.put( "_", ITALIC_FACTORY );
114        FACTORY_MAP.put( "=", MONOSPACED_FACTORY );
115        FACTORY_MAP.put( "__", BOLDITALIC_FACTORY );
116        FACTORY_MAP.put( "==", BOLDMONO_FACTORY );
117    }
118
119    /**
120     * @param line line to parse
121     * @return TextBlock, ItalicBlock, BoldBlock, MonospacedBlock, ...
122     */
123    final Block[] parse( final String line )
124    {
125        return parseFormat( line ).toArray( new Block[] {} );
126    }
127
128    /**
129     * @param c character to test
130     * @return <code>true</code> if c is a space character
131     */
132    static boolean isSpace( final char c )
133    {
134        return c == ' ' || c == '\t';
135    }
136
137    /**
138     * @param c character to test
139     * @return <code>true</code> if c is a character that limits the formats
140     */
141    static boolean isSpecial( final char c )
142    {
143        boolean ret = false;
144
145        for ( int i = 0; !ret && i < SPECIAL_CHAR.length; i++ )
146        {
147            if ( SPECIAL_CHAR[i].charAt( 0 ) == c )
148            {
149                ret = true;
150            }
151        }
152
153        return ret;
154    }
155
156    /**
157     * Parse text format (bold, italic...)
158     * <p/>
159     * TODO too many lines!!
160     *
161     * @param line line to parse
162     * @return list of blocks
163     */
164    private List<Block> parseFormat( final String line )
165    {
166        final List<Block> ret = new ArrayList<Block>();
167        final int[] lhOffsets = new int[SPECIAL_CHAR.length];
168        final int[] rhOffsets = new int[SPECIAL_CHAR.length];
169
170        // for each text format markers...
171        for ( int i = 0; i < SPECIAL_CHAR.length; i++ )
172        {
173            final int specialLen = SPECIAL_CHAR[i].length();
174            int t = 0;
175            // search the nearset instance of this marker...
176            while ( t != -1 && ( t = line.indexOf( SPECIAL_CHAR[i], t ) ) != -1 )
177            {
178                // and check if it at the begining of a word.
179                if ( t == 0 || isSpace( line.charAt( t - 1 ) ) || isParenthesis( line.charAt( t - 1 ) ) )
180                {
181                    // if it is, and if, check to avoid going beyond the string
182                    if ( t + specialLen < line.length() )
183                    {
184                        // and if character after the format marker is another
185                        // marker, is an error, and should be ignored
186                        if ( isSpecial( line.charAt( t + specialLen ) ) )
187                        {
188                            t += specialLen;
189                        }
190                        else
191                        {
192                            // else we find a starter!
193                            break;
194                        }
195                    }
196                    else
197                    {
198                        t = -1;
199                    }
200                }
201                else
202                {
203                    t += specialLen;
204                }
205            }
206            lhOffsets[i] = t;
207        }
208
209        // for each text format markers...
210        for ( int i = 0; i < lhOffsets.length; i++ )
211        {
212            final int specialLen = SPECIAL_CHAR[i].length();
213            // if we found a text format beginning
214            if ( lhOffsets[i] != -1 )
215            {
216                int t = lhOffsets[i] + specialLen;
217                // search for a text format ending
218                while ( ( t = line.indexOf( SPECIAL_CHAR[i], t ) ) != -1 )
219                {
220                    // must be side by side to a word
221                    final char c = line.charAt( t - 1 );
222                    if ( t > 0 && !isSpace( c ) && !isSpecial( c ) )
223                    {
224                        break;
225                    }
226                    else
227                    {
228                        t += specialLen;
229                    }
230                }
231                rhOffsets[i] = t;
232            }
233        }
234
235        // find the nearest index
236        int minIndex = -1;
237        int charType = 0;
238        for ( int i = 0; i < lhOffsets.length; i++ )
239        {
240            if ( lhOffsets[i] != -1 && rhOffsets[i] != 1 )
241            {
242                if ( minIndex == -1 || lhOffsets[i] < minIndex )
243                {
244                    if ( rhOffsets[i] > lhOffsets[i] )
245                    {
246                        // ej: "mary *has a little lamb"
247                        minIndex = lhOffsets[i];
248                        charType = i;
249                    }
250                }
251            }
252        }
253
254        if ( minIndex == -1 )
255        {
256            ret.addAll( textParser.parse( line ) );
257        }
258        else
259        {
260            int len = SPECIAL_CHAR[charType].length();
261            ret.addAll( parseFormat( line.substring( 0, minIndex ) ) );
262            ret.add( FACTORY_MAP.get( SPECIAL_CHAR[charType] )
263                     .createBlock( parseFormat( line.substring( minIndex + len, rhOffsets[charType] ) )
264                                   .toArray( new Block[] {} ) ) );
265            ret.addAll( parseFormat( line.substring( rhOffsets[charType] + len ) ) );
266        }
267
268        // profit
269        return ret;
270    }
271
272    /**
273     * @param c character to test
274     * @return <code>true</code> if c is a parenthesis
275     */
276    private boolean isParenthesis( final char c )
277    {
278        return c == '(' || c == ')';
279    }
280
281    /**
282     * Sets the formatTextParser.
283     *
284     * @param textParser text parser to use
285     *                   <code>TextParser</code> with the formatTextParser.
286     */
287    public final void setTextParser( final TextParser textParser )
288    {
289        if ( textParser == null )
290        {
291            throw new IllegalArgumentException( "argument can't be null" );
292        }
293
294        this.textParser = textParser;
295    }
296}
297
298/**
299 * @author Juan F. Codagnone
300 * @version $Id$
301 */
302interface FormatBlockFactory
303{
304    /**
305     * factory method of format <code>Block</code>
306     *
307     * @param childrens children of the format block
308     * @return a format block
309     */
310    Block createBlock( final Block[] childrens );
311}