1 package org.apache.maven.doxia.module.markdown;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import com.vladsch.flexmark.ast.Heading;
23 import com.vladsch.flexmark.ast.HtmlCommentBlock;
24 import com.vladsch.flexmark.util.ast.Node;
25 import com.vladsch.flexmark.ast.util.TextCollectingVisitor;
26 import com.vladsch.flexmark.html.HtmlRenderer;
27 import com.vladsch.flexmark.util.options.MutableDataSet;
28 import com.vladsch.flexmark.ext.escaped.character.EscapedCharacterExtension;
29 import com.vladsch.flexmark.ext.abbreviation.AbbreviationExtension;
30 import com.vladsch.flexmark.ext.autolink.AutolinkExtension;
31 import com.vladsch.flexmark.ext.definition.DefinitionExtension;
32 import com.vladsch.flexmark.ext.typographic.TypographicExtension;
33 import com.vladsch.flexmark.ext.tables.TablesExtension;
34 import com.vladsch.flexmark.ext.wikilink.WikiLinkExtension;
35 import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension;
36
37 import org.apache.commons.io.input.CharSequenceReader;
38 import org.apache.maven.doxia.markup.HtmlMarkup;
39 import org.apache.maven.doxia.module.xhtml.XhtmlParser;
40 import org.apache.maven.doxia.parser.AbstractParser;
41 import org.apache.maven.doxia.parser.ParseException;
42 import org.apache.maven.doxia.parser.Parser;
43 import org.apache.maven.doxia.sink.Sink;
44 import org.apache.maven.doxia.util.HtmlTools;
45 import org.codehaus.plexus.component.annotations.Component;
46 import org.codehaus.plexus.component.annotations.Requirement;
47 import org.codehaus.plexus.util.IOUtil;
48 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
49
50 import java.io.IOException;
51 import java.io.Reader;
52 import java.util.Arrays;
53 import java.util.regex.Matcher;
54 import java.util.regex.Pattern;
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70 @Component( role = Parser.class, hint = MarkdownParser.ROLE_HINT )
71 public class MarkdownParser
72 extends AbstractParser
73 {
74
75
76
77
78 public static final String ROLE_HINT = "markdown";
79
80
81
82
83
84
85
86
87 private static final Pattern METADATA_SECTION_PATTERN = Pattern.compile(
88 "\\A^\\s*"
89 + "(?:title|author|date|address|affiliation|copyright|email|keywords|language|phone|subtitle)"
90 + "[ \\t]*:[ \\t]*[^\\r\\n]*[ \\t]*$[\\r\\n]+"
91 + "(?:^[ \\t]*[^:\\r\\n]+[ \\t]*:[ \\t]*[^\\r\\n]*[ \\t]*$[\\r\\n]+)*",
92 Pattern.MULTILINE | Pattern.CASE_INSENSITIVE );
93
94
95
96
97 private static final Pattern METADATA_ENTRY_PATTERN = Pattern.compile(
98 "^[ \\t]*([^:\\r\\n]+?)[ \\t]*:[ \\t]*([^\\r\\n]*)[ \\t]*$",
99 Pattern.MULTILINE );
100
101
102
103
104
105
106 @Override
107 public int getType()
108 {
109 return TXT_TYPE;
110 }
111
112
113
114
115
116 @Requirement
117 private MarkdownHtmlParser parser;
118
119
120
121
122 private static final com.vladsch.flexmark.parser.Parser FLEXMARK_PARSER;
123
124
125
126
127 private static final HtmlRenderer FLEXMARK_HTML_RENDERER;
128
129
130 static
131 {
132 MutableDataSet flexmarkOptions = new MutableDataSet();
133
134
135 flexmarkOptions.set( com.vladsch.flexmark.parser.Parser.EXTENSIONS, Arrays.asList(
136 EscapedCharacterExtension.create(),
137 AbbreviationExtension.create(),
138 AutolinkExtension.create(),
139 DefinitionExtension.create(),
140 TypographicExtension.create(),
141 TablesExtension.create(),
142 WikiLinkExtension.create(),
143 StrikethroughExtension.create()
144 ) );
145
146
147 flexmarkOptions.set( TypographicExtension.SINGLE_QUOTE_UNMATCHED, "'" );
148
149
150 flexmarkOptions.set( HtmlRenderer.HTML_BLOCK_OPEN_TAG_EOL, false );
151 flexmarkOptions.set( HtmlRenderer.HTML_BLOCK_CLOSE_TAG_EOL, false );
152 flexmarkOptions.set( HtmlRenderer.MAX_TRAILING_BLANK_LINES, -1 );
153
154
155 FLEXMARK_PARSER = com.vladsch.flexmark.parser.Parser.builder( flexmarkOptions ).build();
156
157
158 FLEXMARK_HTML_RENDERER = HtmlRenderer.builder( flexmarkOptions )
159 .linkResolverFactory( new FlexmarkDoxiaLinkResolver.Factory() )
160 .build();
161
162 }
163
164
165 @Override
166 public void parse( Reader source, Sink sink, String reference )
167 throws ParseException
168 {
169 try
170 {
171
172 CharSequence html = toHtml( source );
173
174
175 parser.parse( new CharSequenceReader( html ), sink );
176 }
177 catch ( IOException e )
178 {
179 throw new ParseException( "Failed reading Markdown source document", e );
180 }
181 }
182
183
184
185
186
187
188
189
190 CharSequence toHtml( Reader source )
191 throws IOException
192 {
193
194 String text = IOUtil.toString( source );
195
196
197 StringBuilder html = new StringBuilder( 1000 );
198 html.append( "<html>" );
199 html.append( "<head>" );
200
201
202 Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher( text );
203 boolean haveTitle = false;
204 if ( metadataMatcher.find() )
205 {
206 Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher( metadataMatcher.group( 0 ) );
207 while ( entryMatcher.find() )
208 {
209 String key = entryMatcher.group( 1 );
210 String value = entryMatcher.group( 2 );
211 if ( "title".equalsIgnoreCase( key ) )
212 {
213 haveTitle = true;
214 html.append( "<title>" );
215 html.append( HtmlTools.escapeHTML( value, false ) );
216 html.append( "</title>" );
217 }
218 else
219 {
220 html.append( "<meta name='" );
221 html.append( HtmlTools.escapeHTML( key ) );
222 html.append( "' content='" );
223 html.append( HtmlTools.escapeHTML( value ) );
224 html.append( "' />" );
225 }
226 }
227
228
229 text = text.substring( metadataMatcher.end( 0 ) );
230
231 }
232
233
234
235 Node documentRoot = FLEXMARK_PARSER.parse( text );
236
237
238
239 if ( !haveTitle && documentRoot.hasChildren() )
240 {
241
242 Node firstNode = documentRoot.getFirstChild();
243 while ( firstNode != null && firstNode instanceof HtmlCommentBlock )
244 {
245 firstNode = firstNode.getNext();
246 }
247
248
249 if ( firstNode != null && firstNode instanceof Heading )
250 {
251 html.append( "<title>" );
252 TextCollectingVisitor collectingVisitor = new TextCollectingVisitor();
253 String headingText = collectingVisitor.collectAndGetText( firstNode );
254 html.append( HtmlTools.escapeHTML( headingText, false ) );
255 html.append( "</title>" );
256 }
257 }
258 html.append( "</head>" );
259 html.append( "<body>" );
260
261
262 FLEXMARK_HTML_RENDERER.render( documentRoot, html );
263
264 html.append( "</body>" );
265 html.append( "</html>" );
266
267 return html;
268 }
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283 @Component( role = MarkdownHtmlParser.class )
284 public static class MarkdownHtmlParser
285 extends XhtmlParser
286 {
287 public MarkdownHtmlParser()
288 {
289 super();
290 }
291
292 @Override
293 protected void init()
294 {
295 super.init();
296 super.boxed = true;
297 }
298
299 @Override
300 protected boolean baseEndTag( XmlPullParser parser, Sink sink )
301 {
302 boolean visited = super.baseEndTag( parser, sink );
303 if ( !visited )
304 {
305 if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
306 {
307 handleUnknown( parser, sink, TAG_TYPE_END );
308 visited = true;
309 }
310 }
311 return visited;
312 }
313
314 @Override
315 protected boolean baseStartTag( XmlPullParser parser, Sink sink )
316 {
317 boolean visited = super.baseStartTag( parser, sink );
318 if ( !visited )
319 {
320 if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
321 {
322 handleUnknown( parser, sink, TAG_TYPE_START );
323 super.boxed = true;
324 visited = true;
325 }
326 }
327 return visited;
328 }
329 }
330 }