1 package org.apache.maven.doxia.module.markdown;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import com.vladsch.flexmark.Extension;
23 import com.vladsch.flexmark.ast.Heading;
24 import com.vladsch.flexmark.ast.HtmlCommentBlock;
25 import com.vladsch.flexmark.ast.Node;
26 import com.vladsch.flexmark.ast.util.TextCollectingVisitor;
27 import com.vladsch.flexmark.html.HtmlRenderer;
28 import com.vladsch.flexmark.profiles.pegdown.Extensions;
29 import com.vladsch.flexmark.profiles.pegdown.PegdownOptionsAdapter;
30 import com.vladsch.flexmark.util.options.MutableDataHolder;
31 import org.apache.commons.lang3.StringEscapeUtils;
32 import org.apache.commons.lang3.StringUtils;
33 import org.apache.maven.doxia.markup.HtmlMarkup;
34 import org.apache.maven.doxia.module.xhtml.XhtmlParser;
35 import org.apache.maven.doxia.parser.AbstractParser;
36 import org.apache.maven.doxia.parser.ParseException;
37 import org.apache.maven.doxia.parser.Parser;
38 import org.apache.maven.doxia.sink.Sink;
39 import org.codehaus.plexus.component.annotations.Component;
40 import org.codehaus.plexus.component.annotations.Requirement;
41 import org.codehaus.plexus.util.IOUtil;
42 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
43
44 import java.io.IOException;
45 import java.io.Reader;
46 import java.io.StringReader;
47 import java.util.ArrayList;
48 import java.util.regex.Matcher;
49 import java.util.regex.Pattern;
50
51
52
53
54
55
56
57
58
59
60
61
62 @Component( role = Parser.class, hint = "markdown" )
63 public class MarkdownParser
64 extends AbstractParser
65 {
66
67
68
69
70 public static final String ROLE_HINT = "markdown";
71
72
73
74
75 private static final String MULTI_MARKDOWN_METADATA_SECTION =
76 "^(((?:[^\\s:][^:]*):(?:.*(?:\r?\n\\p{Blank}+[^\\s].*)*\r?\n))+)(?:\\s*\r?\n)";
77
78
79
80
81 private static final String MULTI_MARKDOWN_METADATA_ENTRY =
82 "([^\\s:][^:]*):(.*(?:\r?\n\\p{Blank}+[^\\s].*)*)\r?\n";
83
84
85
86
87
88
89 private static final String[] STANDARD_METADATA_KEYS =
90 { "title", "author", "date", "address", "affiliation", "copyright", "email", "keywords", "language", "phone",
91 "subtitle" };
92
93 public int getType()
94 {
95 return TXT_TYPE;
96 }
97
98 @Requirement
99 private MarkdownHtmlParser parser;
100
101 public void parse( Reader source, Sink sink )
102 throws ParseException
103 {
104 try
105 {
106
107 String html = toHtml( source );
108
109 parser.parse( new StringReader( html ), sink );
110 }
111 catch ( IOException e )
112 {
113 throw new ParseException( "Failed reading Markdown source document", e );
114 }
115 }
116
117
118
119
120
121
122
123
124 private String toHtml( Reader source )
125 throws IOException
126 {
127 String text = IOUtil.toString( source );
128 MutableDataHolder flexmarkOptions = PegdownOptionsAdapter.flexmarkOptions(
129 Extensions.ALL & ~( Extensions.HARDWRAPS | Extensions.ANCHORLINKS ) ).toMutable();
130 ArrayList<Extension> extensions = new ArrayList<Extension>();
131 for ( Extension extension : flexmarkOptions.get( com.vladsch.flexmark.parser.Parser.EXTENSIONS ) )
132 {
133 extensions.add( extension );
134 }
135
136 extensions.add( FlexmarkDoxiaExtension.create() );
137 flexmarkOptions.set( com.vladsch.flexmark.parser.Parser.EXTENSIONS, extensions );
138 flexmarkOptions.set( HtmlRenderer.HTML_BLOCK_OPEN_TAG_EOL, false );
139 flexmarkOptions.set( HtmlRenderer.HTML_BLOCK_CLOSE_TAG_EOL, false );
140 flexmarkOptions.set( HtmlRenderer.MAX_TRAILING_BLANK_LINES, -1 );
141
142 com.vladsch.flexmark.parser.Parser parser = com.vladsch.flexmark.parser.Parser.builder( flexmarkOptions )
143 .build();
144 HtmlRenderer renderer = HtmlRenderer.builder( flexmarkOptions ).build();
145
146 StringBuilder html = new StringBuilder( 1000 );
147 html.append( "<html>" );
148 html.append( "<head>" );
149 Pattern metadataPattern = Pattern.compile( MULTI_MARKDOWN_METADATA_SECTION, Pattern.MULTILINE );
150 Matcher metadataMatcher = metadataPattern.matcher( text );
151 boolean haveTitle = false;
152 if ( metadataMatcher.find() )
153 {
154 metadataPattern = Pattern.compile( MULTI_MARKDOWN_METADATA_ENTRY, Pattern.MULTILINE );
155 Matcher lineMatcher = metadataPattern.matcher( metadataMatcher.group( 1 ) );
156 boolean first = true;
157 while ( lineMatcher.find() )
158 {
159 String key = StringUtils.trimToEmpty( lineMatcher.group( 1 ) );
160 if ( first )
161 {
162 boolean found = false;
163 for ( String k : STANDARD_METADATA_KEYS )
164 {
165 if ( k.equalsIgnoreCase( key ) )
166 {
167 found = true;
168 break;
169 }
170 }
171 if ( !found )
172 {
173 break;
174 }
175 first = false;
176 }
177 String value = StringUtils.trimToEmpty( lineMatcher.group( 2 ) );
178 if ( "title".equalsIgnoreCase( key ) )
179 {
180 haveTitle = true;
181 html.append( "<title>" );
182 html.append( StringEscapeUtils.escapeXml( value ) );
183 html.append( "</title>" );
184 }
185 else if ( "author".equalsIgnoreCase( key ) )
186 {
187 html.append( "<meta name=\'author\' content=\'" );
188 html.append( StringEscapeUtils.escapeXml( value ) );
189 html.append( "\' />" );
190 }
191 else if ( "date".equalsIgnoreCase( key ) )
192 {
193 html.append( "<meta name=\'date\' content=\'" );
194 html.append( StringEscapeUtils.escapeXml( value ) );
195 html.append( "\' />" );
196 }
197 else
198 {
199 html.append( "<meta name=\'" );
200 html.append( StringEscapeUtils.escapeXml( key ) );
201 html.append( "\' content=\'" );
202 html.append( StringEscapeUtils.escapeXml( value ) );
203 html.append( "\' />" );
204 }
205 }
206 if ( !first )
207 {
208 text = text.substring( metadataMatcher.end() );
209 }
210 }
211
212 Node rootNode = parser.parse( text );
213 String markdownHtml = renderer.render( rootNode );
214
215 if ( !haveTitle && rootNode.hasChildren() )
216 {
217
218 Node firstNode = rootNode.getFirstChild();
219 while ( firstNode != null && !( firstNode instanceof Heading ) )
220 {
221 if ( !( firstNode instanceof HtmlCommentBlock ) )
222 {
223 break;
224 }
225 firstNode = firstNode.getNext();
226 }
227
228 if ( firstNode instanceof Heading )
229 {
230 html.append( "<title>" );
231 TextCollectingVisitor collectingVisitor = new TextCollectingVisitor();
232 String headingText = collectingVisitor.collectAndGetText( firstNode );
233 html.append( StringEscapeUtils.escapeXml( headingText ) );
234 html.append( "</title>" );
235 }
236 }
237 html.append( "</head>" );
238 html.append( "<body>" );
239 html.append( markdownHtml );
240 html.append( "</body>" );
241 html.append( "</html>" );
242
243 return html.toString();
244 }
245
246
247
248
249 @Component( role = MarkdownHtmlParser.class )
250 public static class MarkdownHtmlParser
251 extends XhtmlParser
252 {
253 public MarkdownHtmlParser()
254 {
255 super();
256 }
257
258 @Override
259 protected boolean baseEndTag( XmlPullParser parser, Sink sink )
260 {
261 boolean visited = super.baseEndTag( parser, sink );
262 if ( !visited )
263 {
264 if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
265 {
266 handleUnknown( parser, sink, TAG_TYPE_END );
267 visited = true;
268 }
269 }
270 return visited;
271 }
272
273 @Override
274 protected boolean baseStartTag( XmlPullParser parser, Sink sink )
275 {
276 boolean visited = super.baseStartTag( parser, sink );
277 if ( !visited )
278 {
279 if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
280 {
281 handleUnknown( parser, sink, TAG_TYPE_START );
282 visited = true;
283 }
284 }
285 return visited;
286 }
287 }
288 }