View Javadoc
1   package org.apache.maven.doxia.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.StringReader;
24  
25  import java.util.regex.Matcher;
26  import java.util.regex.Pattern;
27  
28  import javax.xml.XMLConstants;
29  
30  import org.apache.maven.doxia.logging.Log;
31  import org.apache.maven.doxia.markup.XmlMarkup;
32  import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver;
33  import org.apache.maven.doxia.parser.ParseException;
34  
35  import org.xml.sax.InputSource;
36  import org.xml.sax.SAXException;
37  import org.xml.sax.SAXParseException;
38  import org.xml.sax.XMLReader;
39  import org.xml.sax.helpers.DefaultHandler;
40  import org.xml.sax.helpers.XMLReaderFactory;
41  
42  /**
43   * A class to validate xml documents.
44   *
45   * @since 1.1.3
46   */
47  public class XmlValidator
48  {
49      /**
50       * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*"
51       * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>.
52       */
53      private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*" + XmlMarkup.DOCTYPE_START + "([^>]*)>.*" );
54  
55      /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/#NT-Name */
56      private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" );
57  
58      /** lazy xmlReader to validate xml content*/
59      private XMLReader xmlReader;
60  
61      private Log logger;
62  
63      /**
64       * Constructor.
65       *
66       * @param log a logger, not null.
67       */
68      public XmlValidator( Log log )
69      {
70          this.logger = log;
71      }
72  
73      /**
74       * Validate an XML content with SAX.
75       *
76       * @param content a not null xml content
77       * @throws ParseException if any.
78       */
79      public void validate( String content )
80          throws ParseException
81      {
82          try
83          {
84              // 1 if there's a doctype
85              boolean hasDoctype = false;
86              Matcher matcher = PATTERN_DOCTYPE.matcher( content );
87              if ( matcher.find() )
88              {
89                  hasDoctype = true;
90              }
91  
92              // 2 check for an xmlns instance
93              boolean hasXsd = false;
94              matcher = PATTERN_TAG.matcher( content );
95              if ( matcher.find() )
96              {
97                  String value = matcher.group( 2 );
98  
99                  if ( value.contains( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) )
100                 {
101                     hasXsd = true;
102                 }
103             }
104 
105             // 3 validate content
106             getLog().debug( "Validating the content..." );
107             getXmlReader( hasXsd && hasDoctype ).parse( new InputSource( new StringReader( content ) ) );
108         }
109         catch ( IOException | SAXException e )
110         {
111             throw new ParseException( "Error validating the model: " + e.getMessage(), e );
112         }
113     }
114 
115     /**
116      * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>.
117      * @return an xmlReader instance.
118      * @throws SAXException if any
119      */
120     private XMLReader getXmlReader( boolean hasDtdAndXsd )
121         throws SAXException
122     {
123         if ( xmlReader == null )
124         {
125             MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() );
126 
127             xmlReader = XMLReaderFactory.createXMLReader();
128             xmlReader.setFeature( "http://xml.org/sax/features/validation", true );
129             xmlReader.setFeature( "http://apache.org/xml/features/validation/schema", true );
130             xmlReader.setErrorHandler( errorHandler );
131             xmlReader.setEntityResolver( new CachedFileEntityResolver() );
132         }
133 
134         ( (MessagesErrorHandler) xmlReader.getErrorHandler() ).setHasDtdAndXsd( hasDtdAndXsd );
135 
136         return xmlReader;
137     }
138 
139     private Log getLog()
140     {
141         return logger;
142     }
143 
144     /**
145      * Convenience class to beautify <code>SAXParseException</code> messages.
146      */
147     private static class MessagesErrorHandler
148         extends DefaultHandler
149     {
150         private static final int TYPE_UNKNOWN = 0;
151 
152         private static final int TYPE_WARNING = 1;
153 
154         private static final int TYPE_ERROR = 2;
155 
156         private static final int TYPE_FATAL = 3;
157 
158         private static final String EOL = XmlMarkup.EOL;
159 
160         /** @see org/apache/xerces/impl/msg/XMLMessages.properties#MSG_ELEMENT_NOT_DECLARED */
161         private static final Pattern ELEMENT_TYPE_PATTERN =
162             Pattern.compile( "Element type \".*\" must be declared.", Pattern.DOTALL );
163 
164         private final Log log;
165 
166         private boolean hasDtdAndXsd;
167 
168         private MessagesErrorHandler( Log log )
169         {
170             this.log = log;
171         }
172 
173         /**
174          * @param hasDtdAndXsd the hasDtdAndXsd to set
175          */
176         protected void setHasDtdAndXsd( boolean hasDtdAndXsd )
177         {
178             this.hasDtdAndXsd = hasDtdAndXsd;
179         }
180 
181         /** {@inheritDoc} */
182         @Override
183         public void warning( SAXParseException e )
184             throws SAXException
185         {
186             processException( TYPE_WARNING, e );
187         }
188 
189         /** {@inheritDoc} */
190         @Override
191         public void error( SAXParseException e )
192             throws SAXException
193         {
194             // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities
195             // like &nbsp;
196             // See http://xsd.stylusstudio.com/2001Nov/post08021.htm
197             if ( !hasDtdAndXsd )
198             {
199                 processException( TYPE_ERROR, e );
200                 return;
201             }
202 
203             Matcher m = ELEMENT_TYPE_PATTERN.matcher( e.getMessage() );
204             if ( !m.find() )
205             {
206                 processException( TYPE_ERROR, e );
207             }
208         }
209 
210         /** {@inheritDoc} */
211         @Override
212         public void fatalError( SAXParseException e )
213             throws SAXException
214         {
215             processException( TYPE_FATAL, e );
216         }
217 
218         private void processException( int type, SAXParseException e )
219             throws SAXException
220         {
221             StringBuilder message = new StringBuilder();
222 
223             switch ( type )
224             {
225                 case TYPE_WARNING:
226                     message.append( "Warning:" );
227                     break;
228 
229                 case TYPE_ERROR:
230                     message.append( "Error:" );
231                     break;
232 
233                 case TYPE_FATAL:
234                     message.append( "Fatal error:" );
235                     break;
236 
237                 case TYPE_UNKNOWN:
238                 default:
239                     message.append( "Unknown:" );
240                     break;
241             }
242 
243             message.append( EOL );
244             message.append( "  Public ID: " ).append( e.getPublicId() ).append( EOL );
245             message.append( "  System ID: " ).append( e.getSystemId() ).append( EOL );
246             message.append( "  Line number: " ).append( e.getLineNumber() ).append( EOL );
247             message.append( "  Column number: " ).append( e.getColumnNumber() ).append( EOL );
248             message.append( "  Message: " ).append( e.getMessage() ).append( EOL );
249 
250             final String logMessage = message.toString();
251 
252             switch ( type )
253             {
254                 case TYPE_WARNING:
255                     log.warn( logMessage );
256                     break;
257 
258                 case TYPE_UNKNOWN:
259                 case TYPE_ERROR:
260                 case TYPE_FATAL:
261                 default:
262                     throw new SAXException( logMessage );
263             }
264         }
265     }
266 }