001package org.apache.archiva.xml;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *  http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import org.apache.commons.lang.StringUtils;
023import org.dom4j.Attribute;
024import org.dom4j.Document;
025import org.dom4j.DocumentException;
026import org.dom4j.Element;
027import org.dom4j.Namespace;
028import org.dom4j.Node;
029import org.dom4j.QName;
030import org.dom4j.XPath;
031import org.dom4j.io.SAXReader;
032
033import java.io.File;
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.InputStreamReader;
037import java.net.MalformedURLException;
038import java.net.URL;
039import java.nio.charset.Charset;
040import java.util.ArrayList;
041import java.util.HashMap;
042import java.util.Iterator;
043import java.util.List;
044import java.util.Map;
045
046/**
047 * XMLReader - a set of common xml utility methods for reading content out of an xml file.
048 */
049public class XMLReader
050{
051    private URL xmlUrl;
052
053    private String documentType;
054
055    private Document document;
056
057    private Map<String, String> namespaceMap = new HashMap<>();
058
059    public XMLReader( String type, File file )
060        throws XMLException
061    {
062        if ( !file.exists() )
063        {
064            throw new XMLException( "file does not exist: " + file.getAbsolutePath() );
065        }
066
067        if ( !file.isFile() )
068        {
069            throw new XMLException( "path is not a file: " + file.getAbsolutePath() );
070        }
071
072        if ( !file.canRead() )
073        {
074            throw new XMLException( "Cannot read xml file due to permissions: " + file.getAbsolutePath() );
075        }
076
077        try
078        {
079            init( type, file.toURL() );
080        }
081        catch ( MalformedURLException e )
082        {
083            throw new XMLException( "Unable to translate file " + file + " to URL: " + e.getMessage(), e );
084        }
085    }
086
087    public XMLReader( String type, URL url )
088        throws XMLException
089    {
090        init( type, url );
091    }
092
093    private void init( String type, URL url )
094        throws XMLException
095    {
096        this.documentType = type;
097        this.xmlUrl = url;
098
099        SAXReader reader = new SAXReader();
100
101        try (InputStream in = url.openStream())
102        {
103            InputStreamReader inReader = new InputStreamReader( in, Charset.forName( "UTF-8" ) );
104            LatinEntityResolutionReader latinReader = new LatinEntityResolutionReader( inReader );
105            this.document = reader.read( latinReader );
106        }
107        catch ( DocumentException e )
108        {
109            throw new XMLException( "Unable to parse " + documentType + " xml " + xmlUrl + ": " + e.getMessage(), e );
110        }
111        catch ( IOException e )
112        {
113            throw new XMLException( "Unable to open stream to " + url + ": " + e.getMessage(), e );
114        }
115
116        Element root = this.document.getRootElement();
117        if ( root == null )
118        {
119            throw new XMLException( "Invalid " + documentType + " xml: root element is null." );
120        }
121
122        if ( !StringUtils.equals( root.getName(), documentType ) )
123        {
124            throw new XMLException(
125                "Invalid " + documentType + " xml: Unexpected root element <" + root.getName() + ">, expected <"
126                    + documentType + ">" );
127        }
128    }
129
130    public String getDefaultNamespaceURI()
131    {
132        Namespace namespace = this.document.getRootElement().getNamespace();
133        return namespace.getURI();
134    }
135
136    public void addNamespaceMapping( String elementName, String uri )
137    {
138        this.namespaceMap.put( elementName, uri );
139    }
140
141    public Element getElement( String xpathExpr )
142        throws XMLException
143    {
144        XPath xpath = createXPath( xpathExpr );
145        Object evaluated = xpath.selectSingleNode( document );
146
147        if ( evaluated == null )
148        {
149            return null;
150        }
151
152        if ( evaluated instanceof Element )
153        {
154            return (Element) evaluated;
155        }
156        else
157        {
158            // Unknown evaluated type.
159            throw new XMLException( ".getElement( Expr: " + xpathExpr + " ) resulted in non-Element type -> ("
160                                        + evaluated.getClass().getName() + ") " + evaluated );
161        }
162    }
163
164    private XPath createXPath( String xpathExpr )
165    {
166        XPath xpath = document.createXPath( xpathExpr );
167        if ( !this.namespaceMap.isEmpty() )
168        {
169            xpath.setNamespaceURIs( this.namespaceMap );
170        }
171        return xpath;
172    }
173
174    public boolean hasElement( String xpathExpr )
175        throws XMLException
176    {
177        XPath xpath = createXPath( xpathExpr );
178        Object evaluated = xpath.selectSingleNode( document );
179
180        if ( evaluated == null )
181        {
182            return false;
183        }
184
185        return true;
186    }
187
188    /**
189     * Remove namespaces from entire document.
190     */
191    public void removeNamespaces()
192    {
193        removeNamespaces( this.document.getRootElement() );
194    }
195
196    /**
197     * Remove namespaces from element recursively.
198     */
199    @SuppressWarnings("unchecked")
200    public void removeNamespaces( Element elem )
201    {
202        elem.setQName( QName.get( elem.getName(), Namespace.NO_NAMESPACE, elem.getQualifiedName() ) );
203
204        Node n;
205
206        Iterator<Node> it = elem.elementIterator();
207        while ( it.hasNext() )
208        {
209            n = it.next();
210
211            switch ( n.getNodeType() )
212            {
213                case Node.ATTRIBUTE_NODE:
214                    ( (Attribute) n ).setNamespace( Namespace.NO_NAMESPACE );
215                    break;
216                case Node.ELEMENT_NODE:
217                    removeNamespaces( (Element) n );
218                    break;
219            }
220        }
221    }
222
223    public String getElementText( Node context, String xpathExpr )
224        throws XMLException
225    {
226        XPath xpath = createXPath( xpathExpr );
227        Object evaluated = xpath.selectSingleNode( context );
228
229        if ( evaluated == null )
230        {
231            return null;
232        }
233
234        if ( evaluated instanceof Element )
235        {
236            Element evalElem = (Element) evaluated;
237            return evalElem.getTextTrim();
238        }
239        else
240        {
241            // Unknown evaluated type.
242            throw new XMLException( ".getElementText( Node, Expr: " + xpathExpr + " ) resulted in non-Element type -> ("
243                                        + evaluated.getClass().getName() + ") " + evaluated );
244        }
245    }
246
247    public String getElementText( String xpathExpr )
248        throws XMLException
249    {
250        XPath xpath = createXPath( xpathExpr );
251        Object evaluated = xpath.selectSingleNode( document );
252
253        if ( evaluated == null )
254        {
255            return null;
256        }
257
258        if ( evaluated instanceof Element )
259        {
260            Element evalElem = (Element) evaluated;
261            return evalElem.getTextTrim();
262        }
263        else
264        {
265            // Unknown evaluated type.
266            throw new XMLException( ".getElementText( Expr: " + xpathExpr + " ) resulted in non-Element type -> ("
267                                        + evaluated.getClass().getName() + ") " + evaluated );
268        }
269    }
270
271    @SuppressWarnings("unchecked")
272    public List<Element> getElementList( String xpathExpr )
273        throws XMLException
274    {
275        XPath xpath = createXPath( xpathExpr );
276        Object evaluated = xpath.evaluate( document );
277
278        if ( evaluated == null )
279        {
280            return null;
281        }
282
283        /* The xpath.evaluate(Context) method can return:
284         *   1) A Collection or List of dom4j Nodes. 
285         *   2) A single dom4j Node.
286         */
287
288        if ( evaluated instanceof List )
289        {
290            return (List<Element>) evaluated;
291        }
292        else if ( evaluated instanceof Node )
293        {
294            List<Element> ret = new ArrayList<>();
295            ret.add( (Element) evaluated );
296            return ret;
297        }
298        else
299        {
300            // Unknown evaluated type.
301            throw new XMLException( ".getElementList( Expr: " + xpathExpr + " ) resulted in non-List type -> ("
302                                        + evaluated.getClass().getName() + ") " + evaluated );
303        }
304    }
305
306    public List<String> getElementListText( String xpathExpr )
307        throws XMLException
308    {
309        List<Element> elemList = getElementList( xpathExpr );
310        if ( elemList == null )
311        {
312            return null;
313        }
314
315        List<String> ret = new ArrayList<>();
316        for ( Iterator<Element> iter = elemList.iterator(); iter.hasNext(); )
317        {
318            Element listelem = iter.next();
319            ret.add( listelem.getTextTrim() );
320        }
321        return ret;
322    }
323
324}