Coverage Report

Coverage Report - org.apache.any23.extractor.html.DomUtils

Classes in this File

Line Coverage

Branch Coverage

Complexity

DomUtils

0/119

0/68

3.591

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *  http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package org.apache.any23.extractor.html;
 
 import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerException;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpressionException;
 import javax.xml.xpath.XPathFactory;
 import java.io.IOException;
 import java.io.StringWriter;
 import java.util.ArrayList;
 import java.util.List;
 
 /**
  * This class provides utility methods for DOM manipulation.
  * It is separated from {@link HTMLDocument} so that its methods
  * can be run on single DOM nodes without having to wrap them
  * into an HTMLDocument.
  * We use a mix of XPath and DOM manipulation.
  * <p/>
  * This is likely to be a performance bottleneck but at least
  * everything is localized here.
  * <p/>
  */
 public class DomUtils {
 
     /** Shared empty array returned by {@link #getXPathListForNode(Node)} when its input is <code>null</code>. */
     private static final String[] EMPTY_STRING_ARRAY = new String[0];
         
     /**
      * XPath engine shared by all the XPath based lookups of this class.
      * NOTE(review): the JAXP documentation states that {@link XPath} objects are
      * neither thread-safe nor reentrant; this single static instance presumably
      * relies on callers not using this class concurrently -- confirm.
      */
     private final static XPath xPathEngine = XPathFactory.newInstance().newXPath();
 
     /** Private constructor: this class only exposes static methods. */
     private DomUtils(){}
 
     /**
      * Given a node this method returns the index corresponding to such node
      * within the list of the children of its parent node.
      *
      * @param n the node of which returning the index.
      * @return a non negative number.
      */
     public static int getIndexInParent(Node n) {
         Node parent = n.getParentNode();
         if(parent == null) {
             return 0;
         }
         NodeList nodes = parent.getChildNodes();
         int counter = -1;
         for(int i = 0; i < nodes.getLength(); i++) {
             Node current = nodes.item(i);
             if ( current.getNodeType() == n.getNodeType() && current.getNodeName().equals( n.getNodeName() ) ) {
                 counter++;
             }
             if( current.equals(n) ) {
                 return counter;
             }
         }
         throw new IllegalStateException("Cannot find a child within its parent node list.");
     }
 
     /**
      * Does a reverse walking of the DOM tree to generate a unique XPath
      * expression leading to this node. The XPath generated is the canonical
      * one based on sibling index: /html[1]/body[1]/div[2]/span[3] etc..
      *
      * @param node the input node.
      * @return the XPath location of node as String.
      */
     public static String getXPathForNode(Node node) {
         final StringBuilder sb = new StringBuilder();
         Node parent = node;
         while(parent != null && parent.getNodeType() != Node.DOCUMENT_NODE) {
             sb.insert(0, "]");
             sb.insert(0, getIndexInParent(parent) + 1);
             sb.insert(0, "[");
             sb.insert(0, parent.getNodeName());
             sb.insert(0, "/");
             parent = parent.getParentNode();
         }
         return sb.toString();
     }
 
     /**
      * Returns a list of tag names representing the path from
      * the document root to the given node <i>n</i>.
      *
      * @param n the node for which retrieve the path.
      * @return a sequence of HTML tag names.
      */
     public static String[] getXPathListForNode(Node n) {
         if(n == null) {
             return EMPTY_STRING_ARRAY;
         }
         List<String> ancestors = new ArrayList<String>();
         ancestors.add( String.format("%s[%s]", n.getNodeName(), getIndexInParent(n) ) );
         Node parent = n.getParentNode();
         while(parent != null) {
             ancestors.add(0, String.format("%s[%s]", parent.getNodeName(), getIndexInParent(parent) ) );
             parent = parent.getParentNode();
         }
         return ancestors.toArray( new String[ancestors.size()] );
     }
 
     /**
      * Returns the row/col location of the given node.
      *
      * @param n input node.
      * @return an array of two elements of type
      *         <code>[&lt;begin-row&gt;, &lt;begin-col&gt;, &lt;end-row&gt; &lt;end-col&gt;]</code>
      *         or <code>null</code> if not possible to extract such data.
      */
     public static int[] getNodeLocation(Node n) {
         if(n == null) throw new NullPointerException("node cannot be null.");
         final TagSoupParser.ElementLocation elementLocation =
             (TagSoupParser.ElementLocation) n.getUserData( TagSoupParser.ELEMENT_LOCATION );
         if(elementLocation == null) return null;
         return new int[]{
                 elementLocation.getBeginLineNumber(),
                 elementLocation.getBeginColumnNumber(),
                 elementLocation.getEndLineNumber(),
                 elementLocation.getEndColumnNumber()
         };
     }
 
     /**
      * Checks whether a node is ancestor or same of another node.
      *
      * @param candidateAncestor the candidate ancestor node.
      * @param candidateSibling the candidate sibling node.
      * @param strict if <code>true</code> is not allowed that the ancestor and sibling can be the same node.
      * @return <code>true</code> if <code>candidateSibling</code> is ancestor of <code>candidateSibling</code>,
      *         <code>false</code> otherwise.
      */
     public static boolean isAncestorOf(Node candidateAncestor, Node candidateSibling, boolean strict) {
         if(candidateAncestor == null) throw new NullPointerException("candidate ancestor cannot be null null.");
         if(candidateSibling  == null) throw new NullPointerException("candidate sibling cannot be null null." );
         if(strict && candidateAncestor.equals(candidateSibling)) return false;
         Node parent = candidateSibling;
         while(parent != null) {
             if(parent.equals(candidateAncestor)) return true;
             parent = parent.getParentNode();
         }
         return false;
     }
 
     /**
      * Checks whether a node is ancestor or same of another node. As
      * {@link #isAncestorOf(org.w3c.dom.Node, org.w3c.dom.Node, boolean)} with <code>strict=false</code>.
      *
      * @param candidateAncestor the candidate ancestor node.
      * @param candidateSibling the candidate sibling node.
      * @return <code>true</code> if <code>candidateSibling</code> is ancestor of <code>candidateSibling</code>,
      *         <code>false</code> otherwise.
      */
     public static boolean isAncestorOf(Node candidateAncestor, Node candidateSibling) {
         return isAncestorOf(candidateAncestor, candidateSibling, false);
     }
 
     /**
      * Finds all nodes that have a declared class.
      * Note that the className is transformed to lower case before being
      * matched against the DOM.
      * @param root the root node from which start searching.
      * @param className the name of the filtered class.
      * @return list of matching nodes or an empty list.
      */
     public static List<Node> findAllByClassName(Node root, String className) {
         return findAllByTagAndClassName(root, "*", className.toLowerCase());
     }
 
     /**
      * Finds all nodes that have a declared attribute.
      * Note that the className is transformed to lower case before being
      * matched against the DOM.
      * @param root the root node from which start searching.
      * @param attrName the name of the filtered attribue.
      * @return list of matching nodes or an empty list.
      */
     public static List<Node> findAllByAttributeName(Node root, String attrName) {
         List<Node> result = new ArrayList<Node>();
         for (Node node : findAll(root, String.format("./descendant-or-self::*[@%s]", attrName) ) ) {
                 result.add(node);
         }
         return result;
     }
 
     public static List<Node> findAllByTag(Node root, String tagName) {
         List<Node> result = new ArrayList<Node>();
         for (Node node : findAll(root, "./descendant-or-self::" + tagName)) {
             result.add(node);
         }
         return result;
     }
 
     public static List<Node> findAllByTagAndClassName(Node root, String tagName, String className) {
         List<Node> result = new ArrayList<Node>();
         for (Node node : findAll(
                 root,
                 "./descendant-or-self::" +
                 tagName +
                 "[contains(translate(@class,'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'),'" +
                 className + "')]")
         ) {
             if (DomUtils.hasClassName(node, className)) {
                 result.add(node);
             }
         }
         return result;
     }
 
     /**
      * Mimics the JS DOM API, or prototype's $()
      */
     public static Node findNodeById(Node root, String id) {
         Node node;
         try {
             String xpath = "//*[@id='" + id + "']";
             node = (Node) xPathEngine.evaluate(xpath, root, XPathConstants.NODE);
         } catch (XPathExpressionException ex) {
             throw new RuntimeException("Should not happen", ex);
         }
         return node;
     }
 
     /**
      * Returns a NodeList composed of all the nodes that match an XPath
      * expression, which must be valid.
      */
     public static List<Node> findAll(Node node, String xpath) {
         if(node == null) {
             throw new NullPointerException("node cannot be null.");
         }
         try {
             NodeList nodes = (NodeList) xPathEngine.evaluate(xpath, node, XPathConstants.NODESET);
             List<Node> result = new ArrayList<Node>(nodes.getLength());
             for (int i = 0; i < nodes.getLength(); i++) {
                 result.add(nodes.item(i));
             }
             return result;
         } catch (XPathExpressionException ex) {
             throw new IllegalArgumentException("Illegal XPath expression: " + xpath, ex);
         }
     }
 
     /**
      * Gets the string value of an XPath expression.
      */
     public static String find(Node node, String xpath) {
         try {
             String val = (String) xPathEngine.evaluate(xpath, node, XPathConstants.STRING);
             if (null == val)
                 return "";
             return val;
         } catch (XPathExpressionException ex) {
             throw new IllegalArgumentException("Illegal XPath expression: " + xpath, ex);
         }
     }
 
     /**
      * Tells if an element has a class name <b>not checking the parents
      * in the hierarchy</b> mimicking the <i>CSS</i> .foo match.
      */
     public static boolean hasClassName(Node node, String className) {
         return hasAttribute(node, "class", className);
     }
 
     /**
      * Checks the presence of an attribute value in attributes that
      * contain whitespace-separated lists of values. The semantic is the
      * CSS classes' ones: "foo" matches "bar foo", "foo" but not "foob"
      */
     public static boolean hasAttribute(Node node, String attributeName, String className) {
         // regex love, maybe faster but less easy to understand
         // Pattern pattern = Pattern.compile("(^|\\s+)"+className+"(\\s+|$)");
         String attr = readAttribute(node, attributeName);
         for (String c : attr.split("\\s+"))
             if (c.equalsIgnoreCase(className))
                 return true;
         return false;
     }
 
      /**
      * Checks the presence of an attribute in the given <code>node</code>.
       *
       * @param node the node container.
       * @param attributeName the name of the attribute.
       */
     public static boolean hasAttribute(Node node, String attributeName) {
         return readAttribute(node, attributeName, null) != null;
     }
 
     /**
      * Verifies if the given target node is an element.
      *
      * @param target
      * @return <code>true</code> if the element the node is an element,
      *         <code>false</code> otherwise.
      */
     public static boolean isElementNode(Node target) {
         return Node.ELEMENT_NODE == target.getNodeType();
     }
 
     /**
      * Reads the value of the specified <code>attribute</code>, returning the
      * <code>defaultValue</code> string if not present.
      *
      * @param node node to read the attribute.
      * @param attribute attribute name.
      * @param defaultValue the default value to return if attribute is not found.
      * @return the attribute value or <code>defaultValue</code> if not found.
      */
     public static String readAttribute(Node node, String attribute, String defaultValue) {
         NamedNodeMap attributes = node.getAttributes();
         if (null == attributes)
             return defaultValue;
         Node attr = attributes.getNamedItem(attribute);
         if (null==attr)
             return defaultValue;
         return attr.getNodeValue();
     }
 
     /**
      * Reads the value of the first <i>attribute</i> which name matches with the specified <code>attributePrefix</code>.
      * Returns the <code>defaultValue</code> if not found.
      *
      * @param node node to look for attributes.
      * @param attributePrefix attribute prefix.
      * @param defaultValue default returned value.
      * @return the value found or default.
      */
     public static String readAttributeWithPrefix(Node node, String attributePrefix, String defaultValue) {
         final NamedNodeMap attributes = node.getAttributes();
         if (null == attributes) {
             return defaultValue;
         }
         Node attribute;
         for (int a = 0; a < attributes.getLength(); a++) {
             attribute = attributes.item(a);
             if (attribute.getNodeName().startsWith(attributePrefix)) {
                 return attribute.getNodeValue();
             }
         }
         return defaultValue;
     }
 
     /**
      * Reads the value of an <code>attribute</code>, returning the
      * empty string if not present.
      *
      * @param node node to read the attribute.
      * @param attribute attribute name.
      * @return the attribute value or <code>""</code> if not found.
      */
     public static String readAttribute(Node node, String attribute) {
         return readAttribute(node, attribute, "");
     }
 
     /**
      * Given a <i>DOM</i> {@link Node} produces the <i>XML</i> serialization
      * omitting the <i>XML declaration</i>.
      *
      * @param node node to be serialized.
      * @param indent if <code>true</code> the output is indented.
      * @return the XML serialization.
      * @throws TransformerException if an error occurs during the
      *         serializator initialization and activation.
      * @throws java.io.IOException
      */
     public static String serializeToXML(Node node, boolean indent) throws TransformerException, IOException {
         final DOMSource domSource = new DOMSource(node);
         final Transformer transformer = TransformerFactory.newInstance().newTransformer();
         transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
         transformer.setOutputProperty(OutputKeys.METHOD, "xml");
         transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
         if(indent) {
             transformer.setOutputProperty(OutputKeys.INDENT, "yes");
             transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
         }
         final StringWriter sw = new StringWriter();
         final StreamResult sr = new StreamResult(sw);
         transformer.transform(domSource, sr);
         sw.close();
         return sw.toString();
     }
 
 }

1		/*
2		* Licensed to the Apache Software Foundation (ASF) under one or more
3		* contributor license agreements. See the NOTICE file distributed with
4		* this work for additional information regarding copyright ownership.
5		* The ASF licenses this file to You under the Apache License, Version 2.0
6		* (the "License"); you may not use this file except in compliance with
7		* the License. You may obtain a copy of the License at
8		*
9		* http://www.apache.org/licenses/LICENSE-2.0
10		*
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*/
17
18		package org.apache.any23.extractor.html;
19
20		import org.w3c.dom.NamedNodeMap;
21		import org.w3c.dom.Node;
22		import org.w3c.dom.NodeList;
23
24		import javax.xml.transform.OutputKeys;
25		import javax.xml.transform.Transformer;
26		import javax.xml.transform.TransformerException;
27		import javax.xml.transform.TransformerFactory;
28		import javax.xml.transform.dom.DOMSource;
29		import javax.xml.transform.stream.StreamResult;
30		import javax.xml.xpath.XPath;
31		import javax.xml.xpath.XPathConstants;
32		import javax.xml.xpath.XPathExpressionException;
33		import javax.xml.xpath.XPathFactory;
34		import java.io.IOException;
35		import java.io.StringWriter;
36		import java.util.ArrayList;
37		import java.util.List;
38
39		/**
40		* This class provides utility methods for DOM manipulation.
41		* It is separated from {@link HTMLDocument} so that its methods
42		* can be run on single DOM nodes without having to wrap them
43		* into an HTMLDocument.
44		* We use a mix of XPath and DOM manipulation.
45		* <p/>
46		* This is likely to be a performance bottleneck but at least
47		* everything is localized here.
48		* <p/>
49		*/
50		public class DomUtils {
51
52	0	private static final String[] EMPTY_STRING_ARRAY = new String[0];
53
54	0	private final static XPath xPathEngine = XPathFactory.newInstance().newXPath();
55
56	0	private DomUtils(){}
57
58		/**
59		* Given a node this method returns the index corresponding to such node
60		* within the list of the children of its parent node.
61		*
62		* @param n the node of which returning the index.
63		* @return a non negative number.
64		*/
65		public static int getIndexInParent(Node n) {
66	0	Node parent = n.getParentNode();
67	0	if(parent == null) {
68	0	return 0;
69		}
70	0	NodeList nodes = parent.getChildNodes();
71	0	int counter = -1;
72	0	for(int i = 0; i < nodes.getLength(); i++) {
73	0	Node current = nodes.item(i);
74	0	if ( current.getNodeType() == n.getNodeType() && current.getNodeName().equals( n.getNodeName() ) ) {
75	0	counter++;
76		}
77	0	if( current.equals(n) ) {
78	0	return counter;
79		}
80		}
81	0	throw new IllegalStateException("Cannot find a child within its parent node list.");
82		}
83
84		/**
85		* Does a reverse walking of the DOM tree to generate a unique XPath
86		* expression leading to this node. The XPath generated is the canonical
87		* one based on sibling index: /html[1]/body[1]/div[2]/span[3] etc..
88		*
89		* @param node the input node.
90		* @return the XPath location of node as String.
91		*/
92		public static String getXPathForNode(Node node) {
93	0	final StringBuilder sb = new StringBuilder();
94	0	Node parent = node;
95	0	while(parent != null && parent.getNodeType() != Node.DOCUMENT_NODE) {
96	0	sb.insert(0, "]");
97	0	sb.insert(0, getIndexInParent(parent) + 1);
98	0	sb.insert(0, "[");
99	0	sb.insert(0, parent.getNodeName());
100	0	sb.insert(0, "/");
101	0	parent = parent.getParentNode();
102		}
103	0	return sb.toString();
104		}
105
106		/**
107		* Returns a list of tag names representing the path from
108		* the document root to the given node <i>n</i>.
109		*
110		* @param n the node for which retrieve the path.
111		* @return a sequence of HTML tag names.
112		*/
113		public static String[] getXPathListForNode(Node n) {
114	0	if(n == null) {
115	0	return EMPTY_STRING_ARRAY;
116		}
117	0	List<String> ancestors = new ArrayList<String>();
118	0	ancestors.add( String.format("%s[%s]", n.getNodeName(), getIndexInParent(n) ) );
119	0	Node parent = n.getParentNode();
120	0	while(parent != null) {
121	0	ancestors.add(0, String.format("%s[%s]", parent.getNodeName(), getIndexInParent(parent) ) );
122	0	parent = parent.getParentNode();
123		}
124	0	return ancestors.toArray( new String[ancestors.size()] );
125		}
126
127		/**
128		* Returns the row/col location of the given node.
129		*
130		* @param n input node.
131		* @return an array of two elements of type
132		* <code>[<begin-row>, <begin-col>, <end-row> <end-col>]</code>
133		* or <code>null</code> if not possible to extract such data.
134		*/
135		public static int[] getNodeLocation(Node n) {
136	0	if(n == null) throw new NullPointerException("node cannot be null.");
137	0	final TagSoupParser.ElementLocation elementLocation =
138		(TagSoupParser.ElementLocation) n.getUserData( TagSoupParser.ELEMENT_LOCATION );
139	0	if(elementLocation == null) return null;
140	0	return new int[]{
141		elementLocation.getBeginLineNumber(),
142		elementLocation.getBeginColumnNumber(),
143		elementLocation.getEndLineNumber(),
144		elementLocation.getEndColumnNumber()
145		};
146		}
147
148		/**
149		* Checks whether a node is ancestor or same of another node.
150		*
151		* @param candidateAncestor the candidate ancestor node.
152		* @param candidateSibling the candidate sibling node.
153		* @param strict if <code>true</code> is not allowed that the ancestor and sibling can be the same node.
154		* @return <code>true</code> if <code>candidateSibling</code> is ancestor of <code>candidateSibling</code>,
155		* <code>false</code> otherwise.
156		*/
157		public static boolean isAncestorOf(Node candidateAncestor, Node candidateSibling, boolean strict) {
158	0	if(candidateAncestor == null) throw new NullPointerException("candidate ancestor cannot be null null.");
159	0	if(candidateSibling == null) throw new NullPointerException("candidate sibling cannot be null null." );
160	0	if(strict && candidateAncestor.equals(candidateSibling)) return false;
161	0	Node parent = candidateSibling;
162	0	while(parent != null) {
163	0	if(parent.equals(candidateAncestor)) return true;
164	0	parent = parent.getParentNode();
165		}
166	0	return false;
167		}
168
169		/**
170		* Checks whether a node is ancestor or same of another node. As
171		* {@link #isAncestorOf(org.w3c.dom.Node, org.w3c.dom.Node, boolean)} with <code>strict=false</code>.
172		*
173		* @param candidateAncestor the candidate ancestor node.
174		* @param candidateSibling the candidate sibling node.
175		* @return <code>true</code> if <code>candidateSibling</code> is ancestor of <code>candidateSibling</code>,
176		* <code>false</code> otherwise.
177		*/
178		public static boolean isAncestorOf(Node candidateAncestor, Node candidateSibling) {
179	0	return isAncestorOf(candidateAncestor, candidateSibling, false);
180		}
181
182		/**
183		* Finds all nodes that have a declared class.
184		* Note that the className is transformed to lower case before being
185		* matched against the DOM.
186		* @param root the root node from which start searching.
187		* @param className the name of the filtered class.
188		* @return list of matching nodes or an empty list.
189		*/
190		public static List<Node> findAllByClassName(Node root, String className) {
191	0	return findAllByTagAndClassName(root, "*", className.toLowerCase());
192		}
193
194		/**
195		* Finds all nodes that have a declared attribute.
196		* Note that the className is transformed to lower case before being
197		* matched against the DOM.
198		* @param root the root node from which start searching.
199		* @param attrName the name of the filtered attribue.
200		* @return list of matching nodes or an empty list.
201		*/
202		public static List<Node> findAllByAttributeName(Node root, String attrName) {
203	0	List<Node> result = new ArrayList<Node>();
204	0	for (Node node : findAll(root, String.format("./descendant-or-self::*[@%s]", attrName) ) ) {
205	0	result.add(node);
206		}
207	0	return result;
208		}
209
210		public static List<Node> findAllByTag(Node root, String tagName) {
211	0	List<Node> result = new ArrayList<Node>();
212	0	for (Node node : findAll(root, "./descendant-or-self::" + tagName)) {
213	0	result.add(node);
214		}
215	0	return result;
216		}
217
218		public static List<Node> findAllByTagAndClassName(Node root, String tagName, String className) {
219	0	List<Node> result = new ArrayList<Node>();
220	0	for (Node node : findAll(
221		root,
222		"./descendant-or-self::" +
223		tagName +
224		"[contains(translate(@class,'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'),'" +
225		className + "')]")
226		) {
227	0	if (DomUtils.hasClassName(node, className)) {
228	0	result.add(node);
229		}
230		}
231	0	return result;
232		}
233
234		/**
235		* Mimics the JS DOM API, or prototype's $()
236		*/
237		public static Node findNodeById(Node root, String id) {
238		Node node;
239		try {
240	0	String xpath = "//*[@id='" + id + "']";
241	0	node = (Node) xPathEngine.evaluate(xpath, root, XPathConstants.NODE);
242	0	} catch (XPathExpressionException ex) {
243	0	throw new RuntimeException("Should not happen", ex);
244	0	}
245	0	return node;
246		}
247
248		/**
249		* Returns a NodeList composed of all the nodes that match an XPath
250		* expression, which must be valid.
251		*/
252		public static List<Node> findAll(Node node, String xpath) {
253	0	if(node == null) {
254	0	throw new NullPointerException("node cannot be null.");
255		}
256		try {
257	0	NodeList nodes = (NodeList) xPathEngine.evaluate(xpath, node, XPathConstants.NODESET);
258	0	List<Node> result = new ArrayList<Node>(nodes.getLength());
259	0	for (int i = 0; i < nodes.getLength(); i++) {
260	0	result.add(nodes.item(i));
261		}
262	0	return result;
263	0	} catch (XPathExpressionException ex) {
264	0	throw new IllegalArgumentException("Illegal XPath expression: " + xpath, ex);
265		}
266		}
267
268		/**
269		* Gets the string value of an XPath expression.
270		*/
271		public static String find(Node node, String xpath) {
272		try {
273	0	String val = (String) xPathEngine.evaluate(xpath, node, XPathConstants.STRING);
274	0	if (null == val)
275	0	return "";
276	0	return val;
277	0	} catch (XPathExpressionException ex) {
278	0	throw new IllegalArgumentException("Illegal XPath expression: " + xpath, ex);
279		}
280		}
281
282		/**
283		* Tells if an element has a class name <b>not checking the parents
284		* in the hierarchy</b> mimicking the <i>CSS</i> .foo match.
285		*/
286		public static boolean hasClassName(Node node, String className) {
287	0	return hasAttribute(node, "class", className);
288		}
289
290		/**
291		* Checks the presence of an attribute value in attributes that
292		* contain whitespace-separated lists of values. The semantic is the
293		* CSS classes' ones: "foo" matches "bar foo", "foo" but not "foob"
294		*/
295		public static boolean hasAttribute(Node node, String attributeName, String className) {
296		// regex love, maybe faster but less easy to understand
297		// Pattern pattern = Pattern.compile("(^\|\\s+)"+className+"(\\s+\|$)");
298	0	String attr = readAttribute(node, attributeName);
299	0	for (String c : attr.split("\\s+"))
300	0	if (c.equalsIgnoreCase(className))
301	0	return true;
302	0	return false;
303		}
304
305		/**
306		* Checks the presence of an attribute in the given <code>node</code>.
307		*
308		* @param node the node container.
309		* @param attributeName the name of the attribute.
310		*/
311		public static boolean hasAttribute(Node node, String attributeName) {
312	0	return readAttribute(node, attributeName, null) != null;
313		}
314
315		/**
316		* Verifies if the given target node is an element.
317		*
318		* @param target
319		* @return <code>true</code> if the element the node is an element,
320		* <code>false</code> otherwise.
321		*/
322		public static boolean isElementNode(Node target) {
323	0	return Node.ELEMENT_NODE == target.getNodeType();
324		}
325
326		/**
327		* Reads the value of the specified <code>attribute</code>, returning the
328		* <code>defaultValue</code> string if not present.
329		*
330		* @param node node to read the attribute.
331		* @param attribute attribute name.
332		* @param defaultValue the default value to return if attribute is not found.
333		* @return the attribute value or <code>defaultValue</code> if not found.
334		*/
335		public static String readAttribute(Node node, String attribute, String defaultValue) {
336	0	NamedNodeMap attributes = node.getAttributes();
337	0	if (null == attributes)
338	0	return defaultValue;
339	0	Node attr = attributes.getNamedItem(attribute);
340	0	if (null==attr)
341	0	return defaultValue;
342	0	return attr.getNodeValue();
343		}
344
345		/**
346		* Reads the value of the first <i>attribute</i> which name matches with the specified <code>attributePrefix</code>.
347		* Returns the <code>defaultValue</code> if not found.
348		*
349		* @param node node to look for attributes.
350		* @param attributePrefix attribute prefix.
351		* @param defaultValue default returned value.
352		* @return the value found or default.
353		*/
354		public static String readAttributeWithPrefix(Node node, String attributePrefix, String defaultValue) {
355	0	final NamedNodeMap attributes = node.getAttributes();
356	0	if (null == attributes) {
357	0	return defaultValue;
358		}
359		Node attribute;
360	0	for (int a = 0; a < attributes.getLength(); a++) {
361	0	attribute = attributes.item(a);
362	0	if (attribute.getNodeName().startsWith(attributePrefix)) {
363	0	return attribute.getNodeValue();
364		}
365		}
366	0	return defaultValue;
367		}
368
369		/**
370		* Reads the value of an <code>attribute</code>, returning the
371		* empty string if not present.
372		*
373		* @param node node to read the attribute.
374		* @param attribute attribute name.
375		* @return the attribute value or <code>""</code> if not found.
376		*/
377		public static String readAttribute(Node node, String attribute) {
378	0	return readAttribute(node, attribute, "");
379		}
380
381		/**
382		* Given a <i>DOM</i> {@link Node} produces the <i>XML</i> serialization
383		* omitting the <i>XML declaration</i>.
384		*
385		* @param node node to be serialized.
386		* @param indent if <code>true</code> the output is indented.
387		* @return the XML serialization.
388		* @throws TransformerException if an error occurs during the
389		* serializator initialization and activation.
390		* @throws java.io.IOException
391		*/
392		public static String serializeToXML(Node node, boolean indent) throws TransformerException, IOException {
393	0	final DOMSource domSource = new DOMSource(node);
394	0	final Transformer transformer = TransformerFactory.newInstance().newTransformer();
395	0	transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
396	0	transformer.setOutputProperty(OutputKeys.METHOD, "xml");
397	0	transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
398	0	if(indent) {
399	0	transformer.setOutputProperty(OutputKeys.INDENT, "yes");
400	0	transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
401		}
402	0	final StringWriter sw = new StringWriter();
403	0	final StreamResult sr = new StreamResult(sw);
404	0	transformer.transform(domSource, sr);
405	0	sw.close();
406	0	return sw.toString();
407		}
408
409		}