View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.jetspeed.rewriter.xml;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.io.Reader;
23  import java.io.Writer;
24  import java.net.URL;
25  import java.util.HashMap;
26  import java.util.Map;
27  
28  import javax.xml.parsers.ParserConfigurationException;
29  import javax.xml.parsers.SAXParser;
30  import javax.xml.parsers.SAXParserFactory;
31  
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  import org.apache.jetspeed.rewriter.ParserAdaptor;
35  import org.apache.jetspeed.rewriter.Rewriter;
36  import org.apache.jetspeed.rewriter.RewriterException;
37  import org.apache.jetspeed.rewriter.MutableAttributes;
38  import org.apache.jetspeed.util.Streams;
39  import org.xml.sax.InputSource;
40  import org.xml.sax.SAXException;
41  import org.xml.sax.helpers.DefaultHandler;
42  
43  /***
44   * SaxParserAdaptor
45   *
46   * @author <a href="mailto:taylor@apache.org">David Sean Taylor</a>
47   * @version $Id: SaxParserAdaptor.java 516448 2007-03-09 16:25:47Z ate $
48   */
49  public class SaxParserAdaptor implements ParserAdaptor
50  {
51      protected final static Log log = LogFactory.getLog(SaxParserAdaptor.class);
52      private String lineSeparator;
53  
54      private Rewriter rewriter;
55  
56      
57      
58  
59      public SaxParserAdaptor()
60      {
61          lineSeparator = System.getProperty("line.separator", "\r\n");
62      }
63      
64      /* (non-Javadoc)
65       * @see org.apache.jetspeed.syndication.services.crawler.rewriter.ParserAdaptor#parse(org.apache.jetspeed.syndication.services.crawler.rewriter.Rewriter, java.io.Reader)
66       */
67      public void parse(Rewriter rewriter, Reader reader)
68          throws RewriterException
69      {
70          try
71          {
72              this.rewriter = rewriter;        
73              SAXParser sp = getParser();            
74              sp.parse(new InputSource(reader), new SaxFormatHandler(null));                                                    
75          } 
76          catch (Exception e)
77          {
78              e.printStackTrace();
79              throw new RewriterException(e);
80          }
81                   
82      }
83      
84      /* (non-Javadoc)
85       * @see org.apache.jetspeed.syndication.services.crawler.rewriter.ParserAdaptor#rewrite(org.apache.jetspeed.syndication.services.crawler.rewriter.Rewriter, java.io.Reader, java.io.Writer)
86       */
87      public void rewrite(Rewriter rewriter, Reader reader, Writer writer)
88          throws RewriterException
89      {
90          // TODO Auto-generated method stub
91      }
92      
93      /***
94       * Get a Parser from the SAX Parser factory
95       *
96       * @return A SAXParser
97       */
98      protected SAXParser getParser()
99          throws ParserConfigurationException, SAXException
100     {
101         SAXParserFactory spf = SAXParserFactory.newInstance ();
102         spf.setValidating(false);
103 
104         return spf.newSAXParser ();
105     }
106 
107     /***
108      * Inner class to handle SAX parsing of XML files
109      */
110     public class SaxFormatHandler extends DefaultHandler
111     {    
112         private int elementCount = 0;
113         private boolean emit = true;
114         private Writer writer = null;
115 
116         public SaxFormatHandler(Writer writer)
117         {
118             super();
119             this.writer = writer;
120         }
121         
122         private void write(String text)
123             throws IOException
124         {
125             if (writer != null)
126             {
127                 writer.write(text);
128             }
129         }
130 
131         public void characters(char[] values, int start, int length)
132         {
133             if (false == emit)                               
134                 return;                                      
135 
136             if (false == rewriter.enterText(values, start))
137                return;                    
138 
139             if (writer != null)
140             {
141                 try
142                 {
143                     writer.write(values);
144                 }
145                 catch(IOException e)
146                 {                
147                 }
148             }            
149         }
150             
151         public void startElement(String uri, String localName, String qName, MutableAttributes attributes) 
152             throws SAXException
153         {
154 //            System.out.println("qName = " + qName);
155 //            System.out.println("localName = " + localName);
156 //            System.out.println("uri = " + uri);
157             String tag = qName;
158             
159             if (false == rewriter.enterStartTagEvent(tag.toString(), attributes))
160                 return;
161 
162             try
163             {
164                 appendTagToResult(tag, attributes);
165                 write(lineSeparator);                
166                 String appended = rewriter.exitStartTagEvent(tag.toString(), attributes);
167                 if (null != appended)
168                 {
169                     write(appended);
170                 }
171             }                    
172             catch (Exception e)
173             {
174                 log.error("Start tag parsing error", e);                    
175             }
176         }
177     
178         public void endElement(String uri, String localName, String qName) 
179             throws SAXException
180         {
181             String tag = qName;
182             elementCount++;
183             if (false == rewriter.enterEndTagEvent(tag.toString()))
184                 return;
185                 
186             try
187             {                            
188                 addToResult("</").addToResult(tag).addToResult(">");
189     
190                 write(lineSeparator);                
191                 String appended = rewriter.exitEndTagEvent(tag.toString());
192                 if (null != appended)
193                 {
194                     write(appended);
195                 }
196             }                    
197             catch (Exception e)
198             {
199                 log.error("End tag parsing error", e);                                    
200             }                    
201             
202         }
203 
204         /*
205          * Writes output to the final stream for all attributes of a given tag.
206          *
207          * @param tag The HTML tag being output.
208          * @param attrs The mutable HTML attribute set for the current HTML tag.
209          */
210         private void appendTagToResult(String tag, MutableAttributes attrs) 
211         {
212             convertURLS(tag, attrs);
213             addToResult("<").addToResult(tag);
214             for (int ix = 0; ix < attrs.getLength(); ix++)
215             {
216                 String value = attrs.getValue(ix);
217                 addToResult(" ").addToResult(value).addToResult("=\"").
218                 addToResult(value).addToResult("\"");
219             }        
220             addToResult(">");
221         }
222     
223         /*
224          * Used to write tag and attribute objects to the output stream.
225          * Returns a reference to itself so that these calls can be chained.
226          *
227          * @param txt Any text to be written out to stream with toString method.
228          *            The object being written should implement its toString method.
229          * @return A handle to the this, the callback, for chaining results.
230          *
231          */
232         private SaxFormatHandler addToResult(Object txt)
233         {
234             // to allow for implementation using Stringbuffer or StringWriter
235             // I don't know yet, which one is better in this case
236             // if (ignoreLevel > 0 ) return this;
237 
238             try
239             {
240                 write(txt.toString());
241             } 
242             catch (Exception e)
243             {
244                 System.err.println("Error parsing:" + e);
245             }
246             return this;
247         }
248 
249         /*
250          * Determines which HTML Tag/Element is being inspected, and calls the 
251          * appropriate converter for that context.  This method contains all the
252          * logic for determining how tags are rewritten. 
253          *
254          * TODO: it would be better to drive this logic off a state table that is not
255          * tied to the Hot Java parser.
256          *
257          * @param tag TAG from the Callback-Interface.
258          * @param attrs The mutable HTML attribute set for the current HTML element.
259          */
260 
261         private void convertURLS(String tag, MutableAttributes attrs) 
262         {
263             rewriter.enterConvertTagEvent(tag.toString(), attrs);
264         }
265              
266         public InputSource resolveEntity (String publicId, String systemId)
267         {
268             
269             try 
270             {
271                 Map dtds = getDtds();   
272                 byte[] dtd = (byte[])dtds.get(systemId);
273                 if (dtd == null)
274                 {
275                     ByteArrayOutputStream baos = new ByteArrayOutputStream();
276                     URL url = new URL(systemId);
277                     Streams.drain(url.openStream(), baos);
278                     dtd = baos.toByteArray();
279                     dtds.put(systemId, dtd);                    
280                 }
281                                 
282                 if (dtd != null)
283                 {
284                     ByteArrayInputStream bais = new ByteArrayInputStream(dtd);
285                     InputSource is = new InputSource(bais);
286                     is.setPublicId( publicId );
287                     is.setSystemId( systemId );
288                                         
289                     return is;
290                 }
291             } 
292             catch(Throwable t ) // java.io.IOException x  
293             {
294                 t.printStackTrace();
295                 log.error("failed to get URL input source", t);
296             }
297             
298             // forces to get dtd over internet
299             return null;
300         }
301     
302     }
303 
304     // DTD Map     
305     static private Map dtds = new HashMap();
306     
307     public static Map getDtds()
308     {
309         return dtds;
310     }
311 
312     public static void clearDtdCache()
313     {
314         dtds.clear();
315     }
316     
317 }