View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.jetspeed.rewriter.html;
18  
19  import java.io.IOException;
20  import java.io.Reader;
21  import java.io.Writer;
22  import java.util.Enumeration;
23  
24  import javax.swing.text.MutableAttributeSet;
25  import javax.swing.text.html.HTML;
26  import javax.swing.text.html.HTMLEditorKit;
27  
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.jetspeed.rewriter.ParserAdaptor;
31  import org.apache.jetspeed.rewriter.Rewriter;
32  import org.apache.jetspeed.rewriter.RewriterException;
33  
34  /***
35   * HTML Parser Adaptor for the Swing 'HotJava' parser.
36   *
37   * @author <a href="mailto:taylor@apache.org">David Sean Taylor</a>
38   * @version $Id: SwingParserAdaptor.java 516448 2007-03-09 16:25:47Z ate $
39   */
40  public class SwingParserAdaptor implements ParserAdaptor
41  {
    /** Logger shared by the adaptor and its nested parser callback. */
    protected final static Log log = LogFactory.getLog(SwingParserAdaptor.class);

    /** Callback created by the most recent rewrite/parse call; null before the first call. */
    private SwingParserAdaptor.Callback callback = null;
    /** Line separator the callback inserts into the output; assigned in the constructor. */
    private String lineSeparator;
    /** NOTE(review): never read or written anywhere else in this class. */
    private boolean skippingImplied = false;
    /** Rewriter passed to the most recent rewrite/parse call; consulted by the callback. */
    private Rewriter rewriter;
48      
49      /*
50       * Construct a swing (hot java) parser adaptor
51       * Receives a Rewriter parameter, which is used as a callback when rewriting URLs.
52       * The rewriter object executes the implementation specific URL rewriting.
53       *
54       * @param rewriter The rewriter object that is called back during URL rewriting
55       */
56      public SwingParserAdaptor()
57      {
58          lineSeparator = System.getProperty("line.separator", "\r\n");         
59      }
60  
61      /*
62       * Parses and an HTML document, rewriting all URLs as determined by the Rewriter callback
63       *
64       *
65       * @param reader The input stream reader 
66       *
67       * @throws MalformedURLException 
68       *
69       * @return An HTML-String with rewritten URLs.
70       */    
71      public void rewrite(Rewriter rewriter, Reader reader, Writer writer)
72          throws RewriterException
73      {
74          try
75          {
76              this.rewriter = rewriter;            
77              HTMLEditorKit.Parser parser = new SwingParserAdaptor.ParserGetter().getParser();                    
78              callback = new SwingParserAdaptor.Callback(writer);
79              parser.parse(reader, callback, true);
80          } 
81          catch (Exception e)
82          {
83              e.printStackTrace();
84              throw new RewriterException(e);
85          }
86      }
87  
88      public void parse(Rewriter rewriter, Reader reader)
89          throws RewriterException    
90      {
91          try
92          {
93              this.rewriter = rewriter;            
94              HTMLEditorKit.Parser parser = new SwingParserAdaptor.ParserGetter().getParser();        
95              callback = new SwingParserAdaptor.Callback(null);
96              parser.parse(reader, callback, true);
97          } 
98          catch (Exception e)
99          {
100             e.printStackTrace();
101             throw new RewriterException(e);
102         }
103     }
104     
105     /*
106      * This Class is needed, because getParser is protected and therefore 
107      *  only accessibly by a subclass
108      */
109     class ParserGetter extends HTMLEditorKit
110     {
111 
112         public HTMLEditorKit.Parser getParser()
113         {
114             return super.getParser();
115         }
116     } 
117     
118     /*
119      *  Swing Parser Callback from the HTMLEditorKit.
120      * This class handles all SAX-like events during parsing.
121      *
122      */
123     class Callback extends HTMLEditorKit.ParserCallback
124     {
        // either handling of <FORM> is buggy, or I made some weird mistake ... 
        // ... JDK 1.3 sends double "</form>"-tags on closing <form>
        // NOTE(review): inForm and inScript are not read or written anywhere else in this class.
        private boolean inForm = false; 
        private boolean inScript = false; 
        // true while content is being dropped because a start tag matched rewriter.shouldStripTag
        private boolean strip = false;
        // true only while handleSimpleTag emits its tag, so that the tag is closed with "/>"
        private boolean simpleTag = false;
        // name of the start tag that switched stripping on; the matching end tag switches it off
        private String stripTag = null;
        // output destination; when null every write is skipped
        private Writer writer = null;

        /*
         * Creates a callback bound to the given output writer.
         *
         * @param writer The writer receiving the output, or null to produce no output.
         */
        private Callback (Writer writer) 
        {
            this.writer = writer;
        }
138 
139         //
140         // -------------- Hot Java event callbacks... --------------------
141         //
142 
143         /*
144          *  Hot Java event callback for text (all data in between tags)
145          * 
146          * @param values The array of characters containing the text.
147          */
148         public void handleText(char[] values,int param) 
149         {
150              if (strip)
151              {                               
152                  return;
153              }                                      
154              if (values[0] == '>')
155              {                            
156                  return;
157              }     
158              if (false == rewriter.enterText(values, param))
159              {
160                 return;
161              }                    
162 
163             addToResult(values);
164         }
165 
166         private void write(String text)
167             throws IOException
168         {
169             if (writer != null)
170             {
171                 writer.write(text);
172             }
173         }
174         
175         /*
176          * Hot Java event callback for handling a simple tag (without begin/end)
177          *
178          * @param tag The HTML tag being handled.
179          * @param attrs The mutable HTML attribute set for the current HTML element.         
180          * @param position the position of the tag.         
181          *
182          */
183         public void handleSimpleTag(HTML.Tag htmlTag, MutableAttributeSet attrs, int param) 
184         {
185             String tag = htmlTag.toString();
186             
187             if (false == rewriter.enterSimpleTagEvent(tag, new SwingAttributes(attrs)))
188             {
189                 return;
190             }
191 
192             if (strip)
193             {
194                 return;
195             }
196             
197             if (rewriter.shouldStripTag(tag))
198             {
199                 return;            
200             }
201             
202             if (rewriter.shouldRemoveTag(tag))
203             {
204                 return;
205             }
206             
207             try
208             {
209                 simpleTag = true;                
210                 appendTagToResult(htmlTag, attrs);
211                 write(lineSeparator);
212 /*
213                 if (tag.toString().equalsIgnoreCase("param") ||
214                     tag.toString().equalsIgnoreCase("object") ||
215                     tag.toString().equalsIgnoreCase("embed"))
216                 {
217                     write(lineSeparator);
218                 }
219 */                
220                 simpleTag = false;
221                 String appended = rewriter.exitSimpleTagEvent(tag, new SwingAttributes(attrs));
222                 if (null != appended)
223                 {
224                     write(appended);
225                 }
226             }
227             catch (Exception e)
228             {
229                 log.error("Simple tag parsing error", e);                    
230             }
231         }
232 
233         /*
234          * Hot Java event callback for handling a start tag.
235          *
236          * @param tag The HTML tag being handled.
237          * @param attrs The mutable HTML attribute set for the current HTML element.         
238          * @param position the position of the tag.         
239          *
240          */
241         public void handleStartTag(HTML.Tag htmlTag,  MutableAttributeSet attrs, int position) 
242         {
243             String tag = htmlTag.toString();
244             
245             if (false == rewriter.enterStartTagEvent(tag, new SwingAttributes(attrs)))
246             {
247                 return;
248             }
249             
250             if (strip)
251             {
252                 return;
253             }
254             
255             if (rewriter.shouldStripTag(tag))
256             {
257                 stripTag = tag;
258                 strip = true;
259                 return;            
260             }
261             
262             if (rewriter.shouldRemoveTag(tag))
263             {
264                 return;
265             }
266             
267             try
268             {
269                 appendTagToResult(htmlTag, attrs);
270                 formatLine(htmlTag);
271                 String appended = rewriter.exitStartTagEvent(tag, new SwingAttributes(attrs));
272                 if (null != appended)
273                 {
274                     write(appended);
275                 }
276             }                    
277             catch (Exception e)
278             {
279                 log.error("Start tag parsing error", e);                    
280             }
281                     
282         }
283         
284 
285 
286         /*
287          * Hot Java event callback for handling an end tag.
288          *
289          * @param tag The HTML tag being handled.
290          * @param position the position of the tag.
291          *
292          */
293         public void handleEndTag(HTML.Tag htmlTag, int position) 
294         {
295             String tag = htmlTag.toString();
296             if (false == rewriter.enterEndTagEvent(tag.toString()))
297             {
298                 return;
299             }
300             
301             if (strip)
302             {
303                 if (tag.equalsIgnoreCase(stripTag))
304                 {
305                     strip = false;
306                     stripTag = null;
307                 }
308                 return;
309             }
310             
311             if (rewriter.shouldRemoveTag(tag))
312             {
313                 return;                                
314             }
315              
316             try
317             {                            
318                 addToResult("</").addToResult(tag).addToResult(">");
319     
320                 // formatLine(htmlTag);
321                 write(lineSeparator);
322                 
323                 String appended = rewriter.exitEndTagEvent(tag);
324                 if (null != appended)
325                 {
326                     write(appended);
327                 }
328             }                    
329             catch (Exception e)
330             {
331                 log.error("End tag parsing error", e);                                    
332             }                    
333         }
334 
335 
        /*
         * Hot Java event callback for handling errors.
         * Intentionally does nothing: the body contains no statements, so
         * error reports from the parser are not logged or written out.
         *
         * @param str The error message from Swing.
         * @param param A parameter passed to handler.
         *
         */
        public void handleError(java.lang.String str,int param) 
        {
            // System.out.println("Handling error: " + str);
        }
347 
348         /*
349          * Hot Java event callback for HTML comments.
350          *
351          * @param values The character array of text comments.
352          * @param param A parameter passed to handler.
353          *
354          */
355         public void handleComment(char[] values,int param) 
356         {
357             if (strip || rewriter.shouldRemoveComments())
358             {
359                 return;             
360             }
361             addToResult("<!-- ").addToResult(values).addToResult(" -->").addToResult(lineSeparator);
362         }
363 
364         /*
365          * Hot Java event callback for end of line strings.
366          *
367          * @param str The end-of-line string.
368          *
369          */
370         public void handleEndOfLineString(java.lang.String str) 
371         {
372             if (strip)
373             {                               
374                 return;
375             }                                      
376             
377             addToResult(lineSeparator);
378             addToResult(str);
379         }
380 
381 
382         /*
383          * Prints new lines to make the output a little easier to read when debugging.
384          *
385          * @param tag The HTML tag being handled.         
386          *
387          */
388         private void formatLine(HTML.Tag tag)
389         {
390             try
391             {
392                 if (tag.isBlock() || 
393                     tag.breaksFlow() || 
394                     tag == HTML.Tag.FRAME ||
395                     tag == HTML.Tag.FRAMESET ||
396                     tag == HTML.Tag.SCRIPT)
397                 {
398                     write(lineSeparator);
399                 }
400                 
401             }                    
402             catch (Exception e)
403             {
404                 log.error("Format Line tag parsing error", e);                    
405             }
406             
407         }
408 
409 
410         /*
411          * Used to write tag and attribute objects to the output stream.
412          * Returns a reference to itself so that these calls can be chained.
413          *
414          * @param txt Any text to be written out to stream with toString method.
415          *            The object being written should implement its toString method.
416          * @return A handle to the this, the callback, for chaining results.
417          *
418          */
419         private Callback addToResult(Object txt)
420         {
421             // to allow for implementation using Stringbuffer or StringWriter
422             // I don't know yet, which one is better in this case
423             //if (ignoreLevel > 0 ) return this;
424 
425             try
426             {
427                 write(txt.toString());
428             } 
429             catch (Exception e)
430             {
431                 System.err.println("Error parsing:" + e);
432             }
433             return this;
434         }
435 
436 
437         /*
438          * Used to write all character content to the output stream.
439          * Returns a reference to itself so that these calls can be chained.
440          *
441          * @param txt Any character text to be written out directly to stream.
442          * @return A handle to the this, the callback, for chaining results.
443          *
444          */
445         private Callback addToResult(char[] txt)
446         {
447             //if (ignoreLevel > 0) return this;
448 
449             try
450             {
451                 if (writer != null)
452                 {
453                     writer.write(txt);
454                 }
455 
456             } 
457             catch (Exception e)
458             { /* ignore */
459             }
460             return this;
461         }
462 
463         /* 
464          * Accessor to the Callback's content-String
465          *
466          * @return Cleaned and rewritten HTML-Content
467          */        
468         public void getResult() 
469         {
470             try
471             {
472                 if (writer != null)
473                 {
474                     writer.flush();
475                 }
476             } 
477             catch (Exception e)
478             { /* ignore */
479             }
480 
481             // WARNING: doesn't work, if you remove " " + ... but don't know why
482             //String res = " " + result.toString(); 
483 
484             // return res;
485         }
486 
        /*
         * Flushes the output stream. NOT IMPLEMENTED
         * The body is empty, so calling this method does not flush the writer;
         * getResult() is the method in this class that calls writer.flush().
         *
         * @throws javax.swing.text.BadLocationException declared in the signature; never thrown by this body
         */
        public void flush() throws javax.swing.text.BadLocationException 
        {
            // nothing to do here ...
        }
495 
496         /*
497          * Writes output to the final stream for all attributes of a given tag.
498          *
499          * @param tag The HTML tag being output.
500          * @param attrs The mutable HTML attribute set for the current HTML tag.
501          *
502          */
503         private void appendTagToResult(HTML.Tag tag, MutableAttributeSet attrs) 
504         {
505             convertURLS(tag, attrs);
506             Enumeration e = attrs.getAttributeNames();
507             addToResult("<").addToResult(tag);
508             while (e.hasMoreElements())
509             {
510                 Object attr = e.nextElement();
511                 String value = attrs.getAttribute(attr).toString();
512                 addToResult(" ").addToResult(attr).addToResult("=\"").
513                 addToResult(value).addToResult("\"");
514             }        
515             if (simpleTag)
516                 addToResult("/>");
517             else             
518                 addToResult(">");
519         }
520 
521 
522         /*
523          * Determines which HTML Tag/Element is being inspected, and calls the 
524          * appropriate converter for that context.  This method contains all the
525          * logic for determining how tags are rewritten. 
526          *
527          * @param tag TAG from the Callback-Interface.
528          * @param attrs The mutable HTML attribute set for the current HTML element.
529          */
530 
531         private void convertURLS( HTML.Tag tag, MutableAttributeSet attrs ) 
532         {
533             rewriter.enterConvertTagEvent(tag.toString(), new SwingAttributes(attrs));
534 
535             /*
536               if ( removeScript && (tag == HTML.Tag.SCRIPT)) {
537                 ignoreLevel ++;
538               */
539         }
540 
541 
542     }
543     
544 }