View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.jetspeed.rewriter.html.neko;
18  
19  import java.io.Reader;
20  import java.io.IOException;
21  
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  import org.apache.xerces.xni.parser.XMLDocumentFilter;
25  import org.apache.xerces.xni.parser.XMLInputSource;
26  
27  import org.apache.jetspeed.rewriter.ParserAdaptor;
28  import org.apache.jetspeed.rewriter.Rewriter;
29  import org.apache.jetspeed.rewriter.RewriterException;
30  
31  import org.xml.sax.SAXException ;
32  
33  import org.cyberneko.html.parsers.SAXParser;
34  import org.cyberneko.html.filters.DefaultFilter;
35  import org.cyberneko.html.filters.Purifier;
36  
37  
38  /***
39   * <p>
40   * NekoParserAdapter
41   * </p>
42   * <p>
43   *  
44   * </p>
45   * @author <a href="mailto:dyoung@phase2systems.com">David L Young</a>
46   * @version $Id: $
47   *
48   */
49  public class NekoParserAdaptor implements ParserAdaptor
50  {
51      protected final static Log log = LogFactory.getLog(NekoParserAdaptor.class);
52      
53      /*
54       * Construct a cyberneko HTML parser adaptor
55       */
56      public NekoParserAdaptor()
57      {
58          super();
59      }
60      
61      /***
62       * <p>
63       * parse
64       * </p>
65       *
66       * @see org.apache.jetspeed.rewriter.ParserAdaptor#parse(org.apache.jetspeed.rewriter.Rewriter, java.io.Reader)
67       * @param rewriter
68       * @param reader
69       * @throws RewriterException
70       */
71      public void parse(Rewriter rewriter, Reader reader)
72              throws RewriterException
73      {
74          // not sure what this means to parse without rewriting
75          rewrite(rewriter,reader,null);
76      }
77  
78      /***
79       * <p>
80       * rewrite
81       * </p>
82       *
83       * @see org.apache.jetspeed.rewriter.ParserAdaptor#rewrite(org.apache.jetspeed.rewriter.Rewriter, java.io.Reader, java.io.Writer)
84       * @param rewriter
85       * @param reader
86       * @param writer
87       * @throws RewriterException
88       */
89      public void rewrite(Rewriter rewriter, java.io.Reader reader, java.io.Writer writer)
90              throws RewriterException
91      {
92          // use a cyberneko SAXParser
93          SAXParser parser = new SAXParser() ;
94  
95          // setup filter chain
96          XMLDocumentFilter[] filters = {
97              new Purifier(),                                                                                  // [1] standard neko purifications (tag balancing, etc)
98              new CallbackElementRemover( rewriter ),                                                          // [2] accept / reject tags based on advice from rewriter
99              writer != null ? new org.cyberneko.html.filters.Writer( writer, null ) : new DefaultFilter()     // [3] propagate results to specified writer (or do nothing -- Default -- when writer is null)
100         };
101         
102         String filtersPropName = "http://cyberneko.org/html/properties/filters";
103    
104         try
105         {
106             parser.setProperty(filtersPropName, filters);
107         }
108         catch (SAXException e)
109         {
110             // either no longer supported (SAXNotSupportedException), or no logner recognized (SAXNotRecognizedException)
111             log.error(filtersPropName + " is, unexpectedly, no longer defined for the cyberneko HTML parser",e);
112             throw new RewriterException("cyberneko parser version not supported",e);
113         }
114 
115         try
116         {
117             // parse from reader
118             parser.parse(new XMLInputSource( null, null, null, reader, null )) ;
119         }
120         catch (IOException e)
121         {
122             String msg = "cyberneko HTML parsing failure";
123             log.error(msg,e);
124             throw new RewriterException(msg,e);
125         }
126 
127     }
128 
129 }