1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.jetspeed.portlet;
18
19 import java.io.BufferedInputStream;
20 import java.io.ByteArrayInputStream;
21 import java.io.ByteArrayOutputStream;
22 import java.io.FileReader;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.io.OutputStreamWriter;
27 import java.io.PrintWriter;
28 import java.io.Reader;
29 import java.io.StringWriter;
30 import java.io.UnsupportedEncodingException;
31 import java.io.Writer;
32 import java.net.URL;
33 import java.util.ArrayList;
34 import java.util.Arrays;
35 import java.util.HashMap;
36 import java.util.Iterator;
37 import java.util.Map;
38 import java.util.StringTokenizer;
39
40 import javax.portlet.ActionRequest;
41 import javax.portlet.ActionResponse;
42 import javax.portlet.PortletConfig;
43 import javax.portlet.PortletContext;
44 import javax.portlet.PortletException;
45 import javax.portlet.PortletMode;
46 import javax.portlet.PortletURL;
47 import javax.portlet.RenderRequest;
48 import javax.portlet.RenderResponse;
49
50 import org.apache.commons.httpclient.Cookie;
51 import org.apache.commons.httpclient.Header;
52 import org.apache.commons.httpclient.HttpClient;
53 import org.apache.commons.httpclient.HttpMethod;
54 import org.apache.commons.httpclient.HttpMethodBase;
55 import org.apache.commons.httpclient.NameValuePair;
56 import org.apache.commons.httpclient.methods.GetMethod;
57 import org.apache.commons.httpclient.methods.MultipartPostMethod;
58 import org.apache.commons.httpclient.methods.PostMethod;
59 import org.apache.commons.logging.Log;
60 import org.apache.commons.logging.LogFactory;
61 import org.apache.jetspeed.portlet.webcontent.WebContentHistoryList;
62 import org.apache.jetspeed.portlet.webcontent.WebContentHistoryPage;
63 import org.apache.jetspeed.rewriter.JetspeedRewriterController;
64 import org.apache.jetspeed.rewriter.RewriterController;
65 import org.apache.jetspeed.rewriter.RewriterException;
66 import org.apache.jetspeed.rewriter.RulesetRewriter;
67 import org.apache.jetspeed.rewriter.WebContentRewriter;
68 import org.apache.jetspeed.rewriter.html.neko.NekoParserAdaptor;
69 import org.apache.jetspeed.rewriter.rules.Ruleset;
70 import org.apache.jetspeed.rewriter.xml.SaxParserAdaptor;
71 import org.apache.portals.bridges.velocity.GenericVelocityPortlet;
72 import org.apache.portals.messaging.PortletMessaging;
73
74
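/**
 * WebContentPortlet
 *
 * Fetches the content of an external URL, rewrites it and renders it inside
 * the portlet. The pages visited are kept in a history list so that the user
 * can move to the previous or next page, or refresh the current one.
 *
 * TODO: Preferences, cache stream instead of URL
 *
 * @author <a href="mailto:rogerrutr@apache.org">Roger Ruttimann </a>
 * @version $Id: WebContentPortlet.java 605431 2007-12-19 05:11:40Z taylor $
 */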
83
84 public class WebContentPortlet extends GenericVelocityPortlet
85 {
86
87 /***
88 * WebContentPortlet Allows navigation inside the portlet and caches the
89 * latest URL
90 */
91
92 /***
93 * Configuration constants.
94 */
95 public static final String VIEW_SOURCE_PARAM = "viewSource";
96 public static final String EDIT_SOURCE_PARAM = "editSource";
97
98
99 public static final String BROWSER_ACTION_PARAM = "wcBrowserAction";
100 public static final String BROWSER_ACTION_PREVIOUS_PAGE = "previousPage";
101 public static final String BROWSER_ACTION_REFRESH_PAGE = "refreshPage";
102 public static final String BROWSER_ACTION_NEXT_PAGE = "nextPage";
103
104 /***
105 * Action Parameter
106 */
107
108
109
110 public static final String HISTORY = "webcontent.history";
111 public static final String HTTP_STATE = "webcontent.http.state";
112
113
114
115 protected final static Log log = LogFactory.getLog(WebContentPortlet.class);
116 public final static String defaultEncoding = "UTF-8";
117
118
119
120 private RulesetRewriter rewriter = null;
121 private RewriterController rewriteController = null;
122
123 public static final String FORM_POST_METHOD = "post";
124 public static final String FORM_GET_METHOD = "get";
125 public static final String FORM_MULTIPART_METHOD = "multipart";
126
127 public WebContentPortlet()
128 {
129 super();
130 }
131
132 /***
133 * Initialize portlet configuration.
134 */
135 public void init(PortletConfig config) throws PortletException
136 {
137 super.init(config);
138 }
139
140 /***
141 * processAction() Checks action initiated by the WebContent portlet which
142 * means that a user has clicked on an URL
143 *
144 * @param actionRequest
145 * @param actionResponse
146 * @throws PortletException
147 * @throws IOException
148 */
149 public void processAction(ActionRequest actionRequest, ActionResponse actionResponse) throws PortletException,
150 IOException
151 {
152
153 String browserAction = actionRequest.getParameter(BROWSER_ACTION_PARAM);
154 if (browserAction != null)
155 {
156 if (!browserAction.equalsIgnoreCase(BROWSER_ACTION_REFRESH_PAGE))
157 {
158
159 WebContentHistoryList history = (WebContentHistoryList)PortletMessaging.receive(actionRequest, HISTORY);
160
161 if (browserAction.equalsIgnoreCase(BROWSER_ACTION_PREVIOUS_PAGE))
162 {
163 if (history.hasPreviousPage())
164 history.getPreviousPage();
165 }
166 else if (browserAction.equalsIgnoreCase(BROWSER_ACTION_NEXT_PAGE))
167 {
168 if (history.hasNextPage())
169 history.getNextPage();
170 }
171 }
172
173 return ;
174 }
175
176
177 String webContentURL = actionRequest.getParameter(WebContentRewriter.ACTION_PARAMETER_URL);
178 String webContentMethod = actionRequest.getParameter(WebContentRewriter.ACTION_PARAMETER_METHOD);
179 Map webContentParams = new HashMap(actionRequest.getParameterMap()) ;
180
181
182 if (webContentMethod == null) webContentMethod = "" ;
183
184
185 webContentParams.remove(WebContentRewriter.ACTION_PARAMETER_URL);
186 webContentParams.remove(WebContentRewriter.ACTION_PARAMETER_METHOD);
187
188 if (webContentURL == null || actionRequest.getPortletMode() == PortletMode.EDIT)
189 {
190 processPreferencesAction(actionRequest, actionResponse);
191 webContentURL = actionRequest.getPreferences().getValue("SRC", "http://portals.apache.org");
192
193
194 webContentParams.clear();
195 }
196
197
198
199
200 if (webContentURL != null && webContentURL.length() > 0)
201 {
202
203 WebContentHistoryList history = (WebContentHistoryList)PortletMessaging.receive(actionRequest, HISTORY);
204 if (history == null)
205 history = new WebContentHistoryList();
206 history.visitPage(new WebContentHistoryPage(webContentURL,webContentParams,webContentMethod));
207 PortletMessaging.publish(actionRequest, HISTORY, history);
208 }
209 }
210
211 /***
212 * doView Renders the URL in the following order 1) SESSION_PARAMETER
213 * 2)cached version 3) defined for preference SRC
214 */
215 public void doView(RenderRequest request, RenderResponse response) throws PortletException, IOException
216 {
217 String viewPage = (String)request.getAttribute(PARAM_VIEW_PAGE);
218 if (viewPage != null)
219 {
220 super.doView(request, response);
221 return;
222 }
223
224
225 WebContentHistoryList history = (WebContentHistoryList)PortletMessaging.receive(request, HISTORY);
226 if (history == null)
227 history = new WebContentHistoryList();
228 WebContentHistoryPage currentPage = history.getCurrentPage();
229 if (currentPage == null)
230 {
231 String sourceURL = request.getPreferences().getValue("SRC", "");
232 if (sourceURL == null)
233 {
234
235 throw new PortletException("WebContent source not specified. Go to edit mode and specify an URL.");
236 }
237 currentPage = new WebContentHistoryPage(sourceURL);
238 }
239
240
241 if (rewriteController == null)
242 {
243 PortletContext portletApplication = getPortletContext();
244 String path = portletApplication.getRealPath("/WEB-INF");
245 String contextPath = path + "/";
246 try
247 {
248
249 rewriteController = getController(contextPath);
250 }
251 catch (Exception e)
252 {
253
254 String msg = "WebContentPortlet failed to create rewriter controller.";
255 log.error(msg,e);
256 throw new PortletException(e.getMessage());
257 }
258 }
259
260
261 response.setContentType("text/html");
262 byte[] content = doWebContent(currentPage.getUrl(), currentPage.getParams(), currentPage.isPost(), request, response);
263
264
265
266 PrintWriter writer = response.getWriter();
267 writer.print("<block>");
268 if (history.hasPreviousPage())
269 {
270 PortletURL prevAction = response.createActionURL() ;
271 prevAction.setParameter(BROWSER_ACTION_PARAM, BROWSER_ACTION_PREVIOUS_PAGE);
272 writer.print(" [<a href=\"" + prevAction.toString() +"\">Previous Page</a>] ");
273 }
274 PortletURL refreshAction = response.createActionURL() ;
275 refreshAction.setParameter(BROWSER_ACTION_PARAM, BROWSER_ACTION_REFRESH_PAGE);
276 writer.print(" [<a href=\"" + refreshAction.toString() +"\">Refresh Page</a>] ");
277 if (history.hasNextPage())
278 {
279 PortletURL nextAction = response.createActionURL() ;
280 nextAction.setParameter(BROWSER_ACTION_PARAM, BROWSER_ACTION_NEXT_PAGE);
281 writer.print(" [<a href=\"" + nextAction.toString() +"\">Next Page</a>] ");
282 }
283 writer.print("</block><hr/>");
284
285
286 ByteArrayInputStream bais = new ByteArrayInputStream(content);
287 drain(new InputStreamReader(bais, WebContentPortlet.defaultEncoding), writer);
288 bais.close();
289
290
291 history.visitPage(currentPage);
292 PortletMessaging.publish(request, HISTORY, history);
293 }
294
295 public void doEdit(RenderRequest request, RenderResponse response) throws PortletException, IOException
296 {
297 response.setContentType("text/html");
298 doPreferencesEdit(request, response);
299 }
300
301
302
303
304 protected byte[] doWebContent(String sourceAttr, Map sourceParams, boolean isPost, RenderRequest request, RenderResponse response)
305 throws PortletException
306 {
307 HttpMethod httpMethod = null ;
308
309 try
310 {
311
312 PortletURL action = response.createActionURL();
313 ((WebContentRewriter) rewriter).setActionURL(action);
314 URL baseURL = new URL(sourceAttr);
315 rewriter.setBaseUrl(baseURL.toString());
316
317
318 if (baseURL.getProtocol().equals("file"))
319 {
320 Reader reader = new InputStreamReader((InputStream)baseURL.getContent());
321 StringWriter writer = new StringWriter();
322 rewriter.rewrite(rewriteController.createParserAdaptor("text/html"), reader, writer);
323 writer.flush();
324 return writer.toString().getBytes();
325 }
326
327
328
329 HttpClient httpClient = getHttpClient(request) ;
330 String method = (isPost) ? FORM_POST_METHOD : FORM_GET_METHOD;
331 httpMethod = getHttpMethod(httpClient, getURLSource(sourceAttr, sourceParams, request, response), sourceParams, method, request);
332 byte[] result = doPreemptiveAuthentication(httpClient, httpMethod, request, response);
333
334
335 if (result == null) {
336 return doHttpWebContent(httpClient, httpMethod, 0, request, response);
337 } else {
338 return result;
339 }
340 }
341 catch (PortletException pex)
342 {
343
344 throw pex;
345 }
346 catch (Exception ex)
347 {
348 String msg = "Exception while rewritting HTML content" ;
349 log.error(msg,ex);
350 throw new PortletException(msg+", Error: "+ex.getMessage());
351 }
352 finally
353 {
354
355 if (httpMethod != null)
356 httpMethod.releaseConnection();
357 }
358 }
359
360 protected byte[] doHttpWebContent(HttpClient httpClient, HttpMethod httpMethod, int retryCount, RenderRequest request, RenderResponse response)
361 throws PortletException
362 {
363 try
364 {
365
366
367
368
369 httpClient.executeMethod(httpMethod);
370
371
372 rewriter.setBaseUrl( rewriter.getBaseRelativeUrl( httpMethod.getPath() )) ;
373
374
375
376 Cookie[] cookies = httpClient.getState().getCookies();
377 PortletMessaging.publish(request, HTTP_STATE, cookies);
378
379
380
381
382 int responseCode = httpMethod.getStatusCode();
383 if (responseCode >= 300 && responseCode <= 399)
384 {
385
386 Header locationHeader = httpMethod.getResponseHeader("location");
387 String redirectLocation = locationHeader != null ? locationHeader.getValue() : null ;
388 if (redirectLocation != null)
389 {
390
391
392
393 return doWebContent( redirectLocation, new HashMap(), false, request, response ) ;
394 }
395 else
396 {
397
398 throw new PortletException("Redirection code: "+responseCode+", but with no redirectionLocation set.");
399 }
400 }
401 else if ( responseCode >= 400 )
402 {
403 if ( responseCode == 401 )
404 {
405 if (httpMethod.getHostAuthState().isAuthRequested() && retryCount++ < 1 && doRequestedAuthentication( httpClient, httpMethod, request, response))
406 {
407
408 return doHttpWebContent(httpClient, httpMethod, retryCount, request, response);
409 }
410 else
411 {
412
413 throw new PortletException("Site requested authorization, but we are unable to provide credentials");
414 }
415 }
416 else if (retryCount++ < 3)
417 {
418 log.info("WebContentPortlet.doHttpWebContent() - retrying: "+httpMethod.getPath()+", response code: "+responseCode);
419
420
421 return doHttpWebContent(httpClient, httpMethod, retryCount, request, response);
422 }
423 else
424 {
425
426 throw new PortletException("Failure reading: "+httpMethod.getPath()+", response code: "+responseCode);
427 }
428 }
429
430
431
432
433 BufferedInputStream bis = new BufferedInputStream(httpMethod.getResponseBodyAsStream());
434 String encoding = ((HttpMethodBase)httpMethod).getResponseCharSet();
435 if (encoding == null)
436 encoding = getContentCharSet(bis);
437 Reader htmlReader = new InputStreamReader(bis, encoding);
438
439
440 if (encoding == null)
441 encoding = WebContentPortlet.defaultEncoding ;
442 ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
443 Writer htmlWriter = new OutputStreamWriter(byteOutputStream, encoding);
444
445
446 rewriter.rewrite(rewriteController.createParserAdaptor("text/html"), htmlReader, htmlWriter);
447 htmlWriter.flush();
448
449
450
451
452 return byteOutputStream.toByteArray();
453 }
454 catch (UnsupportedEncodingException ueex)
455 {
456 throw new PortletException("Encoding " + defaultEncoding + " not supported. Error: " + ueex.getMessage());
457 }
458 catch (RewriterException rwe)
459 {
460 throw new PortletException("Failed to rewrite HTML page. Error: " + rwe.getMessage());
461 }
462 catch (Exception e)
463 {
464 throw new PortletException("Exception while rewritting HTML page. Error: " + e.getMessage());
465 }
466 }
467
468 protected String getURLSource(String source, Map params, RenderRequest request, RenderResponse response)
469 {
470 return source;
471 }
472
473 protected byte[] doPreemptiveAuthentication(HttpClient clent,HttpMethod method, RenderRequest request, RenderResponse response)
474 {
475
476 return null ;
477 }
478
479 protected boolean doRequestedAuthentication(HttpClient clent,HttpMethod method, RenderRequest request, RenderResponse response)
480 {
481
482 return false ;
483 }
484
485
486
487
488 private RewriterController getController(String contextPath) throws Exception
489 {
490 Class[] rewriterClasses = new Class[]
491 { WebContentRewriter.class, WebContentRewriter.class};
492
493 Class[] adaptorClasses = new Class[]
494 { NekoParserAdaptor.class, SaxParserAdaptor.class};
495 RewriterController rwc = new JetspeedRewriterController(contextPath + "conf/rewriter-rules-mapping.xml", Arrays
496 .asList(rewriterClasses), Arrays.asList(adaptorClasses));
497
498 FileReader reader = new FileReader(contextPath + "conf/default-rewriter-rules.xml");
499
500 Ruleset ruleset = rwc.loadRuleset(reader);
501 reader.close();
502 rewriter = rwc.createRewriter(ruleset);
503 return rwc;
504 }
505
506 protected HttpClient getHttpClient(RenderRequest request) throws IOException
507 {
508
509 HttpClient client = new HttpClient();
510
511
512 Cookie[] cookies = (Cookie[])PortletMessaging.receive(request, HTTP_STATE);
513 if (cookies != null)
514 {
515
516 client.getState().addCookies(cookies);
517
518
519
520 }
521
522 return client ;
523 }
524
525 protected HttpMethodBase getHttpMethod(HttpClient client, String uri, Map params, String formMethod, RenderRequest request) throws IOException
526 {
527 formMethod = FORM_MULTIPART_METHOD;
528 HttpMethodBase httpMethod = null;
529 String useragentProperty = request.getProperty("User-Agent");
530 if(formMethod.equalsIgnoreCase(FORM_MULTIPART_METHOD)){
531
532 MultipartPostMethod mutlitPart = (MultipartPostMethod)( httpMethod = new MultipartPostMethod(uri)) ;
533 if (params != null && !params.isEmpty())
534 {
535 Iterator iter = params.entrySet().iterator();
536 while (iter.hasNext())
537 {
538 Map.Entry entry = (Map.Entry)iter.next();
539 String name = (String)entry.getKey();
540 String[] values = (String[])entry.getValue();
541 if (values != null)
542 for (int i=0,limit=values.length; i<limit; i++)
543 {
544
545
546 mutlitPart.addParameter(name, values[i]);
547 }
548 }
549 }
550
551 }else if (formMethod.equalsIgnoreCase(FORM_GET_METHOD)){
552
553
554
555 httpMethod = new GetMethod(uri);
556 if (params != null && !params.isEmpty())
557 {
558 ArrayList pairs = new ArrayList();
559 Iterator iter = params.entrySet().iterator();
560 while (iter.hasNext())
561 {
562 Map.Entry entry = (Map.Entry)iter.next() ;
563 String name = (String)entry.getKey() ;
564 String[] values = (String [])entry.getValue() ;
565 if (values != null)
566 for (int i = 0,limit = values.length; i < limit; i++)
567 {
568
569 pairs.add(new NameValuePair(name, values[i]));
570 }
571 }
572 httpMethod.setQueryString((NameValuePair[])pairs.toArray(new NameValuePair[pairs.size()]));
573 }
574
575
576 httpMethod.setFollowRedirects(true);
577 }else if (formMethod.equalsIgnoreCase(FORM_POST_METHOD)) {
578
579
580
581 PostMethod postMethod = (PostMethod)( httpMethod = new PostMethod(uri)) ;
582 if (params != null && !params.isEmpty())
583 {
584 Iterator iter = params.entrySet().iterator();
585 while (iter.hasNext())
586 {
587 Map.Entry entry = (Map.Entry)iter.next();
588 String name = (String)entry.getKey();
589 String[] values = (String[])entry.getValue();
590 if (values != null)
591 for (int i=0,limit=values.length; i<limit; i++)
592 {
593
594
595 postMethod.addParameter(name, values[i]);
596 }
597 }
598 }
599 }
600
601
602 httpMethod.addRequestHeader( "User-Agent", useragentProperty );
603
604
605
606
607
608 return httpMethod ;
609 }
610
611
612
613
614 static final int BLOCK_SIZE = 4096;
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639 private void drain(Reader r, Writer w) throws IOException
640 {
641 char[] bytes = new char[BLOCK_SIZE];
642 try
643 {
644 int length = r.read(bytes);
645 while (length != -1)
646 {
647 if (length != 0)
648 {
649 w.write(bytes, 0, length);
650 }
651 length = r.read(bytes);
652 }
653 }
654 finally
655 {
656 bytes = null;
657 }
658
659 }
660
661
662
663
664
665
666
667
668
669
670 private String getContentCharSet(InputStream is) throws IOException
671 {
672 if (!is.markSupported())
673 {
674 return null;
675 }
676
677 byte[] buf = new byte[BLOCK_SIZE];
678 try
679 {
680 is.mark(BLOCK_SIZE);
681 is.read(buf, 0, BLOCK_SIZE);
682 String content = new String(buf, "ISO-8859-1");
683 String lowerCaseContent = content.toLowerCase();
684 int startIndex = lowerCaseContent.indexOf("<head");
685 if (startIndex == -1)
686 {
687 startIndex = 0;
688 }
689 int endIndex = lowerCaseContent.indexOf("</head");
690 if (endIndex == -1)
691 {
692 endIndex = content.length();
693 }
694 content = content.substring(startIndex, endIndex);
695
696 StringTokenizer st = new StringTokenizer(content, "<>");
697 while (st.hasMoreTokens())
698 {
699 String element = st.nextToken();
700 String lowerCaseElement = element.toLowerCase();
701 if (lowerCaseElement.startsWith("meta") && lowerCaseElement.indexOf("content-type") > 0)
702 {
703 StringTokenizer est = new StringTokenizer(element, " =\"\';");
704 while (est.hasMoreTokens())
705 {
706 if (est.nextToken().equalsIgnoreCase("charset"))
707 {
708 if (est.hasMoreTokens())
709 {
710 return est.nextToken();
711 }
712 }
713 }
714 }
715 }
716 }
717 catch (IOException e)
718 {
719 }
720 finally
721 {
722 is.reset();
723 }
724
725 return null;
726 }
727 }