%line | %branch | |||||||||
---|---|---|---|---|---|---|---|---|---|---|
org.apache.jetspeed.search.handlers.URLToDocHandler |
|
|
1 | /* |
|
2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
|
3 | * contributor license agreements. See the NOTICE file distributed with |
|
4 | * this work for additional information regarding copyright ownership. |
|
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 |
|
6 | * (the "License"); you may not use this file except in compliance with |
|
7 | * the License. You may obtain a copy of the License at |
|
8 | * |
|
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | * |
|
11 | * Unless required by applicable law or agreed to in writing, software |
|
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | * See the License for the specific language governing permissions and |
|
15 | * limitations under the License. |
|
16 | */ |
|
17 | package org.apache.jetspeed.search.handlers; |
|
18 | ||
19 | // Java imports |
|
20 | import java.io.IOException; |
|
21 | import java.net.URL; |
|
22 | ||
23 | import org.apache.commons.httpclient.HttpClient; |
|
24 | import org.apache.commons.httpclient.HttpException; |
|
25 | import org.apache.commons.httpclient.methods.GetMethod; |
|
26 | import org.apache.jetspeed.search.AbstractObjectHandler; |
|
27 | import org.apache.jetspeed.search.BaseParsedObject; |
|
28 | ||
29 | /** |
|
30 | * This object handler deals with URLs. |
|
31 | * |
|
32 | * @author <a href="mailto:morciuch@apache.org">Mark Orciuch</a> |
|
33 | * @version $Id: URLToDocHandler.java 516448 2007-03-09 16:25:47Z ate $ |
|
34 | */ |
|
35 | 0 | public class URLToDocHandler extends AbstractObjectHandler |
36 | { |
|
37 | /** |
|
38 | * Static initialization of the logger for this class |
|
39 | */ |
|
40 | //private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(URLToDocHandler.class.getName()); |
|
41 | ||
42 | /** |
|
43 | * Parses a specific object into a document suitable for index placement |
|
44 | * |
|
45 | * @param o |
|
46 | * @return |
|
47 | */ |
|
48 | public org.apache.jetspeed.search.ParsedObject parseObject(Object o) |
|
49 | { |
|
50 | 0 | org.apache.jetspeed.search.ParsedObject result = new BaseParsedObject(); |
51 | ||
52 | 0 | if ((o instanceof URL) == false) |
53 | { |
|
54 | //logger.error("URLToDocHandler: invalid object type: " + o); |
|
55 | 0 | return null; |
56 | } |
|
57 | ||
58 | 0 | URL pageToAdd = (URL) o; |
59 | ||
60 | 0 | HttpClient client = new HttpClient(); |
61 | 0 | GetMethod method = new GetMethod(pageToAdd.toString()); |
62 | 0 | method.setFollowRedirects(true); |
63 | 0 | int statusCode = -1; |
64 | 0 | int attempt = 0; |
65 | ||
66 | try |
|
67 | { |
|
68 | // We will retry up to 3 times. |
|
69 | 0 | while (statusCode == -1 && attempt < 3) |
70 | { |
|
71 | try |
|
72 | { |
|
73 | // execute the method. |
|
74 | 0 | client.executeMethod(method); |
75 | 0 | statusCode = method.getStatusCode(); |
76 | //if (logger.isDebugEnabled()) |
|
77 | { |
|
78 | //logger.debug("URL = " + pageToAdd.toString() + "Status code = " + statusCode); |
|
79 | } |
|
80 | } |
|
81 | 0 | catch (HttpException e) |
82 | { |
|
83 | // We will retry |
|
84 | 0 | attempt++; |
85 | } |
|
86 | 0 | catch (IOException e) |
87 | { |
|
88 | 0 | return null; |
89 | 0 | } |
90 | } |
|
91 | // Check that we didn't run out of retries. |
|
92 | 0 | if (statusCode != -1) |
93 | { |
|
94 | 0 | String content = null; |
95 | try |
|
96 | { |
|
97 | 0 | content = method.getResponseBodyAsString(); |
98 | } |
|
99 | 0 | catch (Exception ioe) |
100 | { |
|
101 | //logger.error("Getting content for " + pageToAdd.toString(), ioe); |
|
102 | 0 | } |
103 | ||
104 | 0 | if (content != null) |
105 | { |
|
106 | try |
|
107 | { |
|
108 | 0 | result.setKey(java.net.URLEncoder.encode(pageToAdd.toString(),"UTF-8")); |
109 | 0 | result.setType(org.apache.jetspeed.search.ParsedObject.OBJECT_TYPE_URL); |
110 | // TODO: We should extract the <title> tag here. |
|
111 | 0 | result.setTitle(pageToAdd.toString()); |
112 | 0 | result.setContent(content); |
113 | 0 | result.setDescription(""); |
114 | 0 | result.setLanguage(""); |
115 | 0 | result.setURL(pageToAdd); |
116 | 0 | result.setClassName(o.getClass().getName()); |
117 | //logger.info("Parsed '" + pageToAdd.toString() + "'"); |
|
118 | } |
|
119 | 0 | catch (Exception e) |
120 | { |
|
121 | 0 | e.printStackTrace(); |
122 | //logger.error("Adding document to index", e); |
|
123 | 0 | } |
124 | } |
|
125 | } |
|
126 | } |
|
127 | finally |
|
128 | { |
|
129 | 0 | method.releaseConnection(); |
130 | 0 | } |
131 | ||
132 | 0 | return result; |
133 | ||
134 | } |
|
135 | } |
|
136 |
This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |