<%--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--%><%@ page import="java.io.FilterInputStream,
java.io.IOException,
java.io.InputStream,
java.io.InputStreamReader,
java.net.URL,
java.net.URLConnection,
java.net.URLDecoder,
java.net.URLEncoder,
java.util.ArrayList,
java.util.Arrays,
java.util.Calendar,
java.util.Collections,
java.util.Iterator,
java.util.List,
javax.jcr.Node,
javax.jcr.Repository,
javax.jcr.Session,
javax.jcr.SimpleCredentials,
javax.swing.text.AttributeSet,
javax.swing.text.html.HTML,
javax.swing.text.html.HTMLDocument,
javax.swing.text.html.HTMLEditorKit,
org.apache.jackrabbit.j2ee.RepositoryAccessServlet,
org.apache.jackrabbit.util.Text"
%><%@ page contentType="text/html;charset=UTF-8" %><%
// Repository handles shared by the rest of this page. They are assigned in
// the try block below; if anything in there fails, an error message is
// rendered and the page returns early, so code after this block only runs
// with all three initialised.
Repository rep;
Session jcrSession;
String wspName;
try {
    // Obtain the repository via the access servlet for this web application,
    // log in with the fixed "user" account and an empty password, and
    // remember the name of the session's workspace.
    rep = RepositoryAccessServlet.getRepository(pageContext.getServletContext());
    jcrSession = rep.login(new SimpleCredentials("user", "".toCharArray()));
    wspName = jcrSession.getWorkspace().getName();
} catch (Throwable e) {
    // Throwable.getMessage() may legitimately be null (for example for a
    // bare NullPointerException). Fall back to toString(), which includes the
    // throwable's class name, so that the error page always shows something
    // useful and a null is never handed to the encoder.
    // NOTE(review): Text.encodeIllegalXMLCharacters is believed to reject a
    // null argument - confirm against the Jackrabbit Text API.
    String errorMessage = e.getMessage();
    if (errorMessage == null) {
        errorMessage = e.toString();
    }
%>Error while accessing the repository: <%= Text.encodeIllegalXMLCharacters(errorMessage) %>
<%
%>Check the configuration or use the easy setup wizard.<%
    // Nothing else on this page can work without a session.
    return;
}
try {
// --- Request parameters -------------------------------------------------

// "seed": the search term. When present, its characters are turned back
// into bytes using ISO-8859-1 and those bytes are then decoded as UTF-8.
String seedWord = request.getParameter("seed");
if (seedWord != null) {
    byte[] rawSeed = seedWord.getBytes("ISO-8859-1");
    seedWord = new String(rawSeed, "UTF-8");
}

// "num": how many documents to fetch. Remains 0 when the parameter is
// missing or is not a valid integer.
int numDocs = 0;
if (request.getParameter("num") != null) {
    try {
        numDocs = Integer.parseInt(request.getParameter("num"));
    } catch (NumberFormatException ignored) {
        // not a number: keep the default of 0
    }
}

// "filetype": zero or more values. Without any, the page-wide
// DEFAULT_TYPES list is used as-is; otherwise the submitted values are
// copied into a fresh list.
String[] types = request.getParameterValues("filetype");
List filetypes;
if (types == null) {
    filetypes = DEFAULT_TYPES;
} else {
    filetypes = new ArrayList(Arrays.asList(types));
}
if (seedWord != null && numDocs > 0 && filetypes.size() > 0) { %>
Overall progress
Downloading document
<%
    // Import loop. For every requested file type, run searches for the seed
    // word and store each result that is not yet in the workspace as an
    // nt:file node, until the requested number of documents has been saved
    // or the search returns no more URLs.
    Node root = jcrSession.getRootNode();
    // Number of documents saved so far, counted across all file types.
    int n = 0;
    for (int typeIdx = 0; typeIdx < filetypes.size(); typeIdx++) {
        String type = (String) filetypes.get(typeIdx);
        int offset = 0;
        // Cumulative quota: after processing type k (0-based) at most
        // numDocs * (k + 1) / filetypes.size() documents (integer division)
        // have been saved in total.
        while (n < numDocs * (typeIdx + 1) / filetypes.size()) {
            // Search is declared outside this view; it is given the file
            // type, the seed word and the current result offset.
            final URL[] urls = new Search(type, seedWord, offset).getURLs();
            if (urls.length == 0) {
                // No (more) results for this type: move on to the next type.
                break;
            }
            for (int i = 0; i < urls.length; i++) {
                final URL currentURL = urls[i];
                String path = urls[i].getPath();
                if (path.startsWith("/")) {
                    path = path.substring(1);
                }
                final String host = urls[i].getHost();
                // Target location: the host name components in reverse order
                // (a.b.c becomes c, b, a), followed by the URL path segments.
                List folderNames = new ArrayList();
                folderNames.addAll(Arrays.asList(host.split("\\.")));
                Collections.reverse(folderNames);
                // A limit of 0 makes split() drop trailing empty segments.
                folderNames.addAll(Arrays.asList(path.split("/", 0)));
                // The last segment is the file name; it is URL-decoded and
                // every ':' is replaced with '_'.
                final String fileName = URLDecoder.decode((String) folderNames.remove(folderNames.size() - 1), "UTF-8").replaceAll(":", "_");
                // Walk down from the root node, creating nt:folder nodes
                // that do not exist yet.
                Node node = root;
                for (Iterator fn = folderNames.iterator(); fn.hasNext(); ) {
                    String name = URLDecoder.decode((String) fn.next(), "UTF-8");
                    name = name.replaceAll(":", "_");
                    if (name.length() == 0) {
                        // Skip empty segments (e.g. produced by "//").
                        continue;
                    }
                    if (!node.hasNode(name)) {
                        node.addNode(name, "nt:folder");
                    }
                    node = node.getNode(name);
                }
                // Only download when no node with this name exists yet.
                if (!node.hasNode(fileName)) {
                    // Final alias so the anonymous Runnable can use the writer.
                    final JspWriter fOut = out;
                    Node file = node.addNode(fileName, "nt:file");
                    final Node resource = file.addNode("jcr:content", "nt:resource");
                    // One-element array: lets the worker thread hand an
                    // exception back to the request thread.
                    final Exception[] ex = new Exception[1];
                    // The download runs in its own thread while the request
                    // thread below keeps writing to the response.
                    Thread t = new Thread(new Runnable() {
                        public void run() {
                            try {
                                String info = fileName + " (" + host + ")";
                                URLConnection con = currentURL.openConnection();
                                InputStream in = con.getInputStream();
                                try {
                                    // Both threads write to fOut, always
                                    // while holding its monitor. The printed
                                    // literal is empty in this view.
                                    synchronized (fOut) {
                                        fOut.println("");
                                        fOut.flush();
                                    }
                                    // getContentLength() returns -1 when the
                                    // length is unknown; only wrap the stream
                                    // when it is known. ProgressInputStream
                                    // is declared outside this view -
                                    // presumably it reports progress to fOut;
                                    // TODO confirm.
                                    int length = con.getContentLength();
                                    if (length != -1) {
                                        in = new ProgressInputStream(in, length, info, "dp", fOut);
                                    }
                                    resource.setProperty("jcr:data", in);
                                    // MIME type: guess from the file name,
                                    // then fall back to a few known
                                    // extensions, then to a generic type.
                                    String mimeType = URLConnection.guessContentTypeFromName(fileName);
                                    if (mimeType == null) {
                                        if (fileName.endsWith(".doc")) {
                                            mimeType = "application/msword";
                                        } else if (fileName.endsWith(".xls")) {
                                            mimeType = "application/vnd.ms-excel";
                                        } else if (fileName.endsWith(".ppt")) {
                                            mimeType = "application/mspowerpoint";
                                        } else {
                                            mimeType = "application/octet-stream";
                                        }
                                    }
                                    resource.setProperty("jcr:mimeType", mimeType);
                                    // Uses the timestamp reported by the
                                    // connection as jcr:lastModified.
                                    Calendar lastModified = Calendar.getInstance();
                                    lastModified.setTimeInMillis(con.getLastModified());
                                    resource.setProperty("jcr:lastModified", lastModified);
                                } finally {
                                    in.close();
                                }
                            } catch (Exception e) {
                                // Remember the failure for the request thread.
                                ex[0] = e;
                            }
                        }
                    });
                    t.start();
                    // Poll every 100 ms until the download thread ends; on
                    // every 10th poll write a line to the response and flush.
                    for (int s = 0; t.isAlive(); s++) {
                        Thread.sleep(100);
                        if (s % 10 == 0) {
                            synchronized (fOut) {
                                fOut.println("");
                                fOut.flush();
                            }
                        }
                    }
                    if (ex[0] == null) {
                        // Download succeeded: persist the new nodes and
                        // count the document.
                        jcrSession.save();
                        n++;
                        synchronized (fOut) {
                            fOut.println("");
                            fOut.flush();
                        }
                        // Quota for this type reached: leave the URL loop
                        // (the enclosing while condition then fails too).
                        if (n >= numDocs * (typeIdx + 1) / filetypes.size()) {
                            break;
                        }
                    } else {
                        // Download failed: refresh(false) drops the unsaved
                        // nodes added above for this URL.
                        jcrSession.refresh(false);
                    }
                }
            }
            // Ask for the next batch of results.
            // NOTE(review): assumes Search returns results in pages of 10 -
            // confirm against Search.
            offset += 10;
        }
    }
%>This page allows you to populate the workspace with documents downloaded from the Internet.