Welcome to Apache Jackrabbit - Populate workspace: <%= Text.encodeIllegalXMLCharacters(wspName) %>

Populate workspace: "<%= Text.encodeIllegalXMLCharacters(wspName) %>"

Overall progress

Downloading document

<%
    // Populates the workspace with documents found through Search.
    //
    // For every requested file type, result pages are fetched (10 hits per page)
    // and each hit is stored as nt:folder/.../nt:file below the root node. The
    // folder chain is the reversed host name followed by the URL path segments.
    // Each download runs in a helper thread so that this request thread can keep
    // writing to the response while the transfer is in progress.
    //
    // Reads page-scoped variables declared outside this scriptlet: jcrSession,
    // filetypes, numDocs, seedWord and the implicit JSP objects out/application.
    Node root = jcrSession.getRootNode();
    int n = 0; // number of documents successfully saved so far (across all types)
    for (int typeIdx = 0; typeIdx < filetypes.size(); typeIdx++) {
        String type = (String) filetypes.get(typeIdx);
        int offset = 0;
        // The bound is cumulative: after type k, n should have reached
        // numDocs * (k + 1) / filetypes.size() documents.
        while (n < numDocs * (typeIdx + 1) / filetypes.size()) {
            final URL[] urls = new Search(type, seedWord, offset).getURLs();
            if (urls.length == 0) {
                // no further search results for this type
                break;
            }
            for (int i = 0; i < urls.length; i++) {
                final URL currentURL = urls[i];
                String path = urls[i].getPath();
                if (path.startsWith("/")) {
                    path = path.substring(1);
                }
                final String host = urls[i].getHost();

                // "www.example.org" + "a/b/c.pdf" -> [org, example, www, a, b, c.pdf]
                List folderNames = new ArrayList();
                folderNames.addAll(Arrays.asList(host.split("\\.")));
                Collections.reverse(folderNames);
                folderNames.addAll(Arrays.asList(path.split("/", 0)));

                // The last segment is the file name; ':' is replaced because it
                // is the namespace prefix delimiter in JCR names.
                final String fileName = URLDecoder.decode(
                        (String) folderNames.remove(folderNames.size() - 1),
                        "UTF-8").replaceAll(":", "_");
                if (fileName.length() == 0) {
                    // An empty URL path yields an empty file name, which cannot
                    // be used as a child node name. Skip the hit before any
                    // folder is created for it.
                    continue;
                }

                // Walk or create the folder chain below the root node.
                Node node = root;
                for (Iterator fn = folderNames.iterator(); fn.hasNext(); ) {
                    String name = URLDecoder.decode((String) fn.next(), "UTF-8");
                    name = name.replaceAll(":", "_");
                    if (name.length() == 0) {
                        // e.g. produced by consecutive slashes in the path
                        continue;
                    }
                    if (!node.hasNode(name)) {
                        node.addNode(name, "nt:folder");
                    }
                    node = node.getNode(name);
                }

                if (!node.hasNode(fileName)) {
                    final JspWriter fOut = out;
                    Node file = node.addNode(fileName, "nt:file");
                    final Node resource = file.addNode("jcr:content", "nt:resource");
                    // Single-slot holder so the worker can hand a failure back.
                    final Exception[] ex = new Exception[1];
                    Thread t = new Thread(new Runnable() {
                        public void run() {
                            try {
                                String info = fileName + " (" + host + ")";
                                URLConnection con = currentURL.openConnection();
                                InputStream in = con.getInputStream();
                                try {
                                    // Writes to the response are serialized on
                                    // fOut because both threads print to it.
                                    synchronized (fOut) {
                                        fOut.println("");
                                        fOut.flush();
                                    }
                                    int length = con.getContentLength();
                                    if (length != -1) {
                                        // Known size: wrap the stream so that
                                        // progress can be reported while reading.
                                        in = new ProgressInputStream(in, length, info, "dp", fOut);
                                    }
                                    resource.setProperty("jcr:data", in);

                                    String mimeType = URLConnection.guessContentTypeFromName(fileName);
                                    if (mimeType == null) {
                                        // Fallback for Office types; compare in
                                        // lower case so "REPORT.DOC" matches too.
                                        String lowerName = fileName.toLowerCase(java.util.Locale.ENGLISH);
                                        if (lowerName.endsWith(".doc")) {
                                            mimeType = "application/msword";
                                        } else if (lowerName.endsWith(".xls")) {
                                            mimeType = "application/vnd.ms-excel";
                                        } else if (lowerName.endsWith(".ppt")) {
                                            mimeType = "application/mspowerpoint";
                                        } else {
                                            mimeType = "application/octet-stream";
                                        }
                                    }
                                    resource.setProperty("jcr:mimeType", mimeType);

                                    // getLastModified() returns 0 when the date
                                    // is unknown; keep "now" in that case rather
                                    // than stamping the file with 1970-01-01.
                                    Calendar lastModified = Calendar.getInstance();
                                    long modified = con.getLastModified();
                                    if (modified > 0) {
                                        lastModified.setTimeInMillis(modified);
                                    }
                                    resource.setProperty("jcr:lastModified", lastModified);
                                } finally {
                                    in.close();
                                }
                            } catch (Exception e) {
                                ex[0] = e;
                            }
                        }
                    });
                    t.start();

                    // Poll until the worker ends; roughly once per second
                    // (every 10th 100 ms tick) print and flush a line.
                    // NOTE(review): the printed literal is empty in this
                    // revision - presumably a keep-alive/progress marker.
                    for (int s = 0; t.isAlive(); s++) {
                        Thread.sleep(100);
                        if (s % 10 == 0) {
                            synchronized (fOut) {
                                fOut.println("");
                                fOut.flush();
                            }
                        }
                    }

                    if (ex[0] == null) {
                        // Persist the new file (and any pending folders).
                        jcrSession.save();
                        n++;
                        synchronized (fOut) {
                            fOut.println("");
                            fOut.flush();
                        }
                        if (n >= numDocs * (typeIdx + 1) / filetypes.size()) {
                            // target for this type reached; move to next type
                            break;
                        }
                    } else {
                        // Download failed: record why instead of dropping the
                        // exception silently, then discard the pending changes.
                        application.log("Failed to import " + currentURL, ex[0]);
                        jcrSession.refresh(false);
                    }
                }
            }
            // advance to the next page of search results
            offset += 10;
        }
    }
%>

Seed word (optional):	"/>
Number of documents:
Document types:	/> Adobe Acrobat PDF /> Rich Text Format /> Microsoft Word /> Microsoft PowerPoint /> Microsoft Excel