// ---------------------------------------------------------------------------
// NOTE(review): This is the <xsp:logic> interior of an Apache Cocoon XSP
// search page (Apache Lenya Lucene search). The XML markup was stripped
// during extraction; the list below is the page's import declarations with
// the "import" keyword and trailing ";" lost. Restore "import <name>;" per
// entry when reassembling the page.
// ---------------------------------------------------------------------------
java.io.FileNotFoundException java.util.Enumeration java.util.HashSet java.util.Hashtable java.util.Iterator java.util.StringTokenizer java.util.Vector org.apache.avalon.framework.context.ContextException org.apache.avalon.framework.component.ComponentException org.apache.cocoon.environment.Session org.apache.lenya.ac.Accreditable org.apache.lenya.ac.Identifiable org.apache.lenya.ac.Identity org.apache.lenya.lucene.ReTokenizeFile org.apache.lenya.lucene.Publication org.apache.lucene.analysis.Analyzer org.apache.lucene.analysis.standard.StandardAnalyzer org.apache.lucene.document.Document org.apache.lucene.document.Field org.apache.lucene.queryParser.QueryParser org.apache.lucene.queryParser.MultiFieldQueryParser org.apache.lucene.search.Hits org.apache.lucene.search.IndexSearcher org.apache.lucene.search.Query org.apache.lucene.search.Searcher org.apache.lucene.search.Sort

// Cocoon work directory, captured in contextualize().
File workDir = null;
// Lucene index directory and excerpt directory of the selected publication;
// resolved later in the page body from sitemap parameters.
File indexDir=null;
File excerptDir=null;
// Default search fields; may be overridden by "<pub-id>.fields" request parameters.
String[] fields={"contents","title"};
String field = "contents";
// Role/group names of the current user, filled from the session Identity in search().
Vector roles = new Vector();
// Protected URL prefix -> comma-separated list of roles allowed to see hits there.
Hashtable protectedAreas = new Hashtable();

/**
 * Contextualize this class: remember the Cocoon work directory.
 * NOTE(review): Context and Constants come from the XSP/Cocoon generated
 * class — their declarations are not visible in this fragment.
 */
public void contextualize(Context context) throws ContextException {
    super.contextualize( context );
    workDir = (File) context.get(Constants.CONTEXT_WORK_DIR);
}

/**
 * Search the Lucene index in indexDir.
 *
 * Side effects: appends to the page-level "roles" Vector every accreditable
 * of the session's Identity that is not also an identifiable (i.e. the
 * user's groups/roles), and assigns the page-level "hits" field.
 *
 * @param query_string   raw Lucene query string
 * @param publication_id id of the publication being searched (not used here;
 *                       indexDir has already been set for it by the page body)
 * @param sortField      Lucene field to sort by, or null for relevance order
 * @param sortReverse    reverse the sort (only used when sortField != null)
 * @return the Hits, or null when parsing failed or the search yielded nothing
 * @throws IOException when the index cannot be read
 */
Hits search(String query_string, String publication_id, String sortField, boolean sortReverse) throws ProcessingException, IOException{
    // Load roles
    Session session = request.getSession(true);
    if(session != null){
        Identity id=(Identity) session.getAttribute("org.apache.lenya.ac.Identity");
        if(id != null){
            Identifiable[] ids = id.getIdentifiables();
            Accreditable[] acs =id.getAccreditables();
            // Keep only accreditables that are NOT identifiables — by string
            // comparison — i.e. the group/role accreditables.
            for (int ai = 0; ai < acs.length; ai++) {
                boolean found = false;
                for(int i = 0; i < ids.length; i++){
                    if(ids[i].toString().equals(acs[ai].toString())){ found = true;}
                }
                if(!found){
                    roles.add(acs[ai].toString());
                }
            }
        } // id
    } // session

    hits=null;
    try{
        // NOTE(review): the IndexSearcher is never closed — resource leak on
        // every request.
        Searcher searcher=new IndexSearcher(indexDir.getAbsolutePath());
        Analyzer l_analyzer=new StandardAnalyzer();
        QueryParser l_queryParser = new QueryParser(field,l_analyzer); // Single field
        // AND semantics between terms instead of Lucene's default OR.
        l_queryParser.setOperator(QueryParser.DEFAULT_OPERATOR_AND);
        getLogger().debug(query_string);
        Query l_query = l_queryParser.parse(query_string); // Single field
        if (sortField != null) {
            Sort sort = new Sort(sortField, sortReverse);
            hits = searcher.search(l_query, sort);
        }else{
            hits = searcher.search(l_query);
        }
        if(hits != null){
            return hits;
        }
    }catch(IOException e){
        System.err.println(".search(): EXCEPTION: "+e);
        throw e;
    }catch(Exception e){
        // Query parse errors etc. are swallowed; the method then returns null.
        System.err.println(".search(): EXCEPTION: "+e);
    }
    return null;
}

/**
 * Format a Lucene score (0..1) as a whole-number percentage string.
 */
String getPercent(float score){
    return ""+java.lang.Math.round(score*100.0);
}

// Page state shared between the methods above and the page body below.
Hits hits;                     // current result set, null when no search ran
int hits_length=-1;            // number of hits, -1 when no search ran
String[] words=new String[0];  // query terms (minus AND/OR), used for excerpts
int hitsPerPage;
int maxPages;
int excerptOffset;             // excerpt length in characters
int start;                     // 1-based index of the first hit shown
int end;                       // 1-based index of the last hit shown

// ***********************
// *** Protected Areas ***
// ***********************
// DEFINITION: protectedAreas.add("/UrlStart", "group,group");
// UrlStart begins with / after .../live.
// There are no spaces between groups and commas.
// One hard-coded protected area: hits under /employee require the "employee" role.
protectedAreas.put("/employee", "employee");

// Get sitemap path
// NOTE(review): "resolver" and "parameters" (and "request" below) are implicit
// objects provided by the generated XSP class.
org.apache.excalibur.source.Source input_source=this.resolver.resolveURI("");
String sitemapPath=input_source.getURI();
sitemapPath=sitemapPath.substring(5); // Remove "file:" protocol

// Read parameters from sitemap
String numberOfPubs = parameters.getParameter("number-of-pubs", "1");
Publication[] pubs = new Publication[Integer.parseInt(numberOfPubs)];
for(int i = 0;i < pubs.length;i++) {
    pubs[i] = new Publication();
    // The "@...@" defaults look like build-time placeholder markers.
    pubs[i].id = parameters.getParameter("pub"+i+"-id","@ID@");
    pubs[i].name = parameters.getParameter("pub"+i+"-name","@NAME@");
    pubs[i].indexDir = parameters.getParameter("pub"+i+"-index-dir","@INDEX-DIR@");
    pubs[i].searchFields = parameters.getParameter("pub"+i+"-search-fields","title,contents");
    pubs[i].excerptDir = parameters.getParameter("pub"+i+"-excerpt-dir","@EXCERPT-DIR@");
    pubs[i].prefix = parameters.getParameter("pub"+i+"-prefix","@PREFIX@");
}
String param_hits_per_page = parameters.getParameter("max-hits-per-page","13");
hitsPerPage = Integer.parseInt(param_hits_per_page);
String param_max_pages = parameters.getParameter("max-pages","5");
maxPages = Integer.parseInt(param_max_pages);
String param_excerpt_offset = parameters.getParameter("excerpt-offset","100");
excerptOffset = Integer.parseInt(param_excerpt_offset);

// Read parameters from query string
// NOTE(review): the initializers below were <xsp-request:...> getter tags
// that were lost when the XML markup was stripped — these statements are
// incomplete as extracted and must be restored from the original page.
String urlQuerystring = ;
String query = ;
String publication_id = ;
String sortBy = ;
String sortReverse = ;

// Scan the raw query string for every "language=<x>" pair; the tokenizer
// returns "&" and "=" as tokens too (third arg true), so "=" is checked
// explicitly after a "language" key token.
String language = "";
StringTokenizer qsTokens = new StringTokenizer(urlQuerystring, "&=", true);
String token = "";
HashSet languageSet = new HashSet();
if(qsTokens.hasMoreTokens()){
    token = qsTokens.nextToken();
}
while(qsTokens.hasMoreTokens()){
    if(token.equalsIgnoreCase("language")){
        token = qsTokens.nextToken();
        if(token.equals("=") && qsTokens.hasMoreTokens()){
            languageSet.add(qsTokens.nextToken());
        }
    }else{
        token = qsTokens.nextToken();
    }
}
Iterator languageSetItems = languageSet.iterator();
// Join the collected languages into a comma-separated list; default to "en".
if(languageSetItems.hasNext()){
    language = languageSetItems.next().toString();
}
while(languageSetItems.hasNext()){
    language += "," + languageSetItems.next().toString();
}
if(language.length() == 0) language = "en";

// Paging window (1-based hit indices).
// NOTE(review): the initializers were <xsp-request:get-parameter> tags, lost
// when the XML markup was stripped — these statements are incomplete as
// extracted.
String startString = ;
String endString = ;
start=new Integer(startString).intValue();
if(endString == null){
    end=hitsPerPage;
}else{
    end=new Integer(endString).intValue();
}

// Find the number of the selected publication
int whichPublication=0;
for (int i = 0;i < pubs.length;i++) {
    if (pubs[i].id.equals(publication_id)) {
        whichPublication = i;
    }
}

// Get all search fields
// Field parameters are named "<pub-id>.fields" (radio/select, value carries
// the field) or "<pub-id>.fields.<name>" (checkbox, name carries the field).
Vector twords = null;
Vector myFields = new Vector();
Enumeration parameterNames = request.getParameterNames();
while(parameterNames.hasMoreElements()){
    String parameterName=(String)parameterNames.nextElement();
    String value=request.getParameter(parameterName);
    if (parameterName.indexOf(".fields") > 0) { // looking for field parameters
        StringTokenizer st = new StringTokenizer(parameterName, ".");
        int length = st.countTokens();
        if(st.hasMoreTokens()){
            String fieldPublicationId = st.nextToken();
            if(st.hasMoreTokens()){
                if(fieldPublicationId.equals(publication_id) || fieldPublicationId.equals("dummy-index-id")) {
                    st.nextToken(); // Ignore "fields" token
                    if(length == 2) { // radio or select
                        myFields.addElement(value);
                    }else if (length == 3) { // checkbox
                        myFields.addElement(st.nextToken());
                    }else{
                        // something is wrong
                    }
                }
            }
        }
    }
}
if(myFields.size() > 0) {
    // First selected field becomes the parse field for QueryParser below.
    field = (String)myFields.elementAt(0);
    fields = new String[myFields.size()];
    for(int i = 0; i < myFields.size(); i++) {
        fields[i] = (String)myFields.elementAt(i);
    }
}

// Set index and excerpt dir
// Absolute paths are used as-is; relative ones resolve against the sitemap dir.
String param_index_dir=pubs[whichPublication].indexDir;
if(param_index_dir.charAt(0) == '/'){
    indexDir=new File(param_index_dir);
}
else{
    indexDir=new File(sitemapPath+File.separator+param_index_dir);
}
String param_excerpt_dir=pubs[whichPublication].excerptDir;
if(param_excerpt_dir.charAt(0) == '/'){
    excerptDir=new File(param_excerpt_dir);
}else{
    excerptDir=new File(sitemapPath+File.separator+param_excerpt_dir);
}

// ---------------------------------------------------------------------------
// NOTE(review): From here on, bare expressions standing alone (for example
// "publication_id", "pubs[i].id") were <xsp:expr> output tags whose XML was
// stripped; they emitted values into the generated page. Fused tokens such as
// "para_namepara_value" are two adjacent stripped tags, and
// "pubs[i].searchFields.getFields() returned null" mixed an expression with
// literal output text. This section echoes configuration and request
// parameters into the output document.
// ---------------------------------------------------------------------------
publication_id numberOfPubs
for(int i = 0;i < pubs.length;i++) {
    pubs[i].id pubs[i].name pubs[i].indexDir
    String[] searchFields = pubs[i].getFields();
    if (searchFields != null) {
        for (int k = 0; k < searchFields.length; k++) {
            searchFields[k]
        }
    } else {
        pubs[i].searchFields.getFields() returned null
    }
    pubs[i].excerptDir pubs[i].prefix
}
hitsPerPage maxPages excerptOffset
Enumeration para_names = request.getParameterNames();
if(para_names.hasMoreElements()){
    while(para_names.hasMoreElements()){
        String para_name=(String)para_names.nextElement();
        String para_value=request.getParameter(para_name);
        para_namepara_value para_namepara_value
    }
}

// Run the search only when both a query and a publication id were supplied.
if(query != null && query.length() != 0 && publication_id != null && publication_id.length() > 0){
    try {
        if (sortBy.equals("score")) {
            // Relevance order: no sort field.
            hits = search(query, publication_id, null, false);
        } else {
            if (sortReverse.equals("true")) {
                hits = search(query, publication_id, sortBy, true);
            } else {
                hits = search(query, publication_id, sortBy, false);
            }
        }
    } catch(Exception e) {
        // NOTE(review): stripped <xsp:expr> — the exception text was emitted
        // into the page; the exception itself is otherwise swallowed.
        e.toString()
    }
    if(hits != null){
        hits_length=hits.length();
    }
    else{
        hits_length=-1;
        hits=null;
    }
    publication_id pubs[whichPublication].name pubs[whichPublication].prefix sortBy query
    // Split the query into words (dropping AND/OR) for excerpt highlighting.
    if(query != null){
        twords = new Vector();
        StringTokenizer st=new StringTokenizer(query," ");
        while(st.hasMoreTokens()){
            String word=(String)st.nextElement();
            if(!(word.equalsIgnoreCase("OR") || word.equalsIgnoreCase("AND"))){
                word
                twords.addElement(word);
            }
        }
        words=new String[twords.size()];
        for(int i=0;i<twords.size();i++){
            words[i]=(String)twords.elementAt(i);
        }
    }
    start end language
    for (int i = 0; i < fields.length; i++) {
        fields[i]
    }
    // Re-parse the query only to echo its analyzed form into the output.
    try{
        Analyzer ll_analyzer=new StandardAnalyzer();
        QueryParser queryParser = new QueryParser(field,ll_analyzer);
        //MultiFieldQueryParser queryParser = new MultiFieldQueryParser("contents",ll_analyzer);
        queryParser.setOperator(QueryParser.DEFAULT_OPERATOR_AND);
        Query ll_query = queryParser.parse(query);
        //Query ll_query = queryParser.parse(query,fields,ll_analyzer);
        ll_query.toString("contents")
    } catch(Exception e){
        e.toString()
    }
}
else{
    hits_length=-1;
    hits=null;
}

// Walk all hits, filter by language and access restrictions, and emit the
// ones that fall inside the [start, end] paging window.
if(hits != null){
    int validCount = 0; //number of valid results
    if(hits_length > 0){
        // i = index of result. validCount = count valid results.
        for (int i = 0; (i < hits.length()); i++) {
            Document ldoc=hits.doc(i);
            Enumeration lfields = ldoc.fields();
            String lpath=ldoc.get("path");
            String lurl=ldoc.get("url");
            String ltitle=ldoc.get("title");
            String mime_type=ldoc.get("mime-type");
            String docLanguage = "";
            if(lpath != null){
                // Path-indexed document: emit only score and path.
                getPercent(hits.score(i)) hits.score(i) lpath
            }
            else if(lurl != null){
                // Check Language
                // This also filters sitetree.xml since it has no language.
                docLanguage = "";
                while (lfields.hasMoreElements()) {
                    Field lfield = (Field)lfields.nextElement();
                    if(0 == lfield.name().compareTo("language")){
                        docLanguage = lfield.stringValue();
                    }
                }
                language docLanguage
                // NOTE(review): indexOf-based containment means "de" would
                // also match a list entry "de-CH" — presumably acceptable,
                // but confirm.
                if((docLanguage.length() > 0) && (language.indexOf(docLanguage) != -1)){
                    // Get URL parts
                    String parent = "";
                    String filename = "";
                    String querystring = "";
                    if(lurl.lastIndexOf("/") > -1) {
                        parent = lurl.substring(0,lurl.lastIndexOf("/"));
                        filename = lurl.substring(lurl.lastIndexOf("/")+1);
                    }
                    if(lurl.indexOf("?") > -1) {
                        querystring = lurl.substring(lurl.indexOf("?"));
                    }
                    // Check Restricted
                    boolean restricted = false;
                    // Get list of restricted prefixes and check against roles.
                    // A hit under a protected prefix stays restricted unless
                    // the user holds one of the allowed roles.
                    Enumeration protectedArea = protectedAreas.keys();
                    while((!restricted) && protectedArea.hasMoreElements()){
                        String startUrl = (String) protectedArea.nextElement();
                        if(parent.startsWith(startUrl)){
                            StringTokenizer rolesAllowed = new StringTokenizer((String)protectedAreas.get(startUrl), ",");
                            restricted = true;
                            while(rolesAllowed.hasMoreElements()){
                                // Check roles
                                if(roles.contains(rolesAllowed.nextElement())){
                                    restricted = false;
                                }
                            }
                        }
                    }
                    if(!restricted){
                        // Build hit
                        validCount++;
                        if((validCount >= start) && (validCount <= end)){
                            validCount
                            // Highlight query words in the stored "htmlbody"
                            // field by wrapping each occurrence in '~'
                            // markers, then cut an excerpt of roughly
                            // excerptOffset characters around the first match.
                            lfields = ldoc.fields();
                            int first = -1;
                            while (lfields.hasMoreElements()) {
                                Field lfield = (Field)lfields.nextElement();
                                String slfield = lfield.stringValue();
                                if(lfield.name().equals("htmlbody")){
                                    String tmphtmlbody = slfield;
                                    String upperhtmlbody = tmphtmlbody.toUpperCase();
                                    if(twords != null){
                                        Enumeration twordsE = twords.elements();
                                        while(twordsE.hasMoreElements()){
                                            int last = 0;
                                            String word = twordsE.nextElement().toString();
                                            String upperword = word.toUpperCase();
                                            int wordLen = word.length();
                                            StringBuffer sb = new StringBuffer();
                                            // Case-insensitive match via uppercased copies.
                                            int current = upperhtmlbody.indexOf(upperword);
                                            if((current < first) || (first == -1)) first = current;
                                            while(current > last){
                                                sb.append(tmphtmlbody.substring(last, current));
                                                sb.append("~").append(tmphtmlbody.substring(current, current + wordLen)).append("~");
                                                last = current + wordLen;
                                                current = upperhtmlbody.indexOf(upperword, last);
                                            }
                                            sb.append(tmphtmlbody.substring(last));
                                            tmphtmlbody = sb.toString();
                                            upperhtmlbody = tmphtmlbody.toUpperCase();
                                        }
                                    }
                                    if(slfield.length() > excerptOffset){
                                        // Trim the excerpt at word boundaries around the
                                        // first match.
                                        // NOTE(review): these locals shadow the page-level
                                        // paging fields "start"/"end"; indexOf may return -1
                                        // and substring would then throw — TODO confirm
                                        // inputs make that impossible.
                                        int start = 0;
                                        int end = excerptOffset;
                                        int half = excerptOffset/2;
                                        if(first < half){
                                            end = tmphtmlbody.indexOf(' ', excerptOffset);
                                        }else{
                                            start = tmphtmlbody.indexOf(' ', first - half);
                                            end = tmphtmlbody.indexOf(' ', start + excerptOffset);
                                        }
                                        tmphtmlbody = tmphtmlbody.substring(start, end);
                                    }
                                    // Emit the excerpt: '~'-delimited tokens alternate
                                    // plain text and highlighted words.
                                    StringTokenizer tokens = new StringTokenizer(tmphtmlbody, "~");
                                    boolean needCloseHtmlBody = false;
                                    if(tokens.hasMoreTokens()){
                                        needCloseHtmlBody = true;
                                        tokens.nextToken()
                                    }
                                    while(tokens.hasMoreTokens()){
                                        tokens.nextToken()
                                        if(tokens.hasMoreTokens()){
                                            tokens.nextToken()
                                        }
                                    }
                                    if(needCloseHtmlBody){
                                    }
                                }else{
                                    // Any other stored field: emit name and value.
                                    lfield.name()slfield
                                }
                            }
                            getPercent(hits.score(i))hits.score(i) parent filename querystring lurl
                            File excerptFile=new File(excerptDir+File.separator+lurl);
                            // NOTE(review): the literal <xsp:expr> tags below survived
                            // the extraction that stripped all the others.
                            if((ltitle != null) && (ltitle.length() > 0)){
                                <xsp:expr>ltitle</xsp:expr>
                            }else{
                                <xsp:expr>excerptFile.getName()</xsp:expr>
                            }
                            if((mime_type != null) && (mime_type.length() > 0)){
                                mime_type
                            }else{
                            }
                            // Build the emphasized excerpt from the excerpt file on disk.
                            try{
                                ReTokenizeFile rtf=new ReTokenizeFile();
                                rtf.setOffset(excerptOffset);
                                String excerpt=rtf.getExcerpt(excerptFile,words);
                                if(excerpt != null){
                                    excerpt=rtf.emphasizeAsXML(rtf.tidy(excerpt),words);
                                    excerpt
                                }else{
                                    throw new Exception("excerpt == null. Maybe file does not contain the words!");
                                }
                            }
                            catch(FileNotFoundException e){
                                // NOTE(review): words[0] throws if the query had only
                                // AND/OR terms (words empty) — TODO confirm unreachable.
                                excerptFile.getAbsolutePath()+" "+words[0]+" "+e
                            }
                            catch(Exception e){
                                ""+e
                            }
                        }
                    } // END - Within range (start-end)
                } // END - Check Restricted
            } // END - Check Language
        }
    }else{
    }

    // Pagination: emit up to maxPages page entries, tagging each as
    // current/previous/next/other relative to the current start index.
    int number_of_pages=(validCount/hitsPerPage);
    if(number_of_pages*hitsPerPage != validCount){
        number_of_pages=number_of_pages+1;
    }
    if(number_of_pages > maxPages){
        number_of_pages=maxPages;
    }
    if(validCount == 0){
        number_of_pages=0;
    }
    else{
        for(int i=0;i<number_of_pages;i++){
            int pstart=i*hitsPerPage+1;
            int pend=(i+1)*hitsPerPage;
            if(validCount < pend){
                pend=validCount;
            }
            String type="other";
            if(pstart == start){
                type="current";
            }
            else if(pstart == start-hitsPerPage){
                type="previous";
            }
            else if(pstart == start+hitsPerPage){
                type="next";
            }
            pstart pend type
        }
    }
    validCount
}