View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.jetspeed.search.lucene;
18  
19  import java.io.File;
20  import java.io.IOException;
21  import java.net.URL;
22  import java.util.ArrayList;
23  import java.util.Collection;
24  import java.util.HashMap;
25  import java.util.Iterator;
26  import java.util.Map;
27  import java.util.Set;
28  
29  import org.apache.commons.collections.MultiHashMap;
30  import org.apache.commons.collections.MultiMap;
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.jetspeed.search.BaseParsedObject;
34  import org.apache.jetspeed.search.HandlerFactory;
35  import org.apache.jetspeed.search.ObjectHandler;
36  import org.apache.jetspeed.search.ParsedObject;
37  import org.apache.jetspeed.search.SearchEngine;
38  import org.apache.jetspeed.search.SearchResults;
39  import org.apache.lucene.analysis.Analyzer;
40  import org.apache.lucene.analysis.standard.StandardAnalyzer;
41  import org.apache.lucene.document.Document;
42  import org.apache.lucene.document.Field;
43  import org.apache.lucene.index.IndexReader;
44  import org.apache.lucene.index.IndexWriter;
45  import org.apache.lucene.index.Term;
46  import org.apache.lucene.queryParser.MultiFieldQueryParser;
47  import org.apache.lucene.queryParser.ParseException;
48  import org.apache.lucene.search.Hits;
49  import org.apache.lucene.search.IndexSearcher;
50  import org.apache.lucene.search.Query;
51  import org.apache.lucene.search.Searcher;
52  
53  /***
54   * @author <a href="mailto: jford@apache.org">Jeremy Ford</a>
55   *
56   */
57  public class SearchEngineImpl implements SearchEngine
58  {
59      protected final static Log log = LogFactory.getLog(SearchEngineImpl.class);
60      private File rootIndexDir = null;
61      private String analyzerClassName = null;
62      private boolean optimizeAfterUpdate = true;
63      private HandlerFactory handlerFactory;
64      
65      private static final int KEYWORD = 0;
66      private static final int TEXT = 1;
67      
68      public SearchEngineImpl(String indexRoot, String analyzerClassName, boolean optimzeAfterUpdate, HandlerFactory handlerFactory)
69      throws Exception
70      {
71          //assume it's full path for now
72          rootIndexDir = new File(indexRoot);
73          this.analyzerClassName = analyzerClassName;
74          this.optimizeAfterUpdate = optimzeAfterUpdate;
75          this.handlerFactory = handlerFactory;
76          
77          try
78          {
79              Searcher searcher = null;
80              searcher = new IndexSearcher(rootIndexDir.getPath());
81              searcher.close();
82          }
83          catch (Exception e)
84          {
85              if (rootIndexDir.exists())
86              {
87                  log.error("Failed to open Portal Registry indexes in " + rootIndexDir.getPath(), e);
88              }
89              try
90              {
91                  rootIndexDir.delete();
92                  rootIndexDir.mkdirs();
93                  
94                  IndexWriter indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(), true);
95                  indexWriter.close();
96                  indexWriter = null;
97                  log.warn("Re-created Lucene Index in " + rootIndexDir.getPath());
98              }
99              catch (Exception e1)
100             {
101                 String message = "Cannot RECREATE Portlet Registry indexes in "  + rootIndexDir.getPath();
102                 log.error(message, e1);
103                 throw new Exception(message);
104             }
105         }
106     }
107 
108     /* (non-Javadoc)
109      * @see org.apache.jetspeed.search.SearchEnging#add(java.lang.Object)
110      */
111     public boolean add(Object o)
112     {
113         Collection c = new ArrayList(1);
114         c.add(o);
115 
116         return add(c);
117     }
118 
119     /* (non-Javadoc)
120      * @see org.apache.jetspeed.search.SearchEnging#add(java.util.Collection)
121      */
122     public synchronized boolean add(Collection objects)
123     {
124         boolean result = false;
125         
126         IndexWriter indexWriter;
127         try
128         {
129             indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(), false);
130         }
131         catch (IOException e)
132         {
133             //logger.error("Error while creating index writer. Skipping add...", e);
134             return result;
135         }
136 
137         Iterator it = objects.iterator();
138         while (it.hasNext()) 
139         {
140             Object o = it.next();
141             // Look up appropriate handler
142             ObjectHandler handler = null;
143             try
144             {
145                 handler = handlerFactory.getHandler(o);
146             }
147             catch (Exception e)
148             {
149                 //logger.error("Failed to create hanlder for object " + o.getClass().getName());
150                 continue;
151             }
152 
153             // Parse the object
154             ParsedObject parsedObject = handler.parseObject(o);
155 
156             // Create document
157             Document doc = new Document();
158 
159             // Populate document from the parsed object
160             if (parsedObject.getKey() != null)
161             {                
162                 doc.add(new Field(ParsedObject.FIELDNAME_KEY, parsedObject.getKey(), Field.Store.YES, Field.Index.UN_TOKENIZED));
163             }
164             if (parsedObject.getType() != null)
165             {
166                 doc.add(new Field(ParsedObject.FIELDNAME_TYPE, parsedObject.getType(), Field.Store.YES, Field.Index.TOKENIZED));
167             }
168             if (parsedObject.getTitle() != null)
169             {
170                 doc.add(new Field(ParsedObject.FIELDNAME_TITLE, parsedObject.getTitle(), Field.Store.YES, Field.Index.TOKENIZED));
171             }
172             if (parsedObject.getDescription() != null)
173             {
174                 doc.add(new Field(ParsedObject.FIELDNAME_DESCRIPTION, parsedObject.getDescription(), Field.Store.YES, Field.Index.TOKENIZED));
175             }
176             if (parsedObject.getContent() != null)
177             {
178                 doc.add(new Field(ParsedObject.FIELDNAME_CONTENT, parsedObject.getContent(), Field.Store.YES, Field.Index.TOKENIZED));
179             }
180             if (parsedObject.getLanguage() != null)
181             {
182                 doc.add(new Field(ParsedObject.FIELDNAME_LANGUAGE, parsedObject.getLanguage(), Field.Store.YES, Field.Index.TOKENIZED));
183             }
184             if (parsedObject.getURL() != null)
185             {
186                 doc.add(new Field(ParsedObject.FIELDNAME_URL, parsedObject.getURL().toString(), Field.Store.YES, Field.Index.TOKENIZED));
187             }
188             if(parsedObject.getClassName() != null)
189             {
190                 doc.add(new Field(ParsedObject.FIELDNAME_CLASSNAME, parsedObject.getClassName(), Field.Store.YES, Field.Index.TOKENIZED));
191             }
192             
193             String[] keywordArray = parsedObject.getKeywords();
194             if(keywordArray != null)
195             {
196             	for(int i=0; i<keywordArray.length; ++i)
197             	{
198             		String keyword = keywordArray[i];
199             		doc.add(new Field(ParsedObject.FIELDNAME_KEYWORDS, keyword, Field.Store.YES, Field.Index.UN_TOKENIZED));
200             	}
201             }
202 
203             Map keywords = parsedObject.getKeywordsMap();
204             addFieldsToDocument(doc, keywords, KEYWORD);
205             
206             Map fields = parsedObject.getFields();
207             addFieldsToDocument(doc, fields, TEXT);
208  
209             // Add the document to search index
210             try
211             {
212                 indexWriter.addDocument(doc);
213             }
214             catch (IOException e)
215             {
216                //logger.error("Error adding document to index.", e);
217             }
218             //logger.debug("Index Document Count = " + indexWriter.docCount());
219             //logger.info("Added '" + parsedObject.getTitle() + "' to index");
220             result = true;
221         }
222 
223         try
224         {
225         	if(optimizeAfterUpdate)
226             {
227                 indexWriter.optimize();
228             }
229         }
230         catch (IOException e)
231         {
232             //logger.error("Error while trying to optimize index.");
233         }
234         finally
235         {
236             try
237             {
238                 indexWriter.close();
239             }
240             catch (IOException e)
241             {
242                //logger.error("Error while closing index writer.", e);
243             }
244         }
245         
246         return result;
247     }
248 
249     /* (non-Javadoc)
250      * @see org.apache.jetspeed.search.SearchEnging#remove(java.lang.Object)
251      */
252     public boolean remove(Object o)
253     {
254         Collection c = new ArrayList(1);
255         c.add(o);
256 
257         return remove(c);
258     }
259 
260     /* (non-Javadoc)
261      * @see org.apache.jetspeed.search.SearchEnging#remove(java.util.Collection)
262      */
263     public synchronized boolean remove(Collection objects)
264     {
265         boolean result = false;
266         
267         try 
268         {
269             IndexReader indexReader = IndexReader.open(this.rootIndexDir);
270 
271             Iterator it = objects.iterator();
272             while (it.hasNext()) 
273             {
274                 Object o = it.next();
275                 // Look up appropriate handler
276                 ObjectHandler handler = handlerFactory.getHandler(o);
277 
278                 // Parse the object
279                 ParsedObject parsedObject = handler.parseObject(o);
280 
281                 // Create term
282                 Term term = null;
283 
284                 if (parsedObject.getKey() != null)
285                 {
286                     term = new Term(ParsedObject.FIELDNAME_KEY, parsedObject.getKey());
287                     // Remove the document from search index
288                     int rc = indexReader.deleteDocuments(term);
289                     //logger.info("Attempted to delete '" + term.toString() + "' from index, documents deleted = " + rc);
290                     //System.out.println("Attempted to delete '" + term.toString() + "' from index, documents deleted = " + rc);
291                     result = rc > 0;
292                 }
293             }
294 
295             indexReader.close();
296 
297             if(optimizeAfterUpdate)
298             {
299                 optimize();
300             }
301 
302         }
303         catch (Exception e)
304         {
305             //logger.error("Exception", e);
306             result = false;
307         }
308 
309         return result;
310     }
311 
312     /* (non-Javadoc)
313      * @see org.apache.jetspeed.search.SearchEnging#update(java.lang.Object)
314      */
315     public boolean update(Object o)
316     {
317         Collection c = new ArrayList(1);
318         c.add(o);
319         
320         return update(c);
321     }
322 
323     /* (non-Javadoc)
324      * @see org.apache.jetspeed.search.SearchEnging#update(java.util.Collection)
325      */
326     public synchronized boolean update(Collection objects)
327     {
328         boolean result = false;
329         
330         try
331         {
332             // Delete entries from index
333             remove(objects);
334             result = true;
335         }
336         catch (Throwable e)
337         {
338             //logger.error("Exception",  e);
339         }
340 
341         try
342         {
343             // Add entries to index
344         	if(result)
345         	{
346         		add(objects);
347         		result = true;
348         	}
349         }
350         catch (Throwable e)
351         {
352             //logger.error("Exception",  e);
353         }
354         
355         return result;
356     }
357 
358     /* (non-Javadoc)
359      * @see org.apache.jetspeed.search.SearchEnging#optimize()
360      */
361     public synchronized boolean optimize()
362     {
363         boolean result = false;
364 
365     	try
366 		{
367     		IndexWriter indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(), false);
368             indexWriter.optimize();
369             indexWriter.close();
370             result = true;
371         }
372         catch (IOException e)
373         {
374              //logger.error("Error while trying to optimize index.");
375         }
376         return result;
377     }
378 
379     /* (non-Javadoc)
380      * @see org.apache.jetspeed.search.SearchEngine#search(java.lang.String)
381      */
382     public SearchResults search(String queryString)
383     {        
384         Searcher searcher = null;
385         Hits hits = null;
386         
387         try
388         {
389             searcher = new IndexSearcher(rootIndexDir.getPath());
390         }
391         catch (IOException e)
392         {
393             //logger.error("Failed to create index search using path " + rootDir.getPath());
394             return null;
395         }
396         
397         Analyzer analyzer = newAnalyzer();
398         
399         String[] searchFields = {ParsedObject.FIELDNAME_CONTENT, ParsedObject.FIELDNAME_DESCRIPTION, ParsedObject.FIELDNAME_FIELDS,
400                            ParsedObject.FIELDNAME_KEY, ParsedObject.FIELDNAME_KEYWORDS, ParsedObject.FIELDNAME_LANGUAGE,
401                            ParsedObject.FIELDNAME_SCORE, ParsedObject.FIELDNAME_TITLE, ParsedObject.FIELDNAME_TYPE,
402                            ParsedObject.FIELDNAME_URL, ParsedObject.FIELDNAME_CLASSNAME};
403                             
404         Query query= null;
405         try
406         {
407         	String s[] = new String[searchFields.length];
408         	for(int i=0;i<s.length;i++)
409         		s[i] = queryString;
410             query = MultiFieldQueryParser.parse(s, searchFields, analyzer);
411 //          Query query = QueryParser.parse(searchString, ParsedObject.FIELDNAME_CONTENT, analyzer);
412         }
413         catch (ParseException e)
414         {
415             //logger.info("Failed to parse query " + query);
416             return null;
417         }
418         
419         try
420         {
421             hits = searcher.search(query);
422         }
423         catch (IOException e)
424         {
425            //logger.error("Error while peforming search.", e);
426            return null;
427         }
428 
429         int hitNum = hits.length();
430         ArrayList resultList = new ArrayList(hitNum);
431         for(int i=0; i<hitNum; i++)
432         {
433             ParsedObject result = new BaseParsedObject();
434             try
435             {
436 	            Document doc = hits.doc(i);
437 	        
438 		        addFieldsToParsedObject(doc, result);
439 		        
440 		        result.setScore(hits.score(i));
441 		        Field type = doc.getField(ParsedObject.FIELDNAME_TYPE);
442 		        if(type != null)
443 		        {
444 		            result.setType(type.stringValue());
445 		        }
446 		        
447 		        Field key = doc.getField(ParsedObject.FIELDNAME_KEY);
448 		        if(key != null)
449 		        {
450 		            result.setKey(key.stringValue());
451 		        }
452 		        
453 		        Field description = doc.getField(ParsedObject.FIELDNAME_DESCRIPTION);
454 		        if(description != null)
455 		        {
456 		            result.setDescription(description.stringValue());
457 		        }
458 		        
459 		        Field title = doc.getField(ParsedObject.FIELDNAME_TITLE);
460 		        if(title != null)
461 		        {
462 		            result.setTitle(title.stringValue());
463 		        }
464 		        
465 		        Field content = doc.getField(ParsedObject.FIELDNAME_CONTENT);
466 		        if(content != null)
467 		        {
468 		            result.setContent(content.stringValue());
469 		        }
470 		        
471 		        Field language = doc.getField(ParsedObject.FIELDNAME_LANGUAGE);
472 		        if (language != null)
473 		        {
474 		        	result.setLanguage(language.stringValue());
475 		        }
476 		        
477 		        Field classname = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
478 		        if (classname != null)
479 		        {
480 		        	result.setClassName(classname.stringValue());
481 		        }
482 		        
483 		        Field url = doc.getField(ParsedObject.FIELDNAME_URL);
484 		        if (url != null)
485 		        {
486 		            result.setURL(new URL(url.stringValue()));
487 		        }
488 		        
489 		        Field[] keywords = doc.getFields(ParsedObject.FIELDNAME_KEYWORDS);
490 		        if(keywords != null)
491 		        {
492 		        	String[] keywordArray = new String[keywords.length];
493 		        	
494 		        	for(int j=0; j<keywords.length; j++)
495 		        	{
496 		        		Field keyword = keywords[j];
497 		        		keywordArray[j] = keyword.stringValue();
498 		        	}
499 		        	
500 		        	result.setKeywords(keywordArray);
501 		        }
502 		        
503 		        resultList.add(i, result);
504             }
505             catch(IOException e)
506             {
507                 //logger
508             }
509         }
510 
511         if (searcher != null)
512         {
513             try
514             {
515                 searcher.close();
516             }
517             catch (IOException ioe)
518             {
519                 //logger.error("Closing Searcher", ioe);
520             }
521         }
522         
523         SearchResults results = new SearchResultsImpl(resultList);
524         return results;
525     }
526     
527     private Analyzer newAnalyzer() {
528         Analyzer rval = null;
529 
530         if(analyzerClassName != null)
531         {
532 	        try {
533 	            Class analyzerClass = Class.forName(analyzerClassName);
534 	            rval = (Analyzer) analyzerClass.newInstance();
535 	        } catch(InstantiationException e) {
536 	            //logger.error("InstantiationException", e);
537 	        } catch(ClassNotFoundException e) {
538 	            //logger.error("ClassNotFoundException", e);
539 	        } catch(IllegalAccessException e) {
540 	            //logger.error("IllegalAccessException", e);
541 	        }
542         }
543 
544         if(rval == null) {
545             rval = new StandardAnalyzer();
546         }
547 
548         return rval;
549     }
550 
551     private void addFieldsToDocument(Document doc, Map fields, int type)
552     {
553         if(fields != null)
554         {
555             Iterator keyIter = fields.keySet().iterator();
556             while(keyIter.hasNext())
557             {
558                 Object key = keyIter.next();
559                 if(key != null)
560                 {
561                     Object values = fields.get(key);
562                     if(values != null)
563                     {
564                         if(values instanceof Collection)
565                         {
566                             Iterator valueIter = ((Collection)values).iterator();
567                             while(valueIter.hasNext())
568                             {
569                                 Object value = valueIter.next();
570                                 if(value != null)
571                                 {
572                                     if(type == TEXT)
573                                     {
574                                         doc.add(new Field(key.toString(), value.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
575                                     }
576                                     else
577                                     {
578                                         doc.add(new Field(key.toString(), value.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
579                                     }
580                                 }
581                             }
582                         }
583                         else
584                         {
585                             if(type == TEXT)
586                             {
587                                 doc.add(new Field(key.toString(), values.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
588                             }
589                             else
590                             {
591                                 doc.add(new Field(key.toString(), values.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
592                             }
593                         }
594                     }
595                 }
596             } 
597         }
598     }
599     
600     private void addFieldsToParsedObject(Document doc, ParsedObject o)
601     {
602         try
603         {
604             MultiMap multiKeywords = new MultiHashMap();
605             MultiMap multiFields = new MultiHashMap();
606             HashMap fieldMap = new HashMap();
607             
608             Field classNameField = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
609             if(classNameField != null)
610             {
611                 String className = classNameField.stringValue();
612                 o.setClassName(className);
613                 ObjectHandler handler = handlerFactory.getHandler(className);
614                 
615                 Set fields = handler.getFields();
616                 addFieldsToMap(doc, fields, multiFields);
617                 addFieldsToMap(doc, fields, fieldMap);
618                 
619                 Set keywords = handler.getKeywords();
620                 addFieldsToMap(doc, keywords, multiKeywords);
621             }
622             
623             o.setKeywordsMap(multiKeywords);
624             o.setFields(multiFields);
625             o.setFields(fieldMap);
626         }
627         catch(Exception e)
628         {
629             //logger.error("Error trying to add fields to parsed object.", e);
630         }
631     }
632     
633     private void addFieldsToMap(Document doc, Set fieldNames, Map fields)
634     {
635         Iterator fieldIter = fieldNames.iterator();
636         while(fieldIter.hasNext())
637         {
638             String fieldName = (String)fieldIter.next();
639             Field[] docFields = doc.getFields(fieldName);
640             if(docFields != null)
641             {
642                 for(int i=0; i<docFields.length; i++)
643                 {
644                     Field field = docFields[i];
645                     if(field != null)
646                     {
647                         String value = field.stringValue();
648                         fields.put(fieldName, value);
649                     }
650                 }
651             }
652         }
653     }
654 }