Coverage Report - org.apache.any23.plugin.crawler.SharedData
 
Classes in this File Line Coverage Branch Coverage Complexity
SharedData
0%
0/16
0%
0/8
2
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.plugin.crawler;
 19  
 
 20  
 import edu.uci.ics.crawler4j.crawler.Page;
 21  
 
 22  
 import java.util.List;
 23  
 import java.util.regex.Pattern;
 24  
 
 25  
 /**
 26  
  * This class hosts shared data structures accessible
 27  
  * to all the {@link DefaultWebCrawler} instances
 28  
  * run by the {@link SiteCrawler}.
 29  
  *
 30  
  * @author Michele Mostarda (mostarda@fbk.eu)
 31  
  */
 32  
 public class SharedData {
 33  
 
 34  
     /**
 35  
      * Singleton instance.
 36  
      */
 37  
     private static SharedData instance;
 38  
 
 39  
     /**
 40  
      * Crawl seed.
 41  
      */
 42  
     private final String seed;
 43  
 
 44  
     /**
 45  
      * Crawl page filter pattern.
 46  
      */
 47  
     private final Pattern pattern;
 48  
 
 49  
     /**
 50  
      * List of crawler listeners.
 51  
      */
 52  
     private final List<CrawlerListener> listeners;
 53  
 
 54  
 //    /**
 55  
 //     * Output triple handler.
 56  
 //     */
 57  
 //    private final TripleHandler tripleHandler;
 58  
 
 59  
     /**
 60  
      * @return the singleton instance.
 61  
      */
 62  
     protected static SharedData getInstance() {
 63  0
         if(instance == null) throw new IllegalStateException("The configuration has not yet initialized.");
 64  0
         return instance;
 65  
     }
 66  
 
 67  
     /**
 68  
      * Initializes the crawler data.
 69  
      *
 70  
      * @param seed crawler seed.
 71  
      * @param regex page filter regex.
 72  
      * @param listeners the listeners to be notified of the crawler activity.
 73  
      */
 74  
     protected static void setCrawlData(String seed, Pattern regex, List<CrawlerListener> listeners) {
 75  0
         instance = new SharedData(seed, regex, listeners);
 76  0
     }
 77  
 
 78  
     /**
 79  
      * Internal constructor.
 80  
      *
 81  
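      * @param seed crawl seed; must be neither null nor blank.
 82  
      * @param pattern crawl page filter pattern.
 83  
      * @param listeners the listeners to be notified of the crawler activity.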
 84  
      */
 85  0
     private SharedData(String seed, Pattern pattern, List<CrawlerListener> listeners) {
 86  0
         if(seed == null || seed.trim().length() == 0)
 87  0
             throw new IllegalArgumentException(
 88  
                 String.format("Invalid seed '%s'", seed)
 89  
             );
 90  
 
 91  0
         this.seed      = seed;
 92  0
         this.pattern   = pattern;
 93  0
         this.listeners = listeners;
 94  0
     }
 95  
 
 96  
     /**
 97  
      * @return crawl seed.
 98  
      */
 99  
     protected String getSeed() {
 100  0
         return seed;
 101  
     }
 102  
 
 103  
     /**
 104  
      * @return page filter pattern.
 105  
      */
 106  
     protected Pattern getPattern() {
 107  0
         return pattern;
 108  
     }
 109  
 
 110  
     /**
 111  
      * Notifies all listeners of a page by invoking their visitedPage callback.
 112  
      *
 113  
      * @param page the page passed to every listener.
 114  
      */
 115  
     protected void notifyPage(Page page) {
 116  0
         for(CrawlerListener listener : listeners) {
 117  0
             listener.visitedPage(page);
 118  
         }
 119  0
     }
 120  
 
 121  
 }