/[Apache-SVN]/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
ViewVC logotype

Diff of /lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java

Parent Directory | Revision Log | View Patch

--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java	2005/12/29 15:25:20	359821
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java	2005/12/29 15:28:30	359822
@@ -38,6 +38,7 @@ public class CrawlDbReducer implements R
 
     CrawlDatum highest = null;
     CrawlDatum old = null;
+    byte[] signature = null;
     float scoreIncrement = 0.0f;
 
     while (values.hasNext()) {
@@ -55,6 +56,8 @@ public class CrawlDbReducer implements R
       case CrawlDatum.STATUS_LINKED:
         scoreIncrement += datum.getScore();
         break;
+      case CrawlDatum.STATUS_SIGNATURE:
+        signature = datum.getSignature();
       }
     }
 
@@ -76,16 +79,20 @@ public class CrawlDbReducer implements R
         result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
         result.setScore(1.0f);                    // initial score is 1.0f
       }
+      result.setSignature(null);                  // reset the signature
       break;
       
     case CrawlDatum.STATUS_FETCH_SUCCESS:         // succesful fetch
       result = highest;                           // use new entry
+      if (highest.getSignature() == null) highest.setSignature(signature);
       result.setStatus(CrawlDatum.STATUS_DB_FETCHED);
       result.setNextFetchTime();
       break;
 
     case CrawlDatum.STATUS_FETCH_RETRY:           // temporary failure
       result = highest;                           // use new entry
+      if (old != null)
+        result.setSignature(old.getSignature());  // use old signature
       if (highest.getRetriesSinceFetch() < retryMax) {
         result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
       } else {
@@ -95,6 +102,8 @@ public class CrawlDbReducer implements R
 
     case CrawlDatum.STATUS_FETCH_GONE:            // permanent failure
       result = highest;                           // use new entry
+      if (old != null)
+        result.setSignature(old.getSignature());  // use old signature
       result.setStatus(CrawlDatum.STATUS_DB_GONE);
       break;
 

 

infrastructure at apache.org
ViewVC Help
Powered by ViewVC 1.1.26