/[Apache-SVN]/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
ViewVC logotype

Diff of /lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java

Parent Directory | Revision Log | View Patch

--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java	2005/12/29 15:25:20	359821
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java	2005/12/29 15:28:30	359822
@@ -16,6 +16,8 @@
 
 package org.apache.nutch.parse;
 
+import org.apache.nutch.crawl.SignatureFactory;
+import org.apache.nutch.fetcher.Fetcher;
 import org.apache.nutch.io.*;
 import org.apache.nutch.parse.ParseOutputFormat;
 import org.apache.nutch.mapred.*;
@@ -32,13 +34,14 @@ public class ParseSegment extends NutchC
   public static final Logger LOG =
     LogFormatter.getLogger(Parser.class.getName());
 
-  public ParseSegment() { super(null); }
+  public ParseSegment() { super(NutchConf.get()); }
 
   public ParseSegment(NutchConf conf) {
     super(conf);
   }
 
   public void configure(JobConf job) {
+    setConf(job);
   }
 
   public void map(WritableComparable key, Writable value,
@@ -55,6 +58,9 @@ public class ParseSegment extends NutchC
       status = new ParseStatus(e);
     }
 
+    // compute the new signature
+    byte[] signature = SignatureFactory.getSignature(getConf()).calculate(content, parse);
+    parse.getData().getMetadata().setProperty(Fetcher.SIGNATURE_KEY, StringUtil.toHexString(signature));
     if (status.isSuccess()) {
       output.collect(key, new ParseImpl(parse.getText(), parse.getData()));
     } else {

 

infrastructure at apache.org
ViewVC Help
Powered by ViewVC 1.1.26