org.apache.nutch.parse
Class ParseOutputFormat

java.lang.Object
  extended by org.apache.nutch.parse.ParseOutputFormat
All Implemented Interfaces:
OutputFormat<Text,Parse>

public class ParseOutputFormat
extends Object
implements OutputFormat<Text,Parse>


Constructor Summary
ParseOutputFormat()
           
 
Method Summary
 void checkOutputSpecs(FileSystem fs, JobConf job)
           
static String filterNormalize(String fromUrl, String toUrl, String fromHost, boolean ignoreExternalLinks, URLFilters filters, URLNormalizers normalizers)
           
 RecordWriter<Text,Parse> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable progress)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

ParseOutputFormat

public ParseOutputFormat()

Method Detail

checkOutputSpecs

public void checkOutputSpecs(FileSystem fs,
                             JobConf job)
                      throws IOException
Specified by:
checkOutputSpecs in interface OutputFormat<Text,Parse>
Throws:
IOException

getRecordWriter

public RecordWriter<Text,Parse> getRecordWriter(FileSystem fs,
                                                JobConf job,
                                                String name,
                                                Progressable progress)
                                         throws IOException
Specified by:
getRecordWriter in interface OutputFormat<Text,Parse>
Throws:
IOException

filterNormalize

public static String filterNormalize(String fromUrl,
                                     String toUrl,
                                     String fromHost,
                                     boolean ignoreExternalLinks,
                                     URLFilters filters,
                                     URLNormalizers normalizers)


Copyright © 2012 The Apache Software Foundation