Constant Field Values


Contents
org.apache.*

org.apache.nutch.crawl.CrawlDatum
public static final String FETCH_DIR_NAME "crawl_fetch"
public static final String GENERATE_DIR_NAME "crawl_generate"
public static final String PARSE_DIR_NAME "crawl_parse"
public static final byte STATUS_DB_FETCHED 2
public static final byte STATUS_DB_GONE 3
public static final byte STATUS_DB_MAX 31
public static final byte STATUS_DB_NOTMODIFIED 6
public static final byte STATUS_DB_REDIR_PERM 5
public static final byte STATUS_DB_REDIR_TEMP 4
public static final byte STATUS_DB_UNFETCHED 1
public static final byte STATUS_FETCH_GONE 37
public static final byte STATUS_FETCH_MAX 63
public static final byte STATUS_FETCH_NOTMODIFIED 38
public static final byte STATUS_FETCH_REDIR_PERM 36
public static final byte STATUS_FETCH_REDIR_TEMP 35
public static final byte STATUS_FETCH_RETRY 34
public static final byte STATUS_FETCH_SUCCESS 33
public static final byte STATUS_INJECTED 66
public static final byte STATUS_LINKED 67
public static final byte STATUS_PARSE_META 68
public static final byte STATUS_SIGNATURE 65

org.apache.nutch.crawl.CrawlDb
public static final String CRAWLDB_ADDITIONS_ALLOWED "db.update.additions.allowed"
public static final String CURRENT_NAME "current"
public static final String LOCK_NAME ".locked"

org.apache.nutch.crawl.CrawlDbFilter
public static final String URL_FILTERING "crawldb.url.filters"
public static final String URL_NORMALIZING "crawldb.url.normalizers"
public static final String URL_NORMALIZING_SCOPE "crawldb.url.normalizers.scope"

org.apache.nutch.crawl.CrawlDbReader
public static final int CSV_FORMAT 1
public static final int STD_FORMAT 0

org.apache.nutch.crawl.FetchSchedule
public static final int SECONDS_PER_DAY 86400
public static final int STATUS_MODIFIED 1
public static final int STATUS_NOTMODIFIED 2
public static final int STATUS_UNKNOWN 0

org.apache.nutch.crawl.Generator
public static final String GENERATE_MAX_PER_HOST "generate.max.per.host"
public static final String GENERATE_MAX_PER_HOST_BY_IP "generate.max.per.host.by.ip"
public static final String GENERATE_UPDATE_CRAWLDB "generate.update.crawldb"
public static final String GENERATOR_COUNT_MODE "generate.count.mode"
public static final String GENERATOR_COUNT_VALUE_DOMAIN "domain"
public static final String GENERATOR_COUNT_VALUE_HOST "host"
public static final String GENERATOR_CUR_TIME "generate.curTime"
public static final String GENERATOR_DELAY "crawl.gen.delay"
public static final String GENERATOR_FILTER "generate.filter"
public static final String GENERATOR_MAX_COUNT "generate.max.count"
public static final String GENERATOR_MAX_NUM_SEGMENTS "generate.max.num.segments"
public static final String GENERATOR_MIN_SCORE "generate.min.score"
public static final String GENERATOR_NORMALISE "generate.normalise"
public static final String GENERATOR_TOP_N "generate.topN"

org.apache.nutch.crawl.LinkDb
public static final String CURRENT_NAME "current"
public static final String LOCK_NAME ".locked"

org.apache.nutch.crawl.LinkDbFilter
public static final String URL_FILTERING "linkdb.url.filters"
public static final String URL_NORMALIZING "linkdb.url.normalizer"
public static final String URL_NORMALIZING_SCOPE "linkdb.url.normalizer.scope"

org.apache.nutch.crawl.URLPartitioner
public static final String PARTITION_MODE_DOMAIN "byDomain"
public static final String PARTITION_MODE_HOST "byHost"
public static final String PARTITION_MODE_IP "byIP"
public static final String PARTITION_MODE_KEY "partition.url.mode"

org.apache.nutch.fetcher.Fetcher
public static final String CONTENT_REDIR "content"
public static final int PERM_REFRESH_TIME 5
public static final String PROTOCOL_REDIR "protocol"

org.apache.nutch.fetcher.OldFetcher
public static final String CONTENT_REDIR "content"
public static final int PERM_REFRESH_TIME 5
public static final String PROTOCOL_REDIR "protocol"

org.apache.nutch.indexer.IndexingFilters
public static final String INDEXINGFILTER_ORDER "indexingfilter.order"

org.apache.nutch.indexer.NutchDocument
public static final byte VERSION 2

org.apache.nutch.indexer.solr.SolrConstants
public static final String BOOST_FIELD "boost"
public static final String COMMIT_SIZE "solr.commit.size"
public static final String DIGEST_FIELD "digest"
public static final String ID_FIELD "id"
public static final String MAPPING_FILE "solr.mapping.file"
public static final String SERVER_URL "solr.server.url"
public static final String SOLR_PREFIX "solr."
public static final String TIMESTAMP_FIELD "tstamp"
public static final String URL_FIELD "url"

org.apache.nutch.metadata.CreativeCommons
public static final String LICENSE_LOCATION "License-Location"
public static final String LICENSE_URL "License-Url"
public static final String WORK_TYPE "Work-Type"

org.apache.nutch.metadata.DublinCore
public static final String CONTRIBUTOR "contributor"
public static final String COVERAGE "coverage"
public static final String CREATOR "creator"
public static final String DATE "date"
public static final String DESCRIPTION "description"
public static final String FORMAT "format"
public static final String IDENTIFIER "identifier"
public static final String LANGUAGE "language"
public static final String MODIFIED "modified"
public static final String PUBLISHER "publisher"
public static final String RELATION "relation"
public static final String RIGHTS "rights"
public static final String SOURCE "source"
public static final String SUBJECT "subject"
public static final String TITLE "title"
public static final String TYPE "type"

org.apache.nutch.metadata.Feed
public static final String FEED "feed"
public static final String FEED_AUTHOR "author"
public static final String FEED_PUBLISHED "published"
public static final String FEED_TAGS "tag"
public static final String FEED_UPDATED "updated"

org.apache.nutch.metadata.HttpHeaders
public static final String CONTENT_DISPOSITION "Content-Disposition"
public static final String CONTENT_ENCODING "Content-Encoding"
public static final String CONTENT_LANGUAGE "Content-Language"
public static final String CONTENT_LENGTH "Content-Length"
public static final String CONTENT_LOCATION "Content-Location"
public static final String CONTENT_MD5 "Content-MD5"
public static final String CONTENT_TYPE "Content-Type"
public static final String LAST_MODIFIED "Last-Modified"
public static final String LOCATION "Location"

org.apache.nutch.metadata.Nutch
public static final String CACHING_FORBIDDEN_ALL "all"
public static final String CACHING_FORBIDDEN_CONTENT "content"
public static final String CACHING_FORBIDDEN_KEY "caching.forbidden"
public static final String CACHING_FORBIDDEN_NONE "none"
public static final String CHAR_ENCODING_FOR_CONVERSION "CharEncodingForConversion"
public static final String FETCH_STATUS_KEY "_fst_"
public static final String FETCH_TIME_KEY "_ftk_"
public static final String GENERATE_TIME_KEY "_ngt_"
public static final String ORIGINAL_CHAR_ENCODING "OriginalCharEncoding"
public static final String PROTO_STATUS_KEY "_pst_"
public static final String REPR_URL_KEY "_repr_"
public static final String SCORE_KEY "nutch.crawl.score"
public static final String SEGMENT_NAME_KEY "nutch.segment.name"
public static final String SIGNATURE_KEY "nutch.content.digest"

org.apache.nutch.metadata.Office
public static final String APPLICATION_NAME "Application-Name"
public static final String AUTHOR "Author"
public static final String CHARACTER_COUNT "Character Count"
public static final String COMMENTS "Comments"
public static final String KEYWORDS "Keywords"
public static final String LAST_AUTHOR "Last-Author"
public static final String LAST_PRINTED "Last-Printed"
public static final String LAST_SAVED "Last-Save-Date"
public static final String PAGE_COUNT "Page-Count"
public static final String REVISION_NUMBER "Revision-Number"
public static final String TEMPLATE "Template"
public static final String WORD_COUNT "Word-Count"

org.apache.nutch.microformats.reltag.RelTagParser
public static final String REL_TAG "Rel-Tag"

org.apache.nutch.net.URLFilters
public static final String URLFILTER_ORDER "urlfilter.order"

org.apache.nutch.net.URLNormalizers
public static final String SCOPE_CRAWLDB "crawldb"
public static final String SCOPE_DEFAULT "default"
public static final String SCOPE_FETCHER "fetcher"
public static final String SCOPE_GENERATE_HOST_COUNT "generate_host_count"
public static final String SCOPE_INJECT "inject"
public static final String SCOPE_LINKDB "linkdb"
public static final String SCOPE_OUTLINK "outlink"
public static final String SCOPE_PARTITION "partition"

org.apache.nutch.parse.HtmlParseFilters
public static final String HTMLPARSEFILTER_ORDER "htmlparsefilter.order"

org.apache.nutch.parse.ParseData
public static final String DIR_NAME "parse_data"

org.apache.nutch.parse.ParserFactory
public static final String DEFAULT_PLUGIN "*"

org.apache.nutch.parse.ParseStatus
public static final byte FAILED 2
public static final short FAILED_EXCEPTION 200
public static final short FAILED_INVALID_FORMAT 203
public static final short FAILED_MISSING_CONTENT 205
public static final short FAILED_MISSING_PARTS 204
public static final short FAILED_TRUNCATED 202
public static final byte NOTPARSED 0
public static final byte SUCCESS 1
public static final short SUCCESS_REDIRECT 100

org.apache.nutch.parse.ParseText
public static final String DIR_NAME "parse_text"

org.apache.nutch.protocol.Content
public static final String DIR_NAME "content"

org.apache.nutch.protocol.Protocol
public static final String CHECK_BLOCKING "protocol.plugin.check.blocking"
public static final String CHECK_ROBOTS "protocol.plugin.check.robots"

org.apache.nutch.protocol.ProtocolStatus
public static final int ACCESS_DENIED 17
public static final int BLOCKED 23
public static final int EXCEPTION 16
public static final int FAILED 2
public static final int GONE 11
public static final int MOVED 12
public static final int NOTFETCHING 20
public static final int NOTFOUND 14
public static final int NOTMODIFIED 21
public static final int PROTO_NOT_FOUND 10
public static final int REDIR_EXCEEDED 19
public static final int RETRY 15
public static final int ROBOTS_DENIED 18
public static final int SUCCESS 1
public static final int TEMP_MOVED 13
public static final int WOULDBLOCK 22

org.apache.nutch.protocol.ftp.Client
protected static final int TERMINAL_TYPE 24
protected static final int TERMINAL_TYPE_IS 0
protected static final int TERMINAL_TYPE_SEND 1

org.apache.nutch.protocol.http.api.HttpBase
public static final int BUFFER_SIZE 8192

org.apache.nutch.protocol.httpclient.HttpAuthenticationFactory
public static final String WWW_AUTHENTICATE "WWW-Authenticate"

org.apache.nutch.scoring.webgraph.LinkDatum
public static final byte INLINK 1
public static final byte OUTLINK 2

org.apache.nutch.scoring.webgraph.LinkDumper
public static final String DUMP_DIR "linkdump"

org.apache.nutch.scoring.webgraph.Loops
public static final String LOOPS_DIR "loops"
public static final String ROUTES_DIR "routes"

org.apache.nutch.scoring.webgraph.WebGraph
public static final String INLINK_DIR "inlinks"
public static final String LOCK_NAME ".locked"
public static final String NODE_DIR "nodes"
public static final String OUTLINK_DIR "outlinks"

org.apache.nutch.tools.arc.ArcSegmentCreator
public static final String URL_VERSION "arc.url.version"

org.apache.nutch.tools.proxy.DelayHandler
public static final long DEFAULT_DELAY 2000L

org.apache.nutch.urlfilter.automaton.AutomatonURLFilter
public static final String URLFILTER_AUTOMATON_FILE "urlfilter.automaton.file"
public static final String URLFILTER_AUTOMATON_RULES "urlfilter.automaton.rules"

org.apache.nutch.urlfilter.regex.RegexURLFilter
public static final String URLFILTER_REGEX_FILE "urlfilter.regex.file"
public static final String URLFILTER_REGEX_RULES "urlfilter.regex.rules"

org.apache.nutch.util.EncodingDetector
public static final String MIN_CONFIDENCE_KEY "encodingdetector.charset.min.confidence"
public static final int NO_THRESHOLD -1

org.apache.nutch.util.NutchConfiguration
public static final String UUID_KEY "nutch.conf.uuid"

org.apache.nutch.util.domain.DomainSuffix
public static final float DEFAULT_BOOST 1.0f



Copyright © 2011 The Apache Software Foundation