|
||||||||||
PREV NEXT | FRAMES NO FRAMES |
org.apache.* |
---|
org.apache.nutch.analysis.NutchAnalysisConstants | ||
---|---|---|
public static final int |
ACRONYM |
2 |
public static final int |
APOSTROPHE |
14 |
public static final int |
ATSIGN |
13 |
public static final int |
C_PLUS_PLUS |
5 |
public static final int |
C_SHARP |
6 |
public static final int |
CJK |
18 |
public static final int |
COLON |
10 |
public static final int |
DEFAULT |
0 |
public static final int |
DIGIT |
19 |
public static final int |
DOT |
12 |
public static final int |
EOF |
0 |
public static final int |
IRREGULAR_WORD |
4 |
public static final int |
LETTER |
17 |
public static final int |
MINUS |
8 |
public static final int |
PLUS |
7 |
public static final int |
QUOTE |
9 |
public static final int |
SIGRAM |
3 |
public static final int |
SLASH |
11 |
public static final int |
WHITE |
15 |
public static final int |
WORD |
1 |
public static final int |
WORD_PUNCT |
16 |
org.apache.nutch.analysis.NutchDocumentAnalyzer | ||
---|---|---|
public static final int |
INTER_ANCHOR_GAP |
4 |
org.apache.nutch.clustering.carrot2.NutchInputComponent | ||
---|---|---|
public static final String |
NUTCH_INPUT_HIT_DETAILS_ARRAY |
"NUTCH_INPUT_HIT_DETAILS_ARRAY" |
public static final String |
NUTCH_INPUT_SUMMARIES_ARRAY |
"NUTCH_INPUT_SUMMARIES_ARRAY" |
org.apache.nutch.crawl.CrawlDatum | ||
---|---|---|
public static final String |
FETCH_DIR_NAME |
"crawl_fetch" |
public static final String |
GENERATE_DIR_NAME |
"crawl_generate" |
public static final String |
PARSE_DIR_NAME |
"crawl_parse" |
public static final byte |
STATUS_DB_FETCHED |
2 |
public static final byte |
STATUS_DB_GONE |
3 |
public static final byte |
STATUS_DB_MAX |
31 |
public static final byte |
STATUS_DB_NOTMODIFIED |
6 |
public static final byte |
STATUS_DB_REDIR_PERM |
5 |
public static final byte |
STATUS_DB_REDIR_TEMP |
4 |
public static final byte |
STATUS_DB_UNFETCHED |
1 |
public static final byte |
STATUS_FETCH_GONE |
37 |
public static final byte |
STATUS_FETCH_MAX |
63 |
public static final byte |
STATUS_FETCH_NOTMODIFIED |
38 |
public static final byte |
STATUS_FETCH_REDIR_PERM |
36 |
public static final byte |
STATUS_FETCH_REDIR_TEMP |
35 |
public static final byte |
STATUS_FETCH_RETRY |
34 |
public static final byte |
STATUS_FETCH_SUCCESS |
33 |
public static final byte |
STATUS_INJECTED |
66 |
public static final byte |
STATUS_LINKED |
67 |
public static final byte |
STATUS_PARSE_META |
68 |
public static final byte |
STATUS_SIGNATURE |
65 |
org.apache.nutch.crawl.CrawlDb | ||
---|---|---|
public static final String |
CRAWLDB_ADDITIONS_ALLOWED |
"db.update.additions.allowed" |
public static final String |
CURRENT_NAME |
"current" |
public static final String |
LOCK_NAME |
".locked" |
org.apache.nutch.crawl.CrawlDbFilter | ||
---|---|---|
public static final String |
URL_FILTERING |
"crawldb.url.filters" |
public static final String |
URL_NORMALIZING |
"crawldb.url.normalizers" |
public static final String |
URL_NORMALIZING_SCOPE |
"crawldb.url.normalizers.scope" |
org.apache.nutch.crawl.CrawlDbReader | ||
---|---|---|
public static final int |
CSV_FORMAT |
1 |
public static final int |
STD_FORMAT |
0 |
org.apache.nutch.crawl.FetchSchedule | ||
---|---|---|
public static final int |
SECONDS_PER_DAY |
86400 |
public static final int |
STATUS_MODIFIED |
1 |
public static final int |
STATUS_NOTMODIFIED |
2 |
public static final int |
STATUS_UNKNOWN |
0 |
org.apache.nutch.crawl.Generator | ||
---|---|---|
public static final String |
GENERATE_MAX_PER_HOST |
"generate.max.per.host" |
public static final String |
GENERATE_MAX_PER_HOST_BY_IP |
"generate.max.per.host.by.ip" |
public static final String |
GENERATE_UPDATE_CRAWLDB |
"generate.update.crawldb" |
public static final String |
GENERATOR_COUNT_MODE |
"generate.count.mode" |
public static final String |
GENERATOR_COUNT_VALUE_DOMAIN |
"domain" |
public static final String |
GENERATOR_COUNT_VALUE_HOST |
"host" |
public static final String |
GENERATOR_CUR_TIME |
"generate.curTime" |
public static final String |
GENERATOR_DELAY |
"crawl.gen.delay" |
public static final String |
GENERATOR_FILTER |
"generate.filter" |
public static final String |
GENERATOR_MAX_COUNT |
"generate.max.count" |
public static final String |
GENERATOR_MAX_NUM_SEGMENTS |
"generate.max.num.segments" |
public static final String |
GENERATOR_MIN_SCORE |
"generate.min.score" |
public static final String |
GENERATOR_NORMALISE |
"generate.normalise" |
public static final String |
GENERATOR_TOP_N |
"generate.topN" |
org.apache.nutch.crawl.LinkDb | ||
---|---|---|
public static final String |
CURRENT_NAME |
"current" |
public static final String |
LOCK_NAME |
".locked" |
org.apache.nutch.crawl.LinkDbFilter | ||
---|---|---|
public static final String |
URL_FILTERING |
"linkdb.url.filters" |
public static final String |
URL_NORMALIZING |
"linkdb.url.normalizer" |
public static final String |
URL_NORMALIZING_SCOPE |
"linkdb.url.normalizer.scope" |
org.apache.nutch.crawl.URLPartitioner | ||
---|---|---|
public static final String |
PARTITION_MODE_DOMAIN |
"byDomain" |
public static final String |
PARTITION_MODE_HOST |
"byHost" |
public static final String |
PARTITION_MODE_IP |
"byIP" |
public static final String |
PARTITION_MODE_KEY |
"partition.url.mode" |
org.apache.nutch.fetcher.Fetcher | ||
---|---|---|
public static final String |
CONTENT_REDIR |
"content" |
public static final int |
PERM_REFRESH_TIME |
5 |
public static final String |
PROTOCOL_REDIR |
"protocol" |
org.apache.nutch.fetcher.OldFetcher | ||
---|---|---|
public static final String |
CONTENT_REDIR |
"content" |
public static final int |
PERM_REFRESH_TIME |
5 |
public static final String |
PROTOCOL_REDIR |
"protocol" |
org.apache.nutch.indexer.Indexer | ||
---|---|---|
public static final String |
DONE_NAME |
"index.done" |
org.apache.nutch.indexer.IndexingFilters | ||
---|---|---|
public static final String |
INDEXINGFILTER_ORDER |
"indexingfilter.order" |
org.apache.nutch.indexer.IndexMerger | ||
---|---|---|
public static final String |
DONE_NAME |
"merge.done" |
org.apache.nutch.indexer.NutchDocument | ||
---|---|---|
public static final byte |
VERSION |
1 |
org.apache.nutch.indexer.field.FieldFilters | ||
---|---|---|
public static final String |
FIELD_FILTER_ORDER |
"field.filter.order" |
org.apache.nutch.indexer.field.FieldIndexer | ||
---|---|---|
public static final String |
DONE_NAME |
"index.done" |
org.apache.nutch.indexer.field.Fields | ||
---|---|---|
public static final String |
ACTION |
"action" |
public static final String |
ANCHOR |
"anchor" |
public static final String |
BOOST |
"boost" |
public static final String |
BOOSTFACTOR |
"boostfactor" |
public static final String |
CACHE |
"cache" |
public static final String |
COMPUTATION |
"computation" |
public static final String |
CONTENT |
"content" |
public static final String |
DIGEST |
"digest" |
public static final String |
HOST |
"host" |
public static final String |
ORIG_URL |
"orig" |
public static final String |
SEG_URL |
"segurl" |
public static final String |
SEGMENT |
"segment" |
public static final String |
SITE |
"site" |
public static final String |
TITLE |
"title" |
public static final String |
TSTAMP |
"tstamp" |
public static final String |
URL |
"url" |
org.apache.nutch.indexer.lucene.LuceneConstants | ||
---|---|---|
public static final String |
FIELD_INDEX_PREFIX |
"lucene.field.index." |
public static final String |
FIELD_PREFIX |
"lucene.field." |
public static final String |
FIELD_STORE_PREFIX |
"lucene.field.store." |
public static final String |
FIELD_VECTOR_PREFIX |
"lucene.field.vector." |
public static final String |
INDEX_NO |
"index.no" |
public static final String |
INDEX_NO_NORMS |
"index.no_norms" |
public static final String |
INDEX_TOKENIZED |
"index.tokenized" |
public static final String |
INDEX_UNTOKENIZED |
"index.untokenized" |
public static final String |
LUCENE_PREFIX |
"lucene." |
public static final String |
STORE_COMPRESS |
"store.compress" |
public static final String |
STORE_NO |
"store.no" |
public static final String |
STORE_YES |
"store.yes" |
public static final String |
VECTOR_NO |
"vector.no" |
public static final String |
VECTOR_OFFSET |
"vector.offset" |
public static final String |
VECTOR_POS |
"vector.pos" |
public static final String |
VECTOR_POS_OFFSET |
"vector.pos_offset" |
public static final String |
VECTOR_YES |
"vector.yes" |
org.apache.nutch.indexer.solr.SolrConstants | ||
---|---|---|
public static final String |
BOOST_FIELD |
"boost" |
public static final String |
COMMIT_SIZE |
"solr.commit.size" |
public static final String |
DIGEST_FIELD |
"digest" |
public static final String |
ID_FIELD |
"id" |
public static final String |
SERVER_URL |
"solr.server.url" |
public static final String |
SOLR_PREFIX |
"solr." |
public static final String |
TIMESTAMP_FIELD |
"tstamp" |
public static final String |
URL_FIELD |
"url" |
org.apache.nutch.metadata.CreativeCommons | ||
---|---|---|
public static final String |
LICENSE_LOCATION |
"License-Location" |
public static final String |
LICENSE_URL |
"License-Url" |
public static final String |
WORK_TYPE |
"Work-Type" |
org.apache.nutch.metadata.DublinCore | ||
---|---|---|
public static final String |
CONTRIBUTOR |
"contributor" |
public static final String |
COVERAGE |
"coverage" |
public static final String |
CREATOR |
"creator" |
public static final String |
DATE |
"date" |
public static final String |
DESCRIPTION |
"description" |
public static final String |
FORMAT |
"format" |
public static final String |
IDENTIFIER |
"identifier" |
public static final String |
LANGUAGE |
"language" |
public static final String |
MODIFIED |
"modified" |
public static final String |
PUBLISHER |
"publisher" |
public static final String |
RELATION |
"relation" |
public static final String |
RIGHTS |
"rights" |
public static final String |
SOURCE |
"source" |
public static final String |
SUBJECT |
"subject" |
public static final String |
TITLE |
"title" |
public static final String |
TYPE |
"type" |
org.apache.nutch.metadata.Feed | ||
---|---|---|
public static final String |
FEED |
"feed" |
public static final String |
FEED_AUTHOR |
"author" |
public static final String |
FEED_PUBLISHED |
"published" |
public static final String |
FEED_TAGS |
"tag" |
public static final String |
FEED_UPDATED |
"updated" |
org.apache.nutch.metadata.HttpHeaders | ||
---|---|---|
public static final String |
CONTENT_DISPOSITION |
"Content-Disposition" |
public static final String |
CONTENT_ENCODING |
"Content-Encoding" |
public static final String |
CONTENT_LANGUAGE |
"Content-Language" |
public static final String |
CONTENT_LENGTH |
"Content-Length" |
public static final String |
CONTENT_LOCATION |
"Content-Location" |
public static final String |
CONTENT_MD5 |
"Content-MD5" |
public static final String |
CONTENT_TYPE |
"Content-Type" |
public static final String |
LAST_MODIFIED |
"Last-Modified" |
public static final String |
LOCATION |
"Location" |
org.apache.nutch.metadata.Nutch | ||
---|---|---|
public static final String |
CACHING_FORBIDDEN_ALL |
"all" |
public static final String |
CACHING_FORBIDDEN_CONTENT |
"content" |
public static final String |
CACHING_FORBIDDEN_KEY |
"caching.forbidden" |
public static final String |
CACHING_FORBIDDEN_NONE |
"none" |
public static final String |
CHAR_ENCODING_FOR_CONVERSION |
"CharEncodingForConversion" |
public static final String |
FETCH_STATUS_KEY |
"_fst_" |
public static final String |
FETCH_TIME_KEY |
"_ftk_" |
public static final String |
GENERATE_TIME_KEY |
"_ngt_" |
public static final String |
ORIGINAL_CHAR_ENCODING |
"OriginalCharEncoding" |
public static final String |
PROTO_STATUS_KEY |
"_pst_" |
public static final String |
REPR_URL_KEY |
"_repr_" |
public static final String |
SCORE_KEY |
"nutch.crawl.score" |
public static final String |
SEGMENT_NAME_KEY |
"nutch.segment.name" |
public static final String |
SIGNATURE_KEY |
"nutch.content.digest" |
org.apache.nutch.metadata.Office | ||
---|---|---|
public static final String |
APPLICATION_NAME |
"Application-Name" |
public static final String |
AUTHOR |
"Author" |
public static final String |
CHARACTER_COUNT |
"Character Count" |
public static final String |
COMMENTS |
"Comments" |
public static final String |
KEYWORDS |
"Keywords" |
public static final String |
LAST_AUTHOR |
"Last-Author" |
public static final String |
LAST_PRINTED |
"Last-Printed" |
public static final String |
LAST_SAVED |
"Last-Save-Date" |
public static final String |
PAGE_COUNT |
"Page-Count" |
public static final String |
REVISION_NUMBER |
"Revision-Number" |
public static final String |
TEMPLATE |
"Template" |
public static final String |
WORD_COUNT |
"Word-Count" |
org.apache.nutch.microformats.reltag.RelTagParser | ||
---|---|---|
public static final String |
REL_TAG |
"Rel-Tag" |
org.apache.nutch.net.URLFilters | ||
---|---|---|
public static final String |
URLFILTER_ORDER |
"urlfilter.order" |
org.apache.nutch.net.URLNormalizers | ||
---|---|---|
public static final String |
SCOPE_CRAWLDB |
"crawldb" |
public static final String |
SCOPE_DEFAULT |
"default" |
public static final String |
SCOPE_FETCHER |
"fetcher" |
public static final String |
SCOPE_GENERATE_HOST_COUNT |
"generate_host_count" |
public static final String |
SCOPE_INJECT |
"inject" |
public static final String |
SCOPE_LINKDB |
"linkdb" |
public static final String |
SCOPE_OUTLINK |
"outlink" |
public static final String |
SCOPE_PARTITION |
"partition" |
org.apache.nutch.ontology.jena.OntologyImpl | ||
---|---|---|
public static final String |
DELIMITER_SEARCHTERM |
" " |
org.apache.nutch.parse.HtmlParseFilters | ||
---|---|---|
public static final String |
HTMLPARSEFILTER_ORDER |
"htmlparsefilter.order" |
org.apache.nutch.parse.ParseData | ||
---|---|---|
public static final String |
DIR_NAME |
"parse_data" |
org.apache.nutch.parse.ParserFactory | ||
---|---|---|
public static final String |
DEFAULT_PLUGIN |
"*" |
org.apache.nutch.parse.ParseStatus | ||
---|---|---|
public static final byte |
FAILED |
2 |
public static final short |
FAILED_EXCEPTION |
200 |
public static final short |
FAILED_INVALID_FORMAT |
203 |
public static final short |
FAILED_MISSING_CONTENT |
205 |
public static final short |
FAILED_MISSING_PARTS |
204 |
public static final short |
FAILED_TRUNCATED |
202 |
public static final byte |
NOTPARSED |
0 |
public static final byte |
SUCCESS |
1 |
public static final short |
SUCCESS_REDIRECT |
100 |
org.apache.nutch.parse.ParseText | ||
---|---|---|
public static final String |
DIR_NAME |
"parse_text" |
org.apache.nutch.parse.msexcel.MSExcelParser | ||
---|---|---|
public static final String |
MIME_TYPE |
"application/vnd.ms-excel" |
org.apache.nutch.parse.mspowerpoint.MSPowerPointParser | ||
---|---|---|
public static final String |
MIME_TYPE |
"application/vnd.ms-powerpoint" |
org.apache.nutch.parse.msword.MSWordParser | ||
---|---|---|
public static final String |
MIME_TYPE |
"application/msword" |
org.apache.nutch.protocol.Content | ||
---|---|---|
public static final String |
DIR_NAME |
"content" |
org.apache.nutch.protocol.Protocol | ||
---|---|---|
public static final String |
CHECK_BLOCKING |
"protocol.plugin.check.blocking" |
public static final String |
CHECK_ROBOTS |
"protocol.plugin.check.robots" |
org.apache.nutch.protocol.ProtocolStatus | ||
---|---|---|
public static final int |
ACCESS_DENIED |
17 |
public static final int |
BLOCKED |
23 |
public static final int |
EXCEPTION |
16 |
public static final int |
FAILED |
2 |
public static final int |
GONE |
11 |
public static final int |
MOVED |
12 |
public static final int |
NOTFETCHING |
20 |
public static final int |
NOTFOUND |
14 |
public static final int |
NOTMODIFIED |
21 |
public static final int |
PROTO_NOT_FOUND |
10 |
public static final int |
REDIR_EXCEEDED |
19 |
public static final int |
RETRY |
15 |
public static final int |
ROBOTS_DENIED |
18 |
public static final int |
SUCCESS |
1 |
public static final int |
TEMP_MOVED |
13 |
public static final int |
WOULDBLOCK |
22 |
org.apache.nutch.protocol.ftp.Client | ||
---|---|---|
protected static final int |
TERMINAL_TYPE |
24 |
protected static final int |
TERMINAL_TYPE_IS |
0 |
protected static final int |
TERMINAL_TYPE_SEND |
1 |
org.apache.nutch.protocol.http.api.HttpBase | ||
---|---|---|
public static final int |
BUFFER_SIZE |
8192 |
org.apache.nutch.protocol.httpclient.HttpAuthenticationFactory | ||
---|---|---|
public static final String |
WWW_AUTHENTICATE |
"WWW-Authenticate" |
org.apache.nutch.scoring.webgraph.LinkDatum | ||
---|---|---|
public static final byte |
INLINK |
1 |
public static final byte |
OUTLINK |
2 |
org.apache.nutch.scoring.webgraph.LinkDumper | ||
---|---|---|
public static final String |
DUMP_DIR |
"linkdump" |
org.apache.nutch.scoring.webgraph.Loops | ||
---|---|---|
public static final String |
LOOPS_DIR |
"loops" |
public static final String |
ROUTES_DIR |
"routes" |
org.apache.nutch.scoring.webgraph.WebGraph | ||
---|---|---|
public static final String |
INLINK_DIR |
"inlinks" |
public static final String |
LOCK_NAME |
".locked" |
public static final String |
NODE_DIR |
"nodes" |
public static final String |
OUTLINK_DIR |
"outlinks" |
org.apache.nutch.searcher.FetchedSegments | ||
---|---|---|
public static final long |
VERSION |
1L |
org.apache.nutch.searcher.LuceneSearchBean | ||
---|---|---|
public static final long |
VERSION |
1L |
org.apache.nutch.searcher.NutchBean | ||
---|---|---|
public static final String |
KEY |
"nutchBean" |
org.apache.nutch.searcher.Query.Clause | ||
---|---|---|
public static final String |
DEFAULT_FIELD |
"DEFAULT" |
org.apache.nutch.searcher.QueryParams | ||
---|---|---|
public static final String |
DEFAULT_DEDUP_FIELD |
"site" |
public static final int |
DEFAULT_MAX_HITS_PER_DUP |
2 |
public static final int |
DEFAULT_NUM_HITS |
10 |
public static final boolean |
DEFAULT_REVERSE |
false |
org.apache.nutch.searcher.response.SearchServlet | ||
---|---|---|
public static final String |
DEDUPE |
"ddf" |
public static final String |
FIELDS |
"field" |
public static final String |
LANG |
"lang" |
public static final String |
NUM_DUPES |
"dupes" |
public static final String |
QUERY |
"query" |
public static final String |
RESPONSE_TYPE |
"rt" |
public static final String |
REVERSE |
"reverse" |
public static final String |
ROWS |
"rows" |
public static final String |
SORT |
"sort" |
public static final String |
START |
"start" |
public static final String |
SUMMARY |
"summary" |
org.apache.nutch.tools.arc.ArcSegmentCreator | ||
---|---|---|
public static final String |
URL_VERSION |
"arc.url.version" |
org.apache.nutch.util.EncodingDetector | ||
---|---|---|
public static final String |
MIN_CONFIDENCE_KEY |
"encodingdetector.charset.min.confidence" |
public static final int |
NO_THRESHOLD |
-1 |
org.apache.nutch.util.domain.DomainSuffix | ||
---|---|---|
public static final float |
DEFAULT_BOOST |
1.0f |
|
||||||||||
PREV NEXT | FRAMES NO FRAMES |