|
||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.apache.lenya.search.crawler.CrawlerConfiguration
public class CrawlerConfiguration
Web-Crawler (it might make sense to replace this with Nutch)
Constructor Summary | |
---|---|
CrawlerConfiguration(java.lang.String configurationFilePath)
Creates a new CrawlerConfiguration object. |
Method Summary | |
---|---|
void |
configure(org.w3c.dom.Element root)
Extract parameters from configuration |
java.lang.String |
getBaseURL()
DOCUMENT ME! |
java.lang.String |
getHTDocsDumpDir()
Get htdocs-dump-dir/@src |
java.lang.String |
getHTDocsDumpDirResolved()
Get htdocs-dump-dir/@src as absolute path |
java.lang.String |
getRobotsDomain()
Get robots/@domain |
java.lang.String |
getRobotsFile()
Get robots/@src |
java.lang.String |
getRobotsFileResolved()
Get robots/@src as absolute path |
java.lang.String |
getScopeURL()
DOCUMENT ME! |
java.lang.String |
getURIList()
Get URI list path |
java.lang.String |
getURIListResolved()
Get URI list path as absolute path |
java.lang.String |
getUserAgent()
DOCUMENT ME! |
static void |
main(java.lang.String[] args)
DOCUMENT ME! |
java.lang.String |
resolvePath(java.lang.String path)
Resolve path |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public CrawlerConfiguration(java.lang.String configurationFilePath)
configurationFilePath
- DOCUMENT ME!
Method Detail |
---|
public static void main(java.lang.String[] args)
args
- DOCUMENT ME!
public void configure(org.w3c.dom.Element root) throws java.lang.Exception
root
- DOCUMENT ME!
java.lang.Exception
- DOCUMENT ME!
public java.lang.String getBaseURL()
public java.lang.String getScopeURL()
public java.lang.String getUserAgent()
public java.lang.String getURIList()
public java.lang.String getURIListResolved()
public java.lang.String getHTDocsDumpDir()
public java.lang.String getHTDocsDumpDirResolved()
public java.lang.String getRobotsFile()
public java.lang.String getRobotsFileResolved()
public java.lang.String getRobotsDomain()
public java.lang.String resolvePath(java.lang.String path)
path
- Original path
|
||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |