com.norconex.collector.http.crawler
Class HttpCrawlerConfig
java.lang.Object
com.norconex.collector.http.crawler.HttpCrawlerConfig
- All Implemented Interfaces:
- Serializable, Cloneable
public class HttpCrawlerConfig
- extends Object
- implements Cloneable, Serializable
- See Also:
- Serialized Form
HttpCrawlerConfig
public HttpCrawlerConfig()
getId
public String getId()
setId
public void setId(String id)
getStartURLs
public String[] getStartURLs()
setStartURLs
public void setStartURLs(String[] startURLs)
setMaxDepth
public void setMaxDepth(int depth)
getMaxDepth
public int getMaxDepth()
setWorkDir
public void setWorkDir(File workDir)
getWorkDir
public File getWorkDir()
getNumThreads
public int getNumThreads()
setNumThreads
public void setNumThreads(int numThreads)
getMaxURLs
public int getMaxURLs()
setMaxURLs
public void setMaxURLs(int maxURLs)
getHttpDocumentfilters
public IHttpDocumentFilter[] getHttpDocumentfilters()
setHttpDocumentfilters
public void setHttpDocumentfilters(IHttpDocumentFilter[] documentfilters)
getURLFilters
public IURLFilter[] getURLFilters()
setURLFilters
public void setURLFilters(IURLFilter[] urlFilters)
getImporterConfig
public ImporterConfig getImporterConfig()
setImporterConfig
public void setImporterConfig(ImporterConfig importerConfig)
getHttpClientInitializer
public IHttpClientInitializer getHttpClientInitializer()
setHttpClientInitializer
public void setHttpClientInitializer(IHttpClientInitializer httpClientInitializer)
getHttpDocumentFetcher
public IHttpDocumentFetcher getHttpDocumentFetcher()
setHttpDocumentFetcher
public void setHttpDocumentFetcher(IHttpDocumentFetcher httpDocumentFetcher)
getHttpHeadersFetcher
public IHttpHeadersFetcher getHttpHeadersFetcher()
setHttpHeadersFetcher
public void setHttpHeadersFetcher(IHttpHeadersFetcher httpHeadersFetcher)
getUrlExtractor
public IURLExtractor getUrlExtractor()
setUrlExtractor
public void setUrlExtractor(IURLExtractor urlExtractor)
getRobotsTxtProvider
public IRobotsTxtProvider getRobotsTxtProvider()
setRobotsTxtProvider
public void setRobotsTxtProvider(IRobotsTxtProvider robotsTxtProvider)
getUrlNormalizer
public IURLNormalizer getUrlNormalizer()
setUrlNormalizer
public void setUrlNormalizer(IURLNormalizer urlNormalizer)
isDeleteOrphans
public boolean isDeleteOrphans()
setDeleteOrphans
public void setDeleteOrphans(boolean deleteOrphans)
getDelayResolver
public IDelayResolver getDelayResolver()
setDelayResolver
public void setDelayResolver(IDelayResolver delayResolver)
getCrawlerListeners
public IHttpCrawlerEventListener[] getCrawlerListeners()
setCrawlerListeners
public void setCrawlerListeners(IHttpCrawlerEventListener[] crawlerListeners)
getHttpHeadersFilters
public IHttpHeadersFilter[] getHttpHeadersFilters()
setHttpHeadersFilters
public void setHttpHeadersFilters(IHttpHeadersFilter[] httpHeadersFilters)
getPreImportProcessors
public IHttpDocumentProcessor[] getPreImportProcessors()
setPreImportProcessors
public void setPreImportProcessors(IHttpDocumentProcessor[] httpPreProcessors)
getPostImportProcessors
public IHttpDocumentProcessor[] getPostImportProcessors()
setPostImportProcessors
public void setPostImportProcessors(IHttpDocumentProcessor[] httpPostProcessors)
isIgnoreRobotsTxt
public boolean isIgnoreRobotsTxt()
setIgnoreRobotsTxt
public void setIgnoreRobotsTxt(boolean ignoreRobotsTxt)
getCommitter
public ICommitter getCommitter()
setCommitter
public void setCommitter(ICommitter committer)
isKeepDownloads
public boolean isKeepDownloads()
setKeepDownloads
public void setKeepDownloads(boolean keepDownloads)
getHttpHeadersChecksummer
public IHttpHeadersChecksummer getHttpHeadersChecksummer()
setHttpHeadersChecksummer
public void setHttpHeadersChecksummer(IHttpHeadersChecksummer httpHeadersChecksummer)
getHttpDocumentChecksummer
public IHttpDocumentChecksummer getHttpDocumentChecksummer()
setHttpDocumentChecksummer
public void setHttpDocumentChecksummer(IHttpDocumentChecksummer httpDocumentChecksummer)
getCrawlURLDatabaseFactory
public ICrawlURLDatabaseFactory getCrawlURLDatabaseFactory()
setCrawlURLDatabaseFactory
public void setCrawlURLDatabaseFactory(ICrawlURLDatabaseFactory crawlURLDatabaseFactory)
clone
protected Object clone()
throws CloneNotSupportedException
- Overrides:
clone in class Object
- Throws:
CloneNotSupportedException
Copyright © 2009-2013 Norconex Inc. All Rights Reserved.