com.norconex.collector.http.crawler
Interface IHttpCrawlerEventListener

All Known Implementing Classes:
HttpCrawlerEventAdapter

public interface IHttpCrawlerEventListener

Allows implementers to react to any crawler-specific events.

CAUTION: Implementers should not implement this interface directly. For forward compatibility, they are strongly advised to subclass the HttpCrawlerEventAdapter class instead.

Keep in mind that if this listener is defined as part of the crawler defaults, a single instance of it will be shared among all crawlers (unless overridden).

Author:
Pascal Essiembre

Method Summary
 void crawlerFinished(HttpCrawler crawler)
           
 void crawlerStarted(HttpCrawler crawler)
           
 void documentCrawled(HttpCrawler crawler, HttpDocument document)
           
 void documentFetched(HttpCrawler crawler, HttpDocument document, IHttpDocumentFetcher fetcher)
           
 void documentHeadersFetched(HttpCrawler crawler, String url, IHttpHeadersFetcher headersFetcher, Properties headers)
           
 void documentHeadersRejected(HttpCrawler crawler, String url, IHttpHeadersFilter filter, Properties headers)
           
 void documentImported(HttpCrawler crawler, HttpDocument document)
           
 void documentPostProcessed(HttpCrawler crawler, HttpDocument document, IHttpDocumentProcessor postProcessor)
           
 void documentPreProcessed(HttpCrawler crawler, HttpDocument document, IHttpDocumentProcessor preProcessor)
           
 void documentRejected(HttpCrawler crawler, HttpDocument document, IHttpDocumentFilter filter)
           
 void documentRobotsTxtRejected(HttpCrawler crawler, String url, IURLFilter filter, RobotsTxt robotsTxt)
           
 void documentURLRejected(HttpCrawler crawler, String url, IURLFilter filter)
           
 void documentURLsExtracted(HttpCrawler crawler, HttpDocument document)
           
 

Method Detail

crawlerStarted

void crawlerStarted(HttpCrawler crawler)

documentRobotsTxtRejected

void documentRobotsTxtRejected(HttpCrawler crawler,
                               String url,
                               IURLFilter filter,
                               RobotsTxt robotsTxt)

documentURLRejected

void documentURLRejected(HttpCrawler crawler,
                         String url,
                         IURLFilter filter)

documentHeadersFetched

void documentHeadersFetched(HttpCrawler crawler,
                            String url,
                            IHttpHeadersFetcher headersFetcher,
                            Properties headers)

documentHeadersRejected

void documentHeadersRejected(HttpCrawler crawler,
                             String url,
                             IHttpHeadersFilter filter,
                             Properties headers)

documentFetched

void documentFetched(HttpCrawler crawler,
                     HttpDocument document,
                     IHttpDocumentFetcher fetcher)

documentURLsExtracted

void documentURLsExtracted(HttpCrawler crawler,
                           HttpDocument document)

documentRejected

void documentRejected(HttpCrawler crawler,
                      HttpDocument document,
                      IHttpDocumentFilter filter)

documentPreProcessed

void documentPreProcessed(HttpCrawler crawler,
                          HttpDocument document,
                          IHttpDocumentProcessor preProcessor)

documentImported

void documentImported(HttpCrawler crawler,
                      HttpDocument document)

documentPostProcessed

void documentPostProcessed(HttpCrawler crawler,
                           HttpDocument document,
                           IHttpDocumentProcessor postProcessor)

documentCrawled

void documentCrawled(HttpCrawler crawler,
                     HttpDocument document)

crawlerFinished

void crawlerFinished(HttpCrawler crawler)


Copyright © 2009-2013 Norconex Inc. All Rights Reserved.