-
- All Implemented Interfaces:
-
ai.platon.pulsar.crawl.Crawler, java.lang.AutoCloseable
public class StreamingCrawler<T extends UrlAware> extends AbstractCrawler
-
-
Nested Class Summary
Nested Classes Modifier and Type Class Description public class StreamingCrawler.Companion
-
Field Summary
Fields Modifier and Type Field Description private final Integer numPrivacyContexts; private final Integer numMaxActiveTabs; private final Integer fetchConcurrency; private final String defaultArgs; private final Duration outOfWorkTimeout; private final Duration fetchTaskTimeout; private final Duration idleTime; private final Boolean isOutOfWork; private final Boolean isActive; private String jobName; private final Integer id; private Sequence<T> urls; private final LoadOptions defaultOptions; private final GlobalCacheFactory globalCacheFactory; private final CrawlEventHandler crawlEventHandler; private final String name; private final Boolean noProxy; private final AtomicBoolean closed; private final PulsarSession session; private final Boolean autoClose
-
Constructor Summary
Constructors Constructor Description StreamingCrawler(Sequence<T> urls, LoadOptions defaultOptions, PulsarSession session, GlobalCacheFactory globalCacheFactory, CrawlEventHandler crawlEventHandler, String name, Boolean noProxy, Boolean autoClose)
-
Method Summary
-
-
Constructor Detail
-
StreamingCrawler
StreamingCrawler(Sequence<T> urls, LoadOptions defaultOptions, PulsarSession session, GlobalCacheFactory globalCacheFactory, CrawlEventHandler crawlEventHandler, String name, Boolean noProxy, Boolean autoClose)
-
-
Method Detail
-
getNumPrivacyContexts
final Integer getNumPrivacyContexts()
-
getNumMaxActiveTabs
final Integer getNumMaxActiveTabs()
-
getFetchConcurrency
final Integer getFetchConcurrency()
-
getDefaultArgs
final String getDefaultArgs()
-
getOutOfWorkTimeout
final Duration getOutOfWorkTimeout()
-
getFetchTaskTimeout
final Duration getFetchTaskTimeout()
-
getIdleTime
final Duration getIdleTime()
-
getIsOutOfWork
final Boolean getIsOutOfWork()
-
getIsActive
Boolean getIsActive()
-
getJobName
final String getJobName()
-
setJobName
final Unit setJobName(String jobName)
-
getDefaultOptions
final LoadOptions getDefaultOptions()
The default load options
-
getGlobalCacheFactory
final GlobalCacheFactory getGlobalCacheFactory()
An optional global cache which will hold the retry tasks
-
getCrawlEventHandler
final CrawlEventHandler getCrawlEventHandler()
The crawl event handler
-
getNoProxy
final Boolean getNoProxy()
Do not use a proxy
-
getClosed
final AtomicBoolean getClosed()
-
getSession
final PulsarSession getSession()
-
getAutoClose
final Boolean getAutoClose()
-
-
-
-