-
- All Implemented Interfaces:
-
ai.platon.pulsar.common.config.Parameterized, ai.platon.pulsar.crawl.common.JobInitialized, java.lang.AutoCloseable
public final class TaskScheduler implements Parameterized, JobInitialized, AutoCloseable
-
-
Nested Class Summary
Nested Classes
Modifier and Type | Class | Description
public final class TaskScheduler.Status
public class TaskScheduler.Companion
-
Field Summary
Fields
Modifier and Type | Field | Description
private final Integer id
private Boolean skipTruncated
private Boolean storeContent
private Boolean indexJIT
private Boolean parse
private Instant lastTaskStartTime
private Instant lastTaskFinishTime
private final String name
private final TaskMonitor tasksMonitor
private final PageParser pageParser
private final JITIndexer jitIndexer
private final CoreMetrics coreMetrics
private final MiscMessageWriter messageWriter
private final ImmutableConfig immutableConfig
-
Constructor Summary
Constructors Constructor Description TaskScheduler(TaskMonitor tasksMonitor, PageParser pageParser, JITIndexer jitIndexer, CoreMetrics coreMetrics, MiscMessageWriter messageWriter, ImmutableConfig immutableConfig)
-
Method Summary
Modifier and Type | Method | Description
final Integer getId()
final Boolean getSkipTruncated()
final Unit setSkipTruncated(Boolean skipTruncated)
final Boolean getStoreContent()
final Unit setStoreContent(Boolean storeContent)
final Boolean getIndexJIT()
final Unit setIndexJIT(Boolean indexJIT)
final Boolean getParse()
final Unit setParse(Boolean parse)
final Instant getLastTaskStartTime()
final Unit setLastTaskStartTime(Instant lastTaskStartTime)
final Instant getLastTaskFinishTime()
final Unit setLastTaskFinishTime(Instant lastTaskFinishTime)
final String getName()
final TaskMonitor getTasksMonitor()
final PageParser getPageParser()
final JITIndexer getJitIndexer()
final CoreMetrics getCoreMetrics()
final MiscMessageWriter getMessageWriter()
final ImmutableConfig getImmutableConfig()
Unit setup(ImmutableConfig jobConf)
Params getParams()
final JobFetchTask schedule() — Schedule a queue with the given priority and given poolId
final List<JobFetchTask> schedule(Integer number) — Schedule the queues with top priority
final List<JobFetchTask> schedule(PoolId poolId, Integer number) — A null queue id means the queue with top priority. Consume a fetch item and try to download the target web page.
final Unit finishUnchecked(JobFetchTask fetchTask) — Finish the fetch item anyway, even if it failed to download the target page
final Unit finish(PoolId poolId, Integer itemId) — Finished downloading the web page. Multi-threaded: non-synchronized class member variables are not allowed inside this method.
final Unit updateCounters() — Wait for a while and report task status. TODO: use the metrics system instead
Unit close()
-
-
Constructor Detail
-
TaskScheduler
TaskScheduler(TaskMonitor tasksMonitor, PageParser pageParser, JITIndexer jitIndexer, CoreMetrics coreMetrics, MiscMessageWriter messageWriter, ImmutableConfig immutableConfig)
-
-
Method Detail
-
getSkipTruncated
final Boolean getSkipTruncated()
-
setSkipTruncated
final Unit setSkipTruncated(Boolean skipTruncated)
-
getStoreContent
final Boolean getStoreContent()
-
setStoreContent
final Unit setStoreContent(Boolean storeContent)
-
getIndexJIT
final Boolean getIndexJIT()
-
setIndexJIT
final Unit setIndexJIT(Boolean indexJIT)
-
getLastTaskStartTime
final Instant getLastTaskStartTime()
-
setLastTaskStartTime
final Unit setLastTaskStartTime(Instant lastTaskStartTime)
-
getLastTaskFinishTime
final Instant getLastTaskFinishTime()
-
setLastTaskFinishTime
final Unit setLastTaskFinishTime(Instant lastTaskFinishTime)
-
getTasksMonitor
final TaskMonitor getTasksMonitor()
-
getPageParser
final PageParser getPageParser()
-
getJitIndexer
final JITIndexer getJitIndexer()
-
getCoreMetrics
final CoreMetrics getCoreMetrics()
-
getMessageWriter
final MiscMessageWriter getMessageWriter()
-
getImmutableConfig
final ImmutableConfig getImmutableConfig()
-
getParams
Params getParams()
-
schedule
final JobFetchTask schedule()
Schedule a queue with the given priority and given poolId
-
schedule
final List<JobFetchTask> schedule(Integer number)
Schedule the queues with top priority
-
schedule
final List<JobFetchTask> schedule(PoolId poolId, Integer number)
A null queue id means the queue with top priority. Consume a fetch item and try to download the target web page.
-
finishUnchecked
final Unit finishUnchecked(JobFetchTask fetchTask)
Finish the fetch item anyway, even if it failed to download the target page.
-
finish
final Unit finish(PoolId poolId, Integer itemId)
Finished downloading the web page.
Multi-threaded: non-synchronized class member variables are not allowed inside this method.
-
updateCounters
final Unit updateCounters()
Wait for a while and report task status.
TODO: use the metrics system instead.
-
-
-
-