-
public final class CrawlFilters
TODO: needs full unit tests. TODO: move to a plugin (urlfilter/contentfilter, etc.).
-
-
Nested Class Summary
Nested Classes
Modifier and Type | Class | Description
public class | CrawlFilters.Companion |
-
Field Summary
Fields
Modifier and Type | Field | Description
private final Map<String, String> | reversedKeyRanges |
private final Array<String> | maxReversedKeyRange |
private final List<CrawlFilter> | crawlFilters |
private final CrawlUrlNormalizers | urlNormalizers |
private final CrawlUrlFilters | urlFilters |
private final String | scope |
private final ImmutableConfig | conf |
-
Constructor Summary
Constructors
Constructor | Description
CrawlFilters(ImmutableConfig conf) |
CrawlFilters(List<CrawlFilter> crawlFilters, CrawlUrlNormalizers urlNormalizers, CrawlUrlFilters urlFilters, String scope, ImmutableConfig conf) |
-
Method Summary
-
-
Constructor Detail
-
CrawlFilters
CrawlFilters(ImmutableConfig conf)
-
CrawlFilters
CrawlFilters(List<CrawlFilter> crawlFilters, CrawlUrlNormalizers urlNormalizers, CrawlUrlFilters urlFilters, String scope, ImmutableConfig conf)
-
-
Method Detail
-
getReversedKeyRanges
final Map<String, String> getReversedKeyRanges()
-
getMaxReversedKeyRange
final Array<String> getMaxReversedKeyRange()
-
getCrawlFilters
final List<CrawlFilter> getCrawlFilters()
-
getUrlNormalizers
final CrawlUrlNormalizers getUrlNormalizers()
-
getUrlFilters
final CrawlUrlFilters getUrlFilters()
-
getConf
final ImmutableConfig getConf()
-
isNormalizedValid
final Boolean isNormalizedValid(HyperlinkPersistable hyperlink)
-
isNormalizedValid
final Boolean isNormalizedValid(String url)
-
normalizeToEmpty
final String normalizeToEmpty(String url, String scope)
-
normalizeToEmpty
final String normalizeToEmpty(String url)
-
normalizeToNull
final String normalizeToNull(String url, String scope)
-
testUrlSatisfied
final Boolean testUrlSatisfied(String url)
-
testTextSatisfied
final Boolean testTextSatisfied(String text)
-
testKeyRangeSatisfied
final Boolean testKeyRangeSatisfied(String reversedUrl)
-
isDisallowed
final Boolean isDisallowed(Node node)
TODO: tricky logic.
-
veryLikelyBeDetailUrl
final Boolean veryLikelyBeDetailUrl(String url)
-
veryLikelyBeIndexUrl
final Boolean veryLikelyBeIndexUrl(String url)
-
veryLikelyBeMediaUrl
final Boolean veryLikelyBeMediaUrl(String url)
-
veryLikelyBeSearchUrl
final Boolean veryLikelyBeSearchUrl(String url)
Note: an index URL is not a search URL, even if it contains "search".
-
-
-
-