-
- All Implemented Interfaces:
-
java.lang.AutoCloseable
public interface PulsarSession extends AutoCloseable
-
-
Method Summary
Modifier and Type | Method | Description
abstract Boolean | registerClosable(AutoCloseable closable) | Close objects when sessions close
abstract Unit | disableCache()
abstract LoadOptions | options(String args, PulsarEventHandler eventHandler) | Create a new options, with a new volatile config
abstract String | property(String name)
abstract Unit | property(String name, String value)
abstract NormUrl | normalize(String url, LoadOptions options, Boolean toItemOption)
abstract List<NormUrl> | normalize(Iterable<String> urls, LoadOptions options, Boolean toItemOption)
abstract NormUrl | normalize(UrlAware url, LoadOptions options, Boolean toItemOption)
abstract List<NormUrl> | normalize(Collection<UrlAware> urls, LoadOptions options, Boolean toItemOption)
abstract NormUrl | normalizeOrNull(String url, LoadOptions options, Boolean toItemOption)
abstract NormUrl | normalizeOrNull(UrlAware url, LoadOptions options, Boolean toItemOption)
abstract WebPage | inject(String url) | Inject an url to fetch later
abstract WebPage | get(String url) | Get a page from database if exists
abstract WebPage | getOrNull(String url) | Get a page from database if exists
abstract Boolean | exists(String url) | Check if a page exists in the database
abstract CheckState | fetchState(WebPage page, LoadOptions options) | Return the fetch state of the page
abstract WebPage | open(String url) | Open a page with url
abstract WebPage | load(String url, String args) | Load an url with specified options
abstract WebPage | load(String url, LoadOptions options) | Load an url with specified options
abstract WebPage | load(UrlAware url, String args)
abstract WebPage | load(UrlAware url, LoadOptions options)
abstract WebPage | load(NormUrl normUrl)
abstract WebPage | loadDeferred(String url, LoadOptions options)
abstract WebPage | loadDeferred(UrlAware url, String args)
abstract WebPage | loadDeferred(UrlAware url, LoadOptions options)
abstract WebPage | loadDeferred(NormUrl normUrl)
abstract List<WebPage> | loadAll(Iterable<String> urls, LoadOptions options, Boolean toItemOption) | Load all urls with specified options, this causes a parallel fetching whenever applicable
abstract List<WebPage> | loadAll(Iterable<NormUrl> normUrls)
abstract CompletableFuture<WebPage> | loadAsync(NormUrl url)
abstract List<CompletableFuture<WebPage>> | loadAllAsync(Iterable<NormUrl> urls)
abstract AbstractPulsarSession | submit(UrlAware url)
abstract PulsarSession | submitAll(Iterable<UrlAware> urls)
abstract List<WebPage> | loadOutPages(String portalUrl, String args) | Load all out pages in a portal page
abstract List<WebPage> | loadOutPages(String portalUrl, LoadOptions options) | Load all out pages in a portal page
abstract List<CompletableFuture<WebPage>> | loadOutPagesAsync(String portalUrl, LoadOptions options)
abstract PulsarSession | submitLoadOutPages(String portalUrl, LoadOptions options)
abstract WebPage | loadResource(String url, String referer, String args) | Load an url as a resource without browser rendering in the browser context
abstract WebPage | loadResource(String url, String referer, LoadOptions opts) | Load an url as a resource without browser rendering in the browser context
abstract FeaturedDocument | parse(WebPage page, Boolean noCache) | Parse the Web page into DOM.
abstract FeaturedDocument | loadDocument(String url, String args)
abstract FeaturedDocument | loadDocument(String url, LoadOptions options)
abstract FeaturedDocument | loadDocument(NormUrl normUrl)
abstract Map<String, String> | scrape(String url, String args, Iterable<String> fieldSelectors)
abstract Map<String, String> | scrape(String url, String args, Map<String, String> fieldSelectors)
abstract List<Map<String, String>> | scrape(String url, String args, String restrictSelector, Iterable<String> fieldSelectors)
abstract List<Map<String, String>> | scrape(String url, String args, String restrictSelector, Map<String, String> fieldSelectors)
abstract List<Map<String, String>> | scrapeOutPages(String portalUrl, String args, Iterable<String> fieldSelectors)
abstract List<Map<String, String>> | scrapeOutPages(String portalUrl, String args, String restrictSelector, Iterable<String> fieldSelectors)
abstract List<Map<String, String>> | scrapeOutPages(String portalUrl, String args, Map<String, String> fieldSelectors)
abstract List<Map<String, String>> | scrapeOutPages(String portalUrl, String args, String restrictSelector, Map<String, String> fieldSelectors)
abstract Object | getVariable(String name)
abstract Unit | setVariable(String name, Object value)
abstract Unit | putSessionBean(Object obj)
abstract Unit | delete(String url)
abstract Unit | flush()
abstract Boolean | persist(WebPage page)
abstract Path | export(WebPage page, String ident)
abstract Path | export(FeaturedDocument doc, String ident)
abstract Path | exportTo(FeaturedDocument doc, Path path)
abstract Integer | getId() | The session id.
abstract AbstractPulsarContext | getContext() | The pulsar context
abstract VolatileConfig | getSessionConfig() | The session scope volatile config, every setting is supposed to be changed at any time and any place
abstract ImmutableConfig | getUnmodifiedConfig()
abstract BeanFactory | getSessionBeanFactory() | The scoped bean factory: for each volatileConfig object, there is a bean factory TODO: session scoped?
abstract String | getDisplay()
abstract ConcurrentExpiringLRUCache<String, WebPage> | getPageCache()
abstract ConcurrentExpiringLRUCache<String, FeaturedDocument> | getDocumentCache()
abstract GlobalCacheFactory | getGlobalCacheFactory()
-
-
Method Detail
-
registerClosable
abstract Boolean registerClosable(AutoCloseable closable)
Register an object to be closed when the session closes
-
disableCache
abstract Unit disableCache()
-
options
abstract LoadOptions options(String args, PulsarEventHandler eventHandler)
Create a new options object with a new volatile config
-
normalize
abstract NormUrl normalize(String url, LoadOptions options, Boolean toItemOption)
-
normalize
abstract List<NormUrl> normalize(Iterable<String> urls, LoadOptions options, Boolean toItemOption)
-
normalize
abstract NormUrl normalize(UrlAware url, LoadOptions options, Boolean toItemOption)
-
normalize
abstract List<NormUrl> normalize(Collection<UrlAware> urls, LoadOptions options, Boolean toItemOption)
-
normalizeOrNull
abstract NormUrl normalizeOrNull(String url, LoadOptions options, Boolean toItemOption)
-
normalizeOrNull
abstract NormUrl normalizeOrNull(UrlAware url, LoadOptions options, Boolean toItemOption)
-
inject
abstract WebPage inject(String url)
Inject a URL to fetch later
- Parameters:
url - The url followed by options
-
getOrNull
abstract WebPage getOrNull(String url)
Get a page from the database if it exists
- Parameters:
url - The url
-
exists
abstract Boolean exists(String url)
Check if a page exists in the database
- Parameters:
url - The url
-
fetchState
abstract CheckState fetchState(WebPage page, LoadOptions options)
Return the fetch state of the page
- Parameters:
page - The webpage
options - The load options
-
open
abstract WebPage open(String url)
Open a page with the given url
- Parameters:
url - The url of the page to open
-
load
abstract WebPage load(String url, String args)
Load a URL with the specified options
- Parameters:
url - The url to load
args - The load args
-
load
abstract WebPage load(String url, LoadOptions options)
Load a URL with the specified options
- Parameters:
url - The url to load
options - The load options
-
load
abstract WebPage load(UrlAware url, LoadOptions options)
-
loadDeferred
abstract WebPage loadDeferred(String url, LoadOptions options)
-
loadDeferred
abstract WebPage loadDeferred(UrlAware url, String args)
-
loadDeferred
abstract WebPage loadDeferred(UrlAware url, LoadOptions options)
-
loadDeferred
abstract WebPage loadDeferred(NormUrl normUrl)
-
loadAll
abstract List<WebPage> loadAll(Iterable<String> urls, LoadOptions options, Boolean toItemOption)
Load all urls with the specified options; this causes parallel fetching whenever applicable
- Parameters:
urls - The urls to load
options - The load options
-
loadAsync
abstract CompletableFuture<WebPage> loadAsync(NormUrl url)
-
loadAllAsync
abstract List<CompletableFuture<WebPage>> loadAllAsync(Iterable<NormUrl> urls)
-
submit
abstract AbstractPulsarSession submit(UrlAware url)
-
submitAll
abstract PulsarSession submitAll(Iterable<UrlAware> urls)
-
loadOutPages
abstract List<WebPage> loadOutPages(String portalUrl, String args)
Load all out pages in a portal page
- Parameters:
portalUrl - The portal url from where to load pages
args - The load args
-
loadOutPages
abstract List<WebPage> loadOutPages(String portalUrl, LoadOptions options)
Load all out pages in a portal page
- Parameters:
portalUrl - The portal url from where to load pages
options - The load options
-
loadOutPagesAsync
abstract List<CompletableFuture<WebPage>> loadOutPagesAsync(String portalUrl, LoadOptions options)
-
submitLoadOutPages
abstract PulsarSession submitLoadOutPages(String portalUrl, LoadOptions options)
-
loadResource
abstract WebPage loadResource(String url, String referer, String args)
Load a URL as a resource without browser rendering in the browser context
- Parameters:
url - The url to load
args - The load args
-
loadResource
abstract WebPage loadResource(String url, String referer, LoadOptions opts)
Load a URL as a resource without browser rendering in the browser context
- Parameters:
url - The url to load
opts - The load options
-
parse
abstract FeaturedDocument parse(WebPage page, Boolean noCache)
Parse the Web page into DOM. If the Web page has not changed since the last parse, use the last result if available
-
loadDocument
abstract FeaturedDocument loadDocument(String url, String args)
-
loadDocument
abstract FeaturedDocument loadDocument(String url, LoadOptions options)
-
loadDocument
abstract FeaturedDocument loadDocument(NormUrl normUrl)
-
scrape
abstract Map<String, String> scrape(String url, String args, Iterable<String> fieldSelectors)
-
scrape
abstract Map<String, String> scrape(String url, String args, Map<String, String> fieldSelectors)
-
scrape
abstract List<Map<String, String>> scrape(String url, String args, String restrictSelector, Iterable<String> fieldSelectors)
-
scrape
abstract List<Map<String, String>> scrape(String url, String args, String restrictSelector, Map<String, String> fieldSelectors)
-
scrapeOutPages
@ExperimentalApi() abstract List<Map<String, String>> scrapeOutPages(String portalUrl, String args, Iterable<String> fieldSelectors)
-
scrapeOutPages
@ExperimentalApi() abstract List<Map<String, String>> scrapeOutPages(String portalUrl, String args, String restrictSelector, Iterable<String> fieldSelectors)
-
scrapeOutPages
@ExperimentalApi() abstract List<Map<String, String>> scrapeOutPages(String portalUrl, String args, Map<String, String> fieldSelectors)
-
scrapeOutPages
@ExperimentalApi() abstract List<Map<String, String>> scrapeOutPages(String portalUrl, String args, String restrictSelector, Map<String, String> fieldSelectors)
-
getVariable
abstract Object getVariable(String name)
-
setVariable
abstract Unit setVariable(String name, Object value)
-
putSessionBean
abstract Unit putSessionBean(Object obj)
-
getId
abstract Integer getId()
The session id. The session id is expected to be set by the container, e.g. the H2 database runtime
-
getContext
abstract AbstractPulsarContext getContext()
The pulsar context
-
getSessionConfig
abstract VolatileConfig getSessionConfig()
The session-scoped volatile config; every setting is supposed to be changeable at any time and in any place
-
getUnmodifiedConfig
abstract ImmutableConfig getUnmodifiedConfig()
-
getSessionBeanFactory
abstract BeanFactory getSessionBeanFactory()
The scoped bean factory: for each volatileConfig object, there is a bean factory. TODO: should this be session scoped?
-
getDisplay
abstract String getDisplay()
-
getPageCache
abstract ConcurrentExpiringLRUCache<String, WebPage> getPageCache()
-
getDocumentCache
abstract ConcurrentExpiringLRUCache<String, FeaturedDocument> getDocumentCache()
-
getGlobalCacheFactory
abstract GlobalCacheFactory getGlobalCacheFactory()
-
-
-
-