package ai.platon.pulsar.persist;

import ai.platon.pulsar.common.DateTimes;
import ai.platon.pulsar.persist.metadata.CrawlVariables;
import ai.platon.pulsar.persist.metadata.Name;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.time.temporal.TemporalUnit;
import java.util.ArrayList;
import java.util.List;
import org.apache.avro.util.Utf8;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

/* loaded from: input_file:ai/platon/pulsar/persist/WebPageExt.class */
/**
 * Convenience operations layered on top of a {@link WebPage}: title and modification-time
 * sniffing, link accumulation, and publish/modify/fetch time bookkeeping.
 *
 * <p>The class holds no state of its own; every method reads from or writes to the wrapped page.
 */
public class WebPageExt {
    /** Link count above which the existing link list is trimmed before new links are appended. */
    private static final int MAX_LINKS_PER_PAGE = 4000;

    /** Number of trailing links kept when the link list is trimmed (1333). */
    private static final int LINKS_KEPT_ON_OVERFLOW = MAX_LINKS_PER_PAGE / 3;

    private final WebPage page;

    /**
     * Wraps the given page.
     *
     * @param webPage the page all operations of this instance act on
     */
    public WebPageExt(WebPage webPage) {
        this.page = webPage;
    }

    /**
     * Picks a title for the page, trying in order: content title, anchor, page title, location
     * and finally the URL. The first non-empty candidate wins; later candidates are only read
     * when all earlier ones were empty.
     *
     * @return the first non-empty candidate, or the URL when every other candidate is empty
     */
    @NotNull
    public String sniffTitle() {
        String title = this.page.getContentTitle();
        if (title.isEmpty()) {
            title = this.page.getAnchor().toString();
        }
        if (title.isEmpty()) {
            title = this.page.getPageTitle();
        }
        if (title.isEmpty()) {
            title = this.page.getLocation();
        }
        if (title.isEmpty()) {
            title = this.page.getUrl();
        }
        return title;
    }

    /**
     * Stores the same text as the page's content, content text and page text.
     *
     * @param str the text to store in all three places
     */
    public void setTextCascaded(String str) {
        this.page.setContent(str);
        this.page.setContentText(str);
        this.page.setPageText(str);
    }

    /**
     * Appends the URLs of the given hyperlinks to the page's links, skipping URLs that are
     * already present, then updates the page's imprecise link count to the resulting list size.
     *
     * <p>If the page already holds more than {@value #MAX_LINKS_PER_PAGE} links, only its
     * trailing 1333 links are kept before the new ones are appended.
     *
     * @param iterable the hyperlinks whose URLs should be added
     */
    public void addHyperlinks(Iterable<HyperlinkPersistable> iterable) {
        List<CharSequence> links = linksReadyForAppend();
        for (HyperlinkPersistable hyperlink : iterable) {
            appendIfAbsent(links, WebPage.u8(hyperlink.getUrl()));
        }
        storeLinks(links);
    }

    /**
     * Appends the given links to the page's links, skipping links that are already present,
     * then updates the page's imprecise link count to the resulting list size.
     *
     * <p>If the page already holds more than {@value #MAX_LINKS_PER_PAGE} links, only its
     * trailing 1333 links are kept before the new ones are appended.
     *
     * @param iterable the links to add
     */
    public void addLinks(Iterable<CharSequence> iterable) {
        List<CharSequence> links = linksReadyForAppend();
        for (CharSequence link : iterable) {
            appendIfAbsent(links, WebPage.u8(link.toString()));
        }
        storeLinks(links);
    }

    /** Returns the page's link list, trimmed to its trailing part when it has grown too large. */
    private List<CharSequence> linksReadyForAppend() {
        List<CharSequence> links = this.page.getLinks();
        if (links.size() > MAX_LINKS_PER_PAGE) {
            // Copy the retained tail rather than keeping the subList view: a view keeps the
            // whole oversized backing list reachable and repeated trims would stack views.
            int from = links.size() - LINKS_KEPT_ON_OVERFLOW;
            links = new ArrayList<>(links.subList(from, links.size()));
        }
        return links;
    }

    /** Appends {@code link} to {@code links} unless an equal element is already in the list. */
    private static void appendIfAbsent(List<CharSequence> links, Utf8 link) {
        if (!links.contains(link)) {
            links.add(link);
        }
    }

    /** Writes the link list back to the page and records its size as the imprecise link count. */
    private void storeLinks(List<CharSequence> links) {
        this.page.setLinks(links);
        this.page.setImpreciseLinkCount(links.size());
    }

    /**
     * Moves the page's content publish time forward to {@code instant} when it is later than
     * the current value; the replaced value is kept as the previous content publish time.
     *
     * @param instant the candidate publish time
     * @return {@code false} if {@code page.isValidContentModifyTime(instant)} rejects the
     *     candidate; {@code true} otherwise, whether or not the stored time actually changed
     */
    public boolean updateContentPublishTime(Instant instant) {
        if (!this.page.isValidContentModifyTime(instant)) {
            return false;
        }
        Instant current = this.page.getContentPublishTime();
        if (instant.isAfter(current)) {
            this.page.setPrevContentPublishTime(current);
            this.page.setContentPublishTime(instant);
        }
        return true;
    }

    /**
     * Moves the page's content modified time forward to {@code instant} when it is later than
     * the current value; the replaced value is kept as the previous content modified time.
     *
     * @param instant the candidate modified time
     * @return {@code false} if {@code page.isValidContentModifyTime(instant)} rejects the
     *     candidate; {@code true} otherwise, whether or not the stored time actually changed
     */
    public boolean updateContentModifiedTime(Instant instant) {
        if (!this.page.isValidContentModifyTime(instant)) {
            return false;
        }
        Instant current = this.page.getContentModifiedTime();
        if (instant.isAfter(current)) {
            this.page.setPrevContentModifiedTime(current);
            this.page.setContentModifiedTime(instant);
        }
        return true;
    }

    /**
     * Moves the page's referenced content publish time forward to {@code instant} when it is
     * later than the current value; the replaced value is kept as the previous one.
     *
     * <p>Unlike {@link #updateContentPublishTime(Instant)} and
     * {@link #updateContentModifiedTime(Instant)}, this method reports whether the stored time
     * actually changed.
     *
     * @param instant the candidate publish time
     * @return {@code true} only when the stored time was replaced; {@code false} when the
     *     candidate is rejected by {@code page.isValidContentModifyTime(instant)} or is not
     *     later than the current value
     */
    public boolean updateRefContentPublishTime(Instant instant) {
        if (!this.page.isValidContentModifyTime(instant)) {
            return false;
        }
        Instant current = this.page.getRefContentPublishTime();
        if (!instant.isAfter(current)) {
            return false;
        }
        this.page.setPrevRefContentPublishTime(current);
        this.page.setRefContentPublishTime(instant);
        return true;
    }

    /**
     * Returns the first entry of the page's comma-separated index time history.
     *
     * @param instant the value to return when the history is empty or its first entry does not
     *     parse to a time after {@link Instant#EPOCH}
     * @return the first recorded index time, or {@code instant} as described above
     */
    public Instant getFirstIndexTime(Instant instant) {
        Instant first = firstTimeOf(this.page.getIndexTimeHistory(CrawlVariables.UNKNOWN));
        return first == null ? instant : first;
    }

    /**
     * Rebuilds the page's {@link Name#FETCH_TIME_HISTORY} metadata entry from its current value
     * and the given fetch time via {@code DateTimes.constructTimeHistory}.
     *
     * @param instant the fetch time to record
     */
    public void updateFetchTimeHistory(@NotNull Instant instant) {
        // NOTE(review): the trailing 10 is presumably the maximum number of history entries
        // kept; confirm against DateTimes.constructTimeHistory.
        this.page.getMetadata().set(
                Name.FETCH_TIME_HISTORY,
                DateTimes.constructTimeHistory(
                        this.page.getMetadata().get(Name.FETCH_TIME_HISTORY), instant, 10));
    }

    /**
     * Sets the page's previous fetch time and fetch time, and records the new fetch time in the
     * fetch time history.
     *
     * @param instant the value stored as the previous fetch time
     * @param instant2 the value stored as the fetch time and added to the history
     */
    public void updateFetchTime(Instant instant, Instant instant2) {
        this.page.setPrevFetchTime(instant);
        this.page.setFetchTime(instant2);
        updateFetchTimeHistory(instant2);
    }

    /**
     * Returns the first entry of the page's comma-separated fetch time history.
     *
     * @return the first recorded fetch time, or {@code null} when the history is empty or its
     *     first entry does not parse to a time after {@link Instant#EPOCH}
     */
    @Nullable
    public Instant getFirstFetchTime() {
        return firstTimeOf(this.page.getFetchTimeHistory(CrawlVariables.UNKNOWN));
    }

    /**
     * Parses the text before the first comma of {@code history} (or all of it when there is no
     * comma) and returns it only if it is after {@link Instant#EPOCH}; otherwise {@code null}.
     */
    @Nullable
    private static Instant firstTimeOf(String history) {
        if (history.isEmpty()) {
            return null;
        }
        // indexOf instead of split(",")[0]: split drops trailing empty strings, so a history
        // made only of commas yields an empty array and [0] would throw.
        int comma = history.indexOf(',');
        String firstEntry = comma < 0 ? history : history.substring(0, comma);
        Instant parsed = DateTimes.parseInstant(firstEntry, Instant.EPOCH);
        return parsed.isAfter(Instant.EPOCH) ? parsed : null;
    }

    /**
     * Determines the latest known modification time of the page.
     *
     * <p>Starts from the page's modified time and advances it to the header's last-modified
     * time, the content modified time and the content publish time, in that order, whenever
     * the candidate passes {@code page.isValidContentModifyTime} and is later than the value
     * found so far. A result more than one day in the future is replaced by the current time.
     *
     * @return the sniffed modification time
     */
    @NotNull
    public Instant sniffModifiedTime() {
        Instant modifiedTime = this.page.getModifiedTime();
        Instant lastModified = this.page.getHeaders().getLastModified();
        Instant contentModifiedTime = this.page.getContentModifiedTime();

        modifiedTime = laterValidTime(modifiedTime, lastModified);
        modifiedTime = laterValidTime(modifiedTime, contentModifiedTime);
        modifiedTime = laterValidTime(modifiedTime, this.page.getContentPublishTime());

        Instant now = Instant.now();
        if (modifiedTime.isAfter(now.plus(1L, ChronoUnit.DAYS))) {
            modifiedTime = now;
        }
        return modifiedTime;
    }

    /** Returns {@code candidate} if the page accepts it and it is after {@code current}. */
    private Instant laterValidTime(Instant current, Instant candidate) {
        boolean newer = this.page.isValidContentModifyTime(candidate) && candidate.isAfter(current);
        return newer ? candidate : current;
    }
}
