package net.sf.okapi.steps.diffleverage;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.EventType;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.annotation.AltTranslation;
import net.sf.okapi.common.exceptions.OkapiBadStepInputException;
import net.sf.okapi.common.exceptions.OkapiException;
import net.sf.okapi.common.filters.IFilter;
import net.sf.okapi.common.filters.IFilterConfigurationMapper;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.pipeline.annotations.StepParameterMapping;
import net.sf.okapi.common.pipeline.annotations.StepParameterType;
import net.sf.okapi.common.query.MatchType;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.MultiEvent;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnitUtil;
import net.sf.okapi.lib.extra.diff.incava.DiffLists;
import net.sf.okapi.lib.search.lucene.analysis.AlphabeticNgramTokenizer;
import net.sf.okapi.lib.search.lucene.scorer.Util;

/**
 * Pipeline step that diffs the text units of an incoming (new) document against the text
 * units of an older version of that document and, for every pair the diff reports as a
 * match, carries the old target content over to the new text unit.
 *
 * <p>The old source document is supplied as the second pipeline input and, optionally, the
 * old target document as the third input. When no second input is set, every event is passed
 * through untouched. Otherwise the events of the new document are buffered until the end of
 * the document and then emitted together as one {@code MULTI_EVENT}.
 */
@UsingParameters(Parameters.class)
public class DiffLeverageStep extends BasePipelineStep {
    /** N-gram size handed to the tokenizer that is used for fuzzy scoring. */
    private static final int NGRAM_SIZE = 3;

    /** Message used whenever a segmented text unit is encountered. */
    private static final String SEGMENTED_INPUT_MESSAGE = "DiffLeverageStep only aligns unsegmented TextUnits";

    private IFilterConfigurationMapper fcMapper;
    /** Old version of the source document (second input); {@code null} disables the step. */
    private RawDocument oldSource;
    /** Old target document (third input); may be {@code null}. */
    private RawDocument oldTarget;
    /** Text units collected from the new document, in document order. */
    private List<ITextUnit> newTextUnits;
    /** Text units read from the old document, in document order. */
    private List<ITextUnit> oldTextUnits;
    /** Every buffered event of the new document, released at END_DOCUMENT. */
    private List<Event> newDocumentEvents;
    private LocaleId sourceLocale;
    private LocaleId targetLocale;
    /** Comparator the diff uses to decide whether two text units match. */
    private Comparator<ITextUnit> sourceComparator;
    /** Tokenizer used for scoring; only created when the fuzzy threshold is below 100. */
    private AlphabeticNgramTokenizer tokenizer;
    private boolean done = true;
    private Parameters params = new Parameters();

    /**
     * Sets the mapper used to create the filters that read the old documents.
     *
     * @param iFilterConfigurationMapper the filter configuration mapper
     */
    @StepParameterMapping(parameterType = StepParameterType.FILTER_CONFIGURATION_MAPPER)
    public void setFilterConfigurationMapper(IFilterConfigurationMapper iFilterConfigurationMapper) {
        this.fcMapper = iFilterConfigurationMapper;
    }

    /**
     * Sets the source locale.
     *
     * @param localeId the source locale
     */
    @StepParameterMapping(parameterType = StepParameterType.SOURCE_LOCALE)
    public void setSourceLocale(LocaleId localeId) {
        this.sourceLocale = localeId;
    }

    /**
     * Sets the target locale under which leveraged content is stored.
     *
     * @param localeId the target locale
     */
    @StepParameterMapping(parameterType = StepParameterType.TARGET_LOCALE)
    public void setTargetLocale(LocaleId localeId) {
        this.targetLocale = localeId;
    }

    /**
     * Sets the old source document (second pipeline input).
     *
     * @param rawDocument the old source document, or {@code null} to make the step a pass-through
     */
    @StepParameterMapping(parameterType = StepParameterType.SECOND_INPUT_RAWDOC)
    public void setSecondInput(RawDocument rawDocument) {
        this.oldSource = rawDocument;
    }

    /**
     * Sets the old target document (third pipeline input).
     *
     * @param rawDocument the old target document, or {@code null} when there is none
     */
    @StepParameterMapping(parameterType = StepParameterType.THIRD_INPUT_RAWDOC)
    public void setTertiaryInput(RawDocument rawDocument) {
        this.oldTarget = rawDocument;
    }

    /** Returns the human-readable description of this step. */
    public String getDescription() {
        return "Compare two source documents (i.e., different versions) and copy the old target content when we find a match. Can be a monolingual and bi-lingual input or three monolingual inputs. Paragraphs (TextUnits) must align in all cases";
    }

    /** Returns the display name of this step; also used as the origin of created matches. */
    public String getName() {
        return "Diff Leverage";
    }

    /** Returns the current parameters of this step. */
    public IParameters getParameters() {
        return this.params;
    }

    /**
     * Replaces the parameters of this step.
     *
     * @param iParameters the new parameters; must be a {@link Parameters} instance
     */
    public void setParameters(IParameters iParameters) {
        this.params = (Parameters) iParameters;
    }

    /**
     * Chooses the comparator for the batch: an exact comparator when the fuzzy threshold is
     * 100 or more, otherwise a fuzzy comparator together with the n-gram tokenizer that is
     * later used to score matches.
     */
    protected Event handleStartBatch(Event event) {
        this.done = true;
        boolean codeSensitive = this.params.isCodesensitive();
        int threshold = this.params.getFuzzyThreshold();
        if (threshold >= 100) {
            this.sourceComparator = new TextUnitComparator(codeSensitive);
        } else {
            this.tokenizer = Util.createNgramTokenizer(NGRAM_SIZE, this.sourceLocale);
            this.sourceComparator = new FuzzyTextUnitComparator(codeSensitive, threshold, this.sourceLocale);
        }
        return event;
    }

    /** Passes the end-of-batch event through unchanged. */
    protected Event handleEndBatch(Event event) {
        return event;
    }

    /**
     * Rejects raw documents: this step works on filtered events only.
     *
     * @throws OkapiBadStepInputException always
     */
    protected Event handleRawDocument(Event event) {
        throw new OkapiBadStepInputException("Encountered a RAW_DOCUMENT event. Expected a filtered event stream.");
    }

    /**
     * Starts buffering for a new document and loads the text units of the old document.
     * Does nothing when no old source document has been set.
     */
    protected Event handleStartDocument(Event event) {
        if (this.oldSource != null) {
            this.done = false;
            this.newTextUnits = new ArrayList<>();
            this.oldTextUnits = new ArrayList<>();
            this.newDocumentEvents = new LinkedList<>();
            getOldDocumentTextUnits();
        }
        return event;
    }

    /**
     * Runs the diff and releases all buffered events, followed by the END_DOCUMENT event
     * itself, as a single {@code MULTI_EVENT}. Without an old source document the event is
     * passed through unchanged.
     */
    protected Event handleEndDocument(Event event) {
        this.done = true;
        if (this.oldSource == null) {
            return event;
        }
        diffLeverage();
        this.newDocumentEvents.add(event);
        Event bundled = new Event(EventType.MULTI_EVENT, new MultiEvent(this.newDocumentEvents));
        // Drop the per-document state; the MultiEvent keeps its own reference to the event list.
        this.newTextUnits = null;
        this.oldTextUnits = null;
        this.newDocumentEvents = null;
        return bundled;
    }

    /** Buffers the event while a diff is in progress (see {@link #bufferEvent(Event)}). */
    protected Event handleStartSubDocument(Event event) {
        return bufferEvent(event);
    }

    /** Buffers the event while a diff is in progress (see {@link #bufferEvent(Event)}). */
    protected Event handleEndSubDocument(Event event) {
        return bufferEvent(event);
    }

    /** Buffers the event while a diff is in progress (see {@link #bufferEvent(Event)}). */
    protected Event handleStartGroup(Event event) {
        return bufferEvent(event);
    }

    /** Buffers the event while a diff is in progress (see {@link #bufferEvent(Event)}). */
    protected Event handleEndGroup(Event event) {
        return bufferEvent(event);
    }

    /**
     * Records the text unit for the diff and buffers its event.
     *
     * @throws OkapiBadStepInputException if the source of the text unit has been segmented
     */
    protected Event handleTextUnit(Event event) {
        ITextUnit textUnit = event.getTextUnit();
        // The segmentation check applies even when the step is otherwise a pass-through.
        if (textUnit.getSource().hasBeenSegmented()) {
            throw new OkapiBadStepInputException(SEGMENTED_INPUT_MESSAGE);
        }
        if (this.oldSource != null) {
            this.newTextUnits.add(textUnit);
        }
        return bufferEvent(event);
    }

    /** Buffers the event while a diff is in progress (see {@link #bufferEvent(Event)}). */
    protected Event handleDocumentPart(Event event) {
        return bufferEvent(event);
    }

    /** Returns {@code true} unless a document is currently being buffered. */
    public boolean isDone() {
        return this.done;
    }

    /**
     * Holds back an event of the new document until END_DOCUMENT.
     *
     * @param event the incoming event
     * @return the event itself when no old source document is set, otherwise a no-op event
     *         that takes its place while the original waits in the buffer
     */
    private Event bufferEvent(Event event) {
        if (this.oldSource == null) {
            return event;
        }
        this.newDocumentEvents.add(event);
        return Event.createNoopEvent();
    }

    /**
     * Reads every text unit of the old source document into {@link #oldTextUnits}. When an
     * old target document is present it is read in lock-step, and the source content of each
     * of its text units becomes the target of the corresponding old source text unit.
     *
     * <p>Both filters are closed on every exit path, including when opening or reading fails.
     */
    private void getOldDocumentTextUnits() {
        IFilter targetFilter = null;
        try (IFilter sourceFilter = this.fcMapper.createFilter(this.oldSource.getFilterConfigId(), (IFilter) null)) {
            if (this.oldTarget != null) {
                // The old target is read with the filter configuration of the old source.
                targetFilter = this.fcMapper.createFilter(this.oldSource.getFilterConfigId(), (IFilter) null);
                targetFilter.open(this.oldTarget);
            }
            sourceFilter.open(this.oldSource);
            while (sourceFilter.hasNext()) {
                Event sourceEvent = sourceFilter.next();
                if (sourceEvent.getEventType() != EventType.TEXT_UNIT) {
                    continue;
                }
                ITextUnit oldUnit = sourceEvent.getTextUnit();
                if (this.oldTarget != null) {
                    Event targetEvent = synchronize(targetFilter, EventType.TEXT_UNIT);
                    oldUnit.setTarget(this.targetLocale, targetEvent.getTextUnit().getSource());
                }
                this.oldTextUnits.add(oldUnit);
            }
        } finally {
            if (targetFilter != null) {
                targetFilter.close();
            }
        }
    }

    /**
     * Advances a filter until it yields an event of the requested type.
     *
     * @param iFilter   the filter to read from
     * @param eventType the event type to stop at
     * @return the first event of the requested type
     * @throws OkapiBadStepInputException if a text unit with a segmented source is read
     * @throws OkapiException             if the filter runs out of events first
     */
    private Event synchronize(IFilter iFilter, EventType eventType) {
        while (iFilter.hasNext()) {
            Event candidate = iFilter.next();
            if (candidate.isTextUnit() && candidate.getTextUnit().getSource().hasBeenSegmented()) {
                throw new OkapiBadStepInputException(SEGMENTED_INPUT_MESSAGE);
            }
            if (candidate.getEventType() == eventType) {
                return candidate;
            }
        }
        throw new OkapiException("Different number of source or target TextUnits. The source and target documents are not paragraph aligned.");
    }

    /**
     * Diffs the old and new text unit lists and processes every reported match whose old
     * text unit has a target for the target locale. Unless the diff-only option is set, the
     * old target is leveraged into the new text unit; in all cases the target of the new
     * text unit is tagged with a {@link DiffMatchAnnotation}.
     */
    private void diffLeverage() {
        DiffLists<ITextUnit> differ = new DiffLists<>(this.oldTextUnits, this.newTextUnits, this.sourceComparator);
        differ.diff();
        // Matches map an index into the old list to an index into the new list.
        for (Map.Entry<Integer, Integer> match : differ.getMatches().entrySet()) {
            ITextUnit oldUnit = this.oldTextUnits.get(match.getKey());
            ITextUnit newUnit = this.newTextUnits.get(match.getValue());
            TextContainer leveraged = oldUnit.getTarget(this.targetLocale);
            if (leveraged == null) {
                continue;
            }
            if (!this.params.isDiffOnly()) {
                leverageTarget(oldUnit, newUnit, leveraged);
            }
            // NOTE(review): the literal 2 is the creation-options argument of createTarget;
            // presumably one of the IResource COPY_* flags - confirm against the Okapi API.
            newUnit.createTarget(this.targetLocale, false, 2).setAnnotation(new DiffMatchAnnotation());
        }
    }

    /**
     * Carries the old target over to a matched new text unit: aligns the inline codes with
     * the new source, optionally installs the result as the target, and records it as an
     * alternate translation.
     *
     * @param oldUnit   the matched text unit of the old document
     * @param newUnit   the matched text unit of the new document
     * @param leveraged the old target content; modified in place
     */
    private void leverageTarget(ITextUnit oldUnit, ITextUnit newUnit, TextContainer leveraged) {
        boolean exactOnly = this.params.getFuzzyThreshold() >= 100;
        // Exact comparisons always score 100; fuzzy ones are scored with the n-gram tokenizer.
        int score = exactOnly
                ? 100
                : (int) Util.calculateNgramDiceCoefficient(
                        oldUnit.getSource().getFirstContent().toString(),
                        newUnit.getSource().getFirstContent().toString(),
                        this.tokenizer);

        newUnit.getSource().getFirstContent().alignCodeIds(leveraged.getFirstContent());
        leveraged.setContent(TextUnitUtil.copySrcCodeDataToMatchingTrgCodes(
                newUnit.getSource().getFirstContent(), leveraged.getFirstContent(), true, false, (TextFragment) null, newUnit));

        if (this.params.isCopyToTarget()) {
            newUnit.setTarget(this.targetLocale, leveraged);
        }

        MatchType matchType = exactOnly ? MatchType.EXACT_PREVIOUS_VERSION : MatchType.FUZZY_PREVIOUS_VERSION;
        TextUnitUtil.addAltTranslation(
                newUnit.createTarget(this.targetLocale, false, 2),
                new AltTranslation(
                        this.sourceLocale,
                        this.targetLocale,
                        newUnit.getSource().getUnSegmentedContentCopy(),
                        oldUnit.getSource().getUnSegmentedContentCopy(),
                        leveraged.getUnSegmentedContentCopy(),
                        matchType,
                        score,
                        getName())).sort();
    }
}
