package net.sf.okapi.filters.mosestext;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.BOMNewlineEncodingDetector;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.EventType;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.IdGenerator;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.encoder.EncoderManager;
import net.sf.okapi.common.exceptions.OkapiIOException;
import net.sf.okapi.common.exceptions.OkapiUnsupportedEncodingException;
import net.sf.okapi.common.filters.FilterConfiguration;
import net.sf.okapi.common.filters.IFilter;
import net.sf.okapi.common.filters.IFilterConfigurationMapper;
import net.sf.okapi.common.filterwriter.GenericFilterWriter;
import net.sf.okapi.common.filterwriter.IFilterWriter;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.Ending;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.StartDocument;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnit;
import net.sf.okapi.common.skeleton.GenericSkeleton;
import net.sf.okapi.common.skeleton.GenericSkeletonWriter;
import net.sf.okapi.common.skeleton.ISkeletonWriter;
import net.sf.okapi.filters.xliff.CodeTypeForPairedTagsHelper;

/**
 * Okapi filter for "Moses Text" documents (MIME type {@code text/x-mosestext}).
 *
 * <p>The input is read line by line. A line normally produces one text unit.
 * A line that starts with a {@code <mrk mtype="seg">} tag opens a segment that
 * runs, possibly over several lines, until a line ending with {@code </mrk>};
 * such a segment produces a single text unit whose skeleton keeps the opening
 * and closing {@code mrk} tags.
 *
 * <p>Inside the text, {@code <g id="...">}/{@code </g>}, {@code <x/>},
 * {@code <bx/>} and {@code <ex/>} elements are converted to inline codes and
 * {@code <lb/>} is converted to a line-feed (see {@link #fromPseudoXLIFF(String)}).
 *
 * <p>This filter has no parameters: {@link #getParameters()} returns
 * {@code null} and {@link #setParameters(IParameters)} does nothing.
 */
@UsingParameters
public class MosesTextFilter implements IFilter {
    public static final String MOSESTEXT_MIME_TYPE = "text/x-mosestext";

    /** Tag closing a segment opened by {@link #STARTSEGMENT}. */
    private static final String ENDSEGMENT = "</mrk>";
    /** Opening tag of a (possibly multi-line) segment; only tested at the start of a line. */
    private static final Pattern STARTSEGMENT = Pattern.compile("<mrk\\s+mtype\\s*=\\s*?[\"']seg[\"'].*?>");
    /** Group 1 = opening {@code <g id="...">} (id in group 3), group 4 = closing {@code </g>}. */
    private static final Pattern OPENCLOSE = Pattern.compile("(\\<g(\\s+)id=['\"](.*?)['\"]>)|(\\</g\\>)");
    /** Empty {@code bx}/{@code ex}/{@code x} element: name in group 1, id in group 3. */
    private static final Pattern ISOLATED = Pattern.compile("\\<(bx|ex|x)(\\s+)id=['\"](.*?)['\"](\\s*?)/>");
    /** Empty {@code <lb/>} element. */
    private static final Pattern LINEBREAK = Pattern.compile("(\\<lb\\s*?/>)");

    // First character of the two-character inline-code markers written into the coded text.
    // NOTE(review): these values (57601..57603) presumably equal TextFragment.MARKER_OPENING,
    // MARKER_CLOSING and MARKER_ISOLATED; confirm and reference those constants instead.
    private static final char OPENING_MARKER = '\uE101';
    private static final char CLOSING_MARKER = '\uE102';
    private static final char ISOLATED_MARKER = '\uE103';

    private BufferedReader reader;
    private String lineBreak;
    /** Event that the next call to {@link #next()} will return; {@code null} when none is pending. */
    private Event event;
    private IdGenerator tuIdGen;
    private EncoderManager encoderManager;
    /** Skeleton of the text unit currently being built; recreated on every {@link #next()} call. */
    private GenericSkeleton skel;
    private RawDocument input;

    /** Does nothing: this filter does not support cancellation. */
    @Override
    public void cancel() {
    }

    /**
     * Closes the input document (if any) and then the reader (if any).
     *
     * @throws OkapiIOException if closing the reader fails
     */
    @Override
    public void close() {
        if (this.input != null) {
            this.input.close();
        }
        if (this.reader == null) {
            return;
        }
        try {
            this.reader.close();
            this.reader = null;
        } catch (IOException e) {
            throw new OkapiIOException(e);
        }
    }

    /** @return a new {@link GenericSkeletonWriter} */
    @Override
    public ISkeletonWriter createSkeletonWriter() {
        return new GenericSkeletonWriter();
    }

    /** @return a new {@link GenericFilterWriter} using this filter's skeleton writer and encoder manager */
    @Override
    public IFilterWriter createFilterWriter() {
        return new GenericFilterWriter(createSkeletonWriter(), getEncoderManager());
    }

    /** @return a list holding the single default configuration of this filter */
    @Override
    public List<FilterConfiguration> getConfigurations() {
        List<FilterConfiguration> configurations = new ArrayList<>();
        configurations.add(new FilterConfiguration(
                getName(),
                MOSESTEXT_MIME_TYPE,
                getClass().getName(),
                "Moses Text Default",
                "Default Moses Text configuration.",
                null,
                ".txt;"));
        return configurations;
    }

    /**
     * Returns the encoder manager, creating it on first use with a mapping from
     * {@link #MOSESTEXT_MIME_TYPE} to the Moses text encoder class.
     */
    @Override
    public EncoderManager getEncoderManager() {
        if (this.encoderManager == null) {
            this.encoderManager = new EncoderManager();
            this.encoderManager.setMapping(MOSESTEXT_MIME_TYPE, "net.sf.okapi.filters.mosestext.MosesTextEncoder");
        }
        return this.encoderManager;
    }

    @Override
    public String getDisplayName() {
        return "Moses Text Filter";
    }

    @Override
    public String getMimeType() {
        return MOSESTEXT_MIME_TYPE;
    }

    @Override
    public String getName() {
        return "okf_mosestext";
    }

    /** @return always {@code null}: this filter has no parameters */
    @Override
    public IParameters getParameters() {
        return null;
    }

    /** @return {@code true} while an event is pending for {@link #next()} */
    @Override
    public boolean hasNext() {
        return this.event != null;
    }

    /**
     * Returns the pending event and prepares the one that follows it.
     *
     * <p>Unless the pending event is the end of the document, the input is read
     * until one complete text unit is available (a single line, or everything
     * from a {@code <mrk mtype="seg">} start tag up to the closing
     * {@code </mrk>}); at end of input an END_DOCUMENT event is queued instead.
     *
     * @return the event that was pending when the method was called
     * @throws java.util.NoSuchElementException if no event is pending
     * @throws OkapiIOException if reading fails, if a segment start tag is found
     *         inside an open segment, or if the input ends inside an open segment
     */
    @Override
    public Event next() {
        Event current = this.event;
        if (current == null) {
            throw new java.util.NoSuchElementException("No more events: the filter is not open or the document has ended.");
        }
        this.event = null;
        if (current.getEventType() == EventType.END_DOCUMENT) {
            return current;
        }
        try {
            this.skel = new GenericSkeleton();
            StringBuilder buffer = new StringBuilder();
            boolean inSegment = false;
            String line;
            while ((line = this.reader.readLine()) != null) {
                Matcher start = STARTSEGMENT.matcher(line);
                if (start.lookingAt()) {
                    if (inSegment) {
                        throw new OkapiIOException("End of segment expected before a new segment.");
                    }
                    // The start tag goes to the skeleton; the rest of the line is content.
                    String startTag = start.group();
                    line = line.substring(startTag.length());
                    inSegment = true;
                    this.skel.append(startTag);
                } else if (!inSegment) {
                    // Plain line outside of any segment: it is a text unit on its own.
                    buffer.append(line);
                    this.event = processBuffer(buffer);
                    return current;
                }
                if (line.endsWith(ENDSEGMENT)) {
                    buffer.append(line, 0, line.length() - ENDSEGMENT.length());
                    this.event = processBuffer(buffer);
                    return current;
                }
                // Segment continues on the next line: keep the line boundary in the content.
                buffer.append(line).append(Util.LINEBREAK_UNIX);
            }
            if (inSegment) {
                throw new OkapiIOException("End of segment expected before the end of the document.");
            }
            // End of input: queue the final event and stop reading. Without leaving the
            // loop here readLine() would keep returning null and the method would never return.
            this.event = new Event(EventType.END_DOCUMENT, new Ending(IdGenerator.END_DOCUMENT));
            return current;
        } catch (IOException e) {
            throw new OkapiIOException(e);
        }
    }

    /** Opens the document; equivalent to {@code open(rawDocument, true)}. */
    @Override
    public void open(RawDocument rawDocument) {
        open(rawDocument, true);
    }

    /**
     * Opens the document: detects and removes a BOM, creates the reader with the
     * detected encoding and queues the START_DOCUMENT event.
     *
     * @param rawDocument the document to read; its encoding is set to the detected one
     * @param generateSkeleton not used by this implementation
     * @throws OkapiUnsupportedEncodingException if the detected encoding is not supported
     */
    @Override
    public void open(RawDocument rawDocument, boolean generateSkeleton) {
        this.input = rawDocument;
        BOMNewlineEncodingDetector detector =
                new BOMNewlineEncodingDetector(rawDocument.getStream(), BOMNewlineEncodingDetector.UTF_8);
        detector.detectAndRemoveBom();
        rawDocument.setEncoding(detector.getEncoding());
        String encoding = rawDocument.getEncoding();
        try {
            this.reader = new BufferedReader(new InputStreamReader(detector.getInputStream(), encoding));
            this.lineBreak = detector.getNewlineType().toString();
            boolean hasUtf8Bom = detector.hasUtf8Bom();
            String documentName = null;
            if (rawDocument.getInputURI() != null) {
                documentName = rawDocument.getInputURI().getPath();
            }
            this.tuIdGen = new IdGenerator(null);

            StartDocument startDocument = new StartDocument(IdGenerator.START_DOCUMENT);
            startDocument.setName(documentName);
            startDocument.setEncoding(encoding, hasUtf8Bom);
            startDocument.setLocale(rawDocument.getSourceLocale());
            startDocument.setLineBreak(this.lineBreak);
            startDocument.setFilterId(getName());
            startDocument.setFilterParameters(getParameters());
            startDocument.setFilterWriter(createFilterWriter());
            startDocument.setType(MOSESTEXT_MIME_TYPE);
            startDocument.setMimeType(MOSESTEXT_MIME_TYPE);
            startDocument.setMultilingual(false);
            this.event = new Event(EventType.START_DOCUMENT, startDocument);
        } catch (UnsupportedEncodingException e) {
            throw new OkapiUnsupportedEncodingException(String.format("The encoding '%s' is not supported.", encoding), e);
        }
    }

    /** Does nothing: this filter does not use a configuration mapper. */
    @Override
    public void setFilterConfigurationMapper(IFilterConfigurationMapper fcMapper) {
    }

    /** Does nothing: this filter has no parameters. */
    @Override
    public void setParameters(IParameters params) {
    }

    /**
     * Builds the TEXT_UNIT event for the content collected in {@code sb}.
     *
     * <p>The current skeleton is completed with the content placeholder, the
     * closing {@code </mrk>} when the skeleton already held something (that is,
     * a segment start tag), and the document's line-break.
     *
     * @param sb the raw content of the text unit
     * @return the new TEXT_UNIT event
     */
    private Event processBuffer(StringBuilder sb) {
        TextFragment content = fromPseudoXLIFF(sb.toString());
        TextUnit textUnit = new TextUnit(this.tuIdGen.createId());
        textUnit.setSourceContent(content);
        textUnit.setPreserveWhitespaces(true);

        // Must be evaluated before the placeholder is added to the skeleton.
        boolean startTagInSkeleton = !this.skel.isEmpty();
        this.skel.addContentPlaceholder(textUnit);
        if (startTagInSkeleton) {
            this.skel.append(ENDSEGMENT);
        }
        this.skel.add(this.lineBreak);
        textUnit.setSkeleton(this.skel);
        return new Event(EventType.TEXT_UNIT, textUnit);
    }

    /**
     * Converts a string with XLIFF-like inline markup into a text fragment.
     *
     * <p>Text containing neither {@code <} nor {@code &} is used as is.
     * Otherwise the carriage-return character references and the five
     * predefined XML entities are un-escaped, then {@code <g>}/{@code </g>},
     * {@code <bx/>}, {@code <ex/>} and {@code <x/>} elements are replaced by
     * inline codes and {@code <lb/>} elements by a line-feed.
     *
     * @param str the text to convert; may be {@code null} or empty
     * @return the resulting fragment (empty when {@code str} is null or empty)
     */
    public TextFragment fromPseudoXLIFF(String str) {
        TextFragment fragment = new TextFragment();
        if (Util.isEmpty(str)) {
            return fragment;
        }
        if (str.indexOf('<') == -1 && str.indexOf('&') == -1) {
            // Nothing to un-escape and no possible inline element.
            fragment.append(str);
            return fragment;
        }

        // "&amp;" is handled last so that its result is not un-escaped a second time.
        String unescaped = str.replaceAll("(&#13;)|(&#x0*?[dD];)", Util.LINEBREAK_MAC)
                .replace("&apos;", "'")
                .replace("&lt;", "<")
                .replace("&gt;", ">")
                .replace("&quot;", "\"")
                .replace("&amp;", "&");
        StringBuilder coded = new StringBuilder(unescaped);
        List<Code> codes = new ArrayList<>();

        // Paired <g> ... </g> elements. Each replacement changes the text, so the
        // search restarts from the beginning on a fresh snapshot every time.
        for (Matcher m = findFirst(OPENCLOSE, coded); m != null; m = findFirst(OPENCLOSE, coded)) {
            if (m.group(1) != null) {
                Code opening = new Code(TextFragment.TagType.OPENING, "g", m.group(1));
                opening.setId(Util.strToInt(m.group(3), -1));
                codes.add(opening);
                replaceWithMarker(coded, m, OPENING_MARKER, codes.size() - 1);
            } else {
                // No id is set on the closing code here.
                codes.add(new Code(TextFragment.TagType.CLOSING, "g", m.group(4)));
                replaceWithMarker(coded, m, CLOSING_MARKER, codes.size() - 1);
            }
        }

        // Empty <bx/>, <ex/> and <x/> elements.
        for (Matcher m = findFirst(ISOLATED, coded); m != null; m = findFirst(ISOLATED, coded)) {
            int id = Util.strToInt(m.group(3), -1);
            String elementName = m.group(1);
            Code code;
            if (elementName.equals("bx")) {
                code = new Code(TextFragment.TagType.OPENING, CodeTypeForPairedTagsHelper.DEFAULT_CODE_TYPE + id, m.group());
            } else if (elementName.equals("ex")) {
                code = new Code(TextFragment.TagType.CLOSING, CodeTypeForPairedTagsHelper.DEFAULT_CODE_TYPE + id, m.group());
            } else {
                code = new Code(TextFragment.TagType.PLACEHOLDER, "x", m.group());
            }
            code.setId(id);
            codes.add(code);
            replaceWithMarker(coded, m, ISOLATED_MARKER, codes.size() - 1);
        }

        // <lb/> elements become plain line-feeds.
        for (Matcher m = findFirst(LINEBREAK, coded); m != null; m = findFirst(LINEBREAK, coded)) {
            coded.replace(m.start(), m.end(), Util.LINEBREAK_UNIX);
        }

        fragment.setCodedText(coded.toString(), codes);
        return fragment;
    }

    /** Returns a matcher positioned on the first match of {@code pattern} in a snapshot of {@code text}, or null. */
    private static Matcher findFirst(Pattern pattern, StringBuilder text) {
        Matcher matcher = pattern.matcher(text.toString());
        return matcher.find() ? matcher : null;
    }

    /** Replaces the matched span by the two-character marker referring to the code at {@code codeIndex}. */
    private static void replaceWithMarker(StringBuilder text, Matcher match, char marker, int codeIndex) {
        String reference = new String(new char[] {marker, TextFragment.toChar(codeIndex)});
        text.replace(match.start(), match.end(), reference);
    }
}
