/*
 * Decompiled with CFR 0.152.
 */
package io.annot8.components.tesseract.processors;

import io.annot8.api.capabilities.Capabilities;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.components.annotations.SettingsClass;
import io.annot8.api.components.responses.ProcessorResponse;
import io.annot8.api.context.Context;
import io.annot8.api.data.Content;
import io.annot8.api.data.Item;
import io.annot8.api.properties.Properties;
import io.annot8.api.settings.Description;
import io.annot8.common.components.AbstractProcessor;
import io.annot8.common.components.AbstractProcessorDescriptor;
import io.annot8.common.components.capabilities.SimpleCapabilities;
import io.annot8.common.data.content.FileContent;
import io.annot8.common.data.content.Text;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import net.sourceforge.tess4j.util.LoadLibs;
import org.apache.commons.io.FilenameUtils;

@ComponentName(value="Tesseract OCR")
@ComponentDescription(value="Use Tesseract to extract text from images stored in FileContent")
@SettingsClass(value=Settings.class)
public class OCR
extends AbstractProcessorDescriptor<Processor, Settings> {
    /**
     * Builds a {@link Processor} backed by a newly created Tesseract engine that has been
     * configured from the supplied settings.
     *
     * @param context the component context; not used by this method
     * @param settings the OCR settings to copy onto the Tesseract engine
     * @return a processor restricted to the file extensions listed in the settings
     */
    protected Processor createComponent(Context context, Settings settings) {
        Tesseract tesseract = new Tesseract();

        // Config files are only handed to Tesseract when at least one has been listed.
        List<String> configs = settings.getConfigs();
        if (!configs.isEmpty()) {
            tesseract.setConfigs(configs);
        }

        tesseract.setDatapath(settings.getDataPath());
        tesseract.setLanguage(settings.getLanguage());
        tesseract.setOcrEngineMode(settings.getOcrEngine());
        tesseract.setPageSegMode(settings.getPageSegmentation());

        // Forward every additional name/value pair to the engine as a Tesseract variable.
        for (Map.Entry<String, String> variable : settings.getVariables().entrySet()) {
            tesseract.setTessVariable(variable.getKey(), variable.getValue());
        }

        return new Processor(settings.getExtensions(), tesseract);
    }

    /**
     * Describes what this component does to an item: it processes {@link FileContent} and
     * creates {@link Text} content.
     *
     * @return the capabilities of the OCR component
     */
    public Capabilities capabilities() {
        SimpleCapabilities.Builder builder = new SimpleCapabilities.Builder();
        return builder
            .withProcessesContent(FileContent.class)
            .withCreatesContent(Text.class)
            .build();
    }

    /**
     * Configuration for the Tesseract OCR component.
     *
     * <p>Every property has a default, so a newly constructed instance is usable as long as the
     * bundled {@code tessdata} resources can be extracted.
     */
    public static class Settings
    implements io.annot8.api.settings.Settings {
        /** File extensions that are eligible for OCR. */
        private List<String> extensions = Arrays.asList("bmp", "gif", "jpg", "jpeg", "pdf", "tif", "tiff");

        /** Names of Tesseract config files to apply; empty means none. */
        private List<String> configs = new ArrayList<>();

        /** Location of the Tesseract model data; defaults to the extracted {@code tessdata} resources. */
        private String dataPath = LoadLibs.extractTessResources("tessdata").toString();

        /** Language passed to Tesseract. */
        private String language = "eng";

        // NOTE(review): 3 is presumably Tesseract's "default" engine mode - confirm against Tess4J.
        private int ocrEngine = 3;

        // NOTE(review): -1 presumably leaves Tesseract's own page segmentation default in place - confirm.
        private int pageSegmentation = -1;

        /** Extra Tesseract variables, as name/value pairs. */
        private Map<String, String> variables = new HashMap<>();

        /** @return the file extensions that will be OCR'd */
        @Description(value="List of file extensions (case insensitive) that will be OCR'd")
        public List<String> getExtensions() {
            return extensions;
        }

        /** @param extensions the file extensions that will be OCR'd */
        public void setExtensions(List<String> extensions) {
            this.extensions = extensions;
        }

        /** @return the Tesseract configs to apply */
        @Description(value="List of Tesseract configs")
        public List<String> getConfigs() {
            return configs;
        }

        /** @param configs the Tesseract configs to apply */
        public void setConfigs(List<String> configs) {
            this.configs = configs;
        }

        /** @return the path to the Tesseract models */
        @Description(value="Path to Tesseract models")
        public String getDataPath() {
            return dataPath;
        }

        /** @param dataPath the path to the Tesseract models */
        public void setDataPath(String dataPath) {
            this.dataPath = dataPath;
        }

        /** @return the expected language of the text */
        @Description(value="Expected language of text")
        public String getLanguage() {
            return language;
        }

        /** @param language the expected language of the text */
        public void setLanguage(String language) {
            this.language = language;
        }

        /** @return the numeric Tesseract engine mode */
        @Description(value="Tesseract engine to use")
        public int getOcrEngine() {
            return ocrEngine;
        }

        /** @param ocrEngine the numeric Tesseract engine mode */
        public void setOcrEngine(int ocrEngine) {
            this.ocrEngine = ocrEngine;
        }

        /** @return the numeric Tesseract page segmentation mode */
        @Description(value="Tesseract page segmentation setting")
        public int getPageSegmentation() {
            return pageSegmentation;
        }

        /** @param pageSegmentation the numeric Tesseract page segmentation mode */
        public void setPageSegmentation(int pageSegmentation) {
            this.pageSegmentation = pageSegmentation;
        }

        /** @return the additional Tesseract variables */
        @Description(value="Additional Tesseract variables")
        public Map<String, String> getVariables() {
            return variables;
        }

        /** @param variables the additional Tesseract variables */
        public void setVariables(Map<String, String> variables) {
            this.variables = variables;
        }

        /**
         * Checks that the settings are complete enough to build a processor.
         *
         * <p>The extensions list, data path and language must be non-null and non-empty; the
         * configs list and variables map must be non-null but may be empty. The numeric engine
         * and page segmentation values are not checked.
         *
         * @return {@code true} if every check passes
         */
        public boolean validate() {
            if (extensions == null || extensions.isEmpty()) {
                return false;
            }
            if (configs == null) {
                return false;
            }
            if (dataPath == null || dataPath.isEmpty()) {
                return false;
            }
            if (language == null || language.isEmpty()) {
                return false;
            }
            return variables != null;
        }
    }

    public static class Processor
    extends AbstractProcessor {
        private final ITesseract instance;
        private final List<String> extensions;

        public Processor(List<String> extensions, ITesseract tesseract) {
            this.extensions = extensions;
            this.instance = tesseract;
        }

        public ProcessorResponse process(Item item) {
            item.getContents(FileContent.class).filter(fc -> this.extensions.contains(FilenameUtils.getExtension((String)((File)fc.getData()).getName()).toLowerCase())).forEach(fc -> {
                try {
                    String content = this.instance.doOCR((File)fc.getData());
                    ((Content.Builder)item.createContent(Text.class).withDescription("OCR from " + fc.getId()).withData((Object)content).withProperties((Properties)fc.getProperties())).save();
                }
                catch (TesseractException e) {
                    this.log().error("Unable to extract text from content {}", (Object)fc.getId(), (Object)e);
                }
            });
            return ProcessorResponse.ok();
        }
    }
}

