package it.unimi.di.mg4j.document.tika;

import it.unimi.di.mg4j.document.AbstractDocument;
import it.unimi.di.mg4j.document.Document;
import it.unimi.di.mg4j.document.DocumentFactory;
import it.unimi.di.mg4j.document.PropertyBasedDocumentFactory;
import it.unimi.di.mg4j.util.MG4JClassParser;
import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.WordReader;
import it.unimi.dsi.lang.ObjectParser;
import it.unimi.dsi.util.Properties;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParsingReader;

/* loaded from: input_file:it/unimi/di/mg4j/document/tika/AbstractSimpleTikaDocumentFactory.class */
/**
 * Base class for Tika-backed document factories that expose one {@link TikaField}
 * created with the no-argument constructor, followed by whatever
 * {@link #metadataFields()} returns.
 *
 * <p>Subclasses supply the Tika {@link Parser} through {@link #getParser()} and may
 * override {@link #metadataFields()} to add further fields.
 */
public abstract class AbstractSimpleTikaDocumentFactory extends AbstractTikaDocumentFactory {
    private static final long serialVersionUID = 1;

    /** Lazily built field list; see {@link #fields()}. */
    private List<TikaField> fields;

    /** Word reader handed out by every document produced by this factory. */
    private WordReader wordReader;

    /** Creates a factory and sets up its word reader from the default metadata. */
    public AbstractSimpleTikaDocumentFactory() {
        init();
    }

    /**
     * Creates a factory from a metadata map and sets up its word reader.
     *
     * @param reference2ObjectMap metadata passed to the superclass constructor.
     */
    public AbstractSimpleTikaDocumentFactory(Reference2ObjectMap<Enum<?>, Object> reference2ObjectMap) {
        super(reference2ObjectMap);
        init();
    }

    /**
     * Creates a factory from a property set and sets up its word reader.
     *
     * @param properties properties passed to the superclass constructor.
     * @throws ConfigurationException if the superclass constructor rejects the properties.
     */
    public AbstractSimpleTikaDocumentFactory(Properties properties) throws ConfigurationException {
        super(properties);
        init();
    }

    /**
     * Creates a factory from an array of strings and sets up its word reader.
     *
     * @param strArr strings passed to the superclass constructor.
     * @throws ConfigurationException if the superclass constructor rejects them.
     */
    public AbstractSimpleTikaDocumentFactory(String[] strArr) throws ConfigurationException {
        super(strArr);
        init();
    }

    /**
     * Initialises {@link #wordReader}: a plain {@link FastBufferedReader} when the default
     * metadata has no {@code WORDREADER} entry, otherwise the object built by
     * {@link ObjectParser#fromSpec} from that entry's string form.
     *
     * @throws RuntimeException wrapping any exception raised while building the reader.
     */
    private void init() {
        try {
            final Object spec = this.defaultMetadata.get(PropertyBasedDocumentFactory.MetadataKeys.WORDREADER);
            if (spec == null) {
                this.wordReader = new FastBufferedReader();
            } else {
                this.wordReader = (WordReader) ObjectParser.fromSpec(spec.toString(), WordReader.class, MG4JClassParser.PACKAGE);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Handles the {@code WORDREADER} property; every other key is delegated to the superclass.
     *
     * <p>The specification is stored in the map and then instantiated once, with the
     * resulting object discarded, so that an invalid specification fails here.
     *
     * @param str the property key.
     * @param strArr the values associated with the key.
     * @param reference2ObjectMap the metadata map that receives the specification.
     * @return {@code true} when the key was {@code WORDREADER}; otherwise the superclass result.
     * @throws ConfigurationException wrapping any exception raised while handling the key.
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // it.unimi.di.mg4j.document.PropertyBasedDocumentFactory
    public boolean parseProperty(String str, String[] strArr, Reference2ObjectMap<Enum<?>, Object> reference2ObjectMap) throws ConfigurationException {
        if (!sameKey(PropertyBasedDocumentFactory.MetadataKeys.WORDREADER, str)) {
            return super.parseProperty(str, strArr, reference2ObjectMap);
        }
        try {
            final String spec = ensureJustOne(str, strArr).toString();
            reference2ObjectMap.put(PropertyBasedDocumentFactory.MetadataKeys.WORDREADER, spec);
            // Instantiate only to validate the specification; the instance is not kept.
            ObjectParser.fromSpec(spec, WordReader.class, MG4JClassParser.PACKAGE);
            return true;
        } catch (Exception e) {
            // A single handler suffices: every failure is reported as a ConfigurationException.
            throw new ConfigurationException(e);
        }
    }

    /**
     * Returns the field list, building it on first use: one default-constructed
     * {@link TikaField} followed by the elements of {@link #metadataFields()}.
     *
     * @return the cached list of fields.
     */
    @Override // it.unimi.di.mg4j.document.tika.AbstractTikaDocumentFactory
    protected List<TikaField> fields() {
        if (this.fields == null) {
            // Build locally and publish only when complete, so that a failure in the
            // overridable metadataFields() cannot leave a partial list cached.
            final List<TikaField> built = new ArrayList<TikaField>();
            built.add(new TikaField());
            built.addAll(metadataFields());
            this.fields = built;
        }
        return this.fields;
    }

    /**
     * Returns a document whose content is produced by a Tika {@link ParsingReader}
     * created on the first call to {@code content(int)}.
     *
     * @param inputStream the raw content handed to the parsing reader.
     * @param reference2ObjectMap per-document metadata used to resolve title and URI.
     * @return a new document wrapping the stream.
     * @throws IOException declared by the overridden method.
     */
    @Override // it.unimi.di.mg4j.document.DocumentFactory
    public Document getDocument(final InputStream inputStream, final Reference2ObjectMap<Enum<?>, Object> reference2ObjectMap) throws IOException {
        return new AbstractDocument() { // from class: it.unimi.di.mg4j.document.tika.AbstractSimpleTikaDocumentFactory.1
            private ParsingReader parsingReader;
            private Metadata tikaMetadata;

            /** Resolves the {@code TITLE} metadata key for this document. */
            @Override // it.unimi.di.mg4j.document.Document
            public CharSequence title() {
                return (CharSequence) AbstractSimpleTikaDocumentFactory.this.resolve(PropertyBasedDocumentFactory.MetadataKeys.TITLE, reference2ObjectMap);
            }

            /** Resolves the {@code URI} metadata key for this document. */
            @Override // it.unimi.di.mg4j.document.Document
            public CharSequence uri() {
                return (CharSequence) AbstractSimpleTikaDocumentFactory.this.resolve(PropertyBasedDocumentFactory.MetadataKeys.URI, reference2ObjectMap);
            }

            /**
             * Returns the content of field {@code i}: the parsing reader for a body field,
             * otherwise a reader over the field's metadata-derived text (empty when absent).
             *
             * <p>NOTE: the reader is created only while both the reader and the metadata are
             * unset, so after {@code close()} a body field yields {@code null}.
             */
            @Override // it.unimi.di.mg4j.document.Document
            public Object content(int i) throws IOException {
                AbstractSimpleTikaDocumentFactory.this.ensureFieldIndex(i);
                if (this.parsingReader == null && this.tikaMetadata == null) {
                    this.tikaMetadata = new Metadata();
                    // Resolve the URI once instead of once for the check and once for the value.
                    final CharSequence documentUri = uri();
                    if (documentUri != null) {
                        this.tikaMetadata.set("resourceName", documentUri.toString());
                    }
                    this.parsingReader = new ParsingReader(AbstractSimpleTikaDocumentFactory.this.getParser(), inputStream, this.tikaMetadata, new ParseContext());
                }
                final TikaField field = AbstractSimpleTikaDocumentFactory.this.fields().get(i);
                if (field.isBody()) {
                    return this.parsingReader;
                }
                final String metadataText = field.contentFromMetadata(this.tikaMetadata);
                return new FastBufferedReader(new StringReader(metadataText == null ? "" : metadataText));
            }

            /**
             * Closes the document and, if one was created, the parsing reader. The reader is
             * released even when the superclass {@code close()} throws.
             */
            @Override // it.unimi.di.mg4j.document.AbstractDocument, it.unimi.di.mg4j.document.Document, java.io.Closeable, java.lang.AutoCloseable
            public void close() throws IOException {
                try {
                    super.close();
                } finally {
                    final ParsingReader reader = this.parsingReader;
                    this.parsingReader = null;
                    if (reader != null) {
                        reader.close();
                    }
                }
            }

            /** Returns the factory's word reader after validating the field index. */
            @Override // it.unimi.di.mg4j.document.Document
            public WordReader wordReader(int i) {
                AbstractSimpleTikaDocumentFactory.this.ensureFieldIndex(i);
                return AbstractSimpleTikaDocumentFactory.this.wordReader;
            }
        };
    }

    /**
     * Hook for subclasses to contribute additional fields; this implementation
     * contributes none.
     *
     * @return the extra fields appended after the first field.
     */
    protected List<? extends TikaField> metadataFields() {
        return Collections.emptyList();
    }

    /**
     * Supplies the Tika parser used to build each document's parsing reader.
     *
     * @return the parser to use.
     */
    protected abstract Parser getParser();

    /**
     * Returns this same factory instance rather than a new one.
     *
     * <p>NOTE(review): the name is a decompiler rename of {@code copy()}; confirm
     * against the original sources before relying on it.
     *
     * @return {@code this}.
     */
    @Override // it.unimi.di.mg4j.document.DocumentFactory
    /* renamed from: copy, reason: merged with bridge method [inline-methods] */
    public DocumentFactory m37copy() {
        return this;
    }
}
