package it.tidalwave.bluebill.factsheet.wikipedia.photos;

import it.tidalwave.bluebill.taxonomy.Taxon;
import it.tidalwave.bluebill.taxonomy.birds.col.CatalogueOfLifeImporter;
import it.tidalwave.openrdf.elmo.impl.ElmoEntityFactory;
import it.tidalwave.semantic.EntityFactory;
import it.tidalwave.semantic.Wrapper;
import it.tidalwave.util.Initializer;
import it.tidalwave.xml.XPathProvider;
import it.tidalwave.xml.XmlParser;
import it.tidalwave.xml.dom4j.Dom4jXmlParser;
import it.tidalwave.xml.jaxen.JaxenXPathProvider;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.Proxy;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.xml.namespace.QName;
import org.apache.commons.io.IOUtils;
import org.jaxen.XPath;
import org.openrdf.concepts.dc.DcResource;
import org.openrdf.concepts.foaf.FoafResource;
import org.openrdf.concepts.foaf.Image;
import org.openrdf.concepts.skos.core.Concept;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:it/tidalwave/bluebill/factsheet/wikipedia/photos/WikipediaPhotosImporter.class */
/**
 * Importer that decorates species taxa with photos found on Wikimedia Commons.
 *
 * <p>For every taxon whose rank is {@code SPECIES}, {@link #process} downloads the page
 * {@code http://commons.wikimedia.org/wiki/<name with '_' for ' '>}, collects the links it contains
 * ({@link #processPage}) and registers an {@link Initializer} that later downloads each linked page,
 * turns it into a {@link Description} carrying Dublin Core metadata ({@link #processPage2}) and marks
 * the image as depicting the taxon.</p>
 *
 * <p>Diagnostics are written to {@code System.err}; failures on a single image page are reported there
 * and do not abort the processing of the remaining pages.</p>
 */
public class WikipediaPhotosImporter extends CatalogueOfLifeImporter {
    /** DCMI type URI added as a Dublin Core type to every image description created by this class. */
    public static final String DCMI_TYPE_STILL_IMAGE = "http://purl.org/dc/dcmitype/StillImage";

    /** URI of the GNU Free Documentation License; exposed as a constant, not referenced inside this class. */
    public static final String LICENSE_GNU_FDL = "http://www.gnu.org/copyleft/fdl.html";

    /** Prefix prepended to every (site-relative) link scraped from a page. */
    private static final String COMMONS_BASE_URL = "http://commons.wikimedia.org";

    /** Prefix of the identifier minted for authors that are only known by name. */
    private static final String CONTRIBUTOR_BASE_URI = "http://wikipedia.org/contributor/";

    /** System identifier of the DTD that is served from the class path instead of the network. */
    private static final String XHTML_TRANSITIONAL_DTD = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";

    /** Text markers that introduce the author in the raw page markup, in order of precedence. */
    private static final String CREATOR_MARKER = "Creator:";
    private static final String AUTHOR_MARKER = "Author:";
    private static final String AUTHOR_HEADER_MARKER = "Author</th>";

    /** Selects the nodes of a gallery page; each one is expected to wrap a link to an image page. */
    private static final String XPATH_DIV = "/html:html/html:body/html:div/html:div/html:table/html:tr/html:td/html:div/html:div/html:div";

    /** Relative to a node selected by {@link #XPATH_DIV}: the link target. */
    private static final String XPATH_A_HREF = "html:a/@href";

    /** Relative to a node selected by {@link #XPATH_DIV}: the thumbnail source (only used for diagnostics). */
    private static final String XPATH_A_IMG_SRC = "html:a/html:img/@src";

    /** On an image page: the link used as the identifier of the image. */
    private static final String XPATH_2 = "//html:div[@id='file']/html:a[1]/@href";

    /** On an image page: the copyright link, stored as Dublin Core rights. */
    private static final String XPATH_3 = "/html:html/html:head/html:link[@rel='copyright']/@href";

    @Nonnull
    private final transient XmlParser xmlParser;

    @Nonnull
    private final transient XPathProvider xPathProvider;

    /** Assigned in the constructor but never read inside this class. */
    private transient Proxy proxy;

    protected final transient EntityFactory<Description, ElmoDescription> stillImageFactory;
    protected final transient EntityFactory<Organization, ElmoOrganization> organizationFactory;
    protected final transient EntityFactory<Person, ElmoPerson> authorFactory;

    /** The publisher attached to every image; created by {@link #initialize()}. */
    protected transient Organization wikiPedia;

    /** Authors already created, keyed by their identifier, so that each one is created only once. */
    private final transient Map<String, Person> authorsByUri;

    /** Resolver installed on the XML parser; serves the XHTML transitional DTD from the class path. */
    protected final transient EntityResolver entityResolver;

    /**
     * Creates an importer backed by the dom4j parser and the Jaxen XPath provider.
     *
     * @throws IOException declared by the delegate constructor
     */
    public WikipediaPhotosImporter() throws IOException {
        this(new Dom4jXmlParser(), new JaxenXPathProvider());
    }

    /**
     * Creates an importer using the given XML infrastructure and installs {@link #entityResolver} on the parser.
     *
     * @param xmlParser      the parser used for every downloaded page
     * @param xPathProvider  the factory of the XPath expressions evaluated on parsed pages
     * @throws IOException   declared for the (implicit) superclass constructor
     */
    WikipediaPhotosImporter(@Nonnull XmlParser xmlParser, @Nonnull XPathProvider xPathProvider) throws IOException {
        this.proxy = Proxy.NO_PROXY;
        this.stillImageFactory = new ElmoEntityFactory(Description.class, ElmoDescription.class, new Class[]{Image.class, DcResource.class});
        this.organizationFactory = new ElmoEntityFactory(Organization.class, ElmoOrganization.class, new Class[]{Concept.class, DcResource.class, org.openrdf.concepts.foaf.Organization.class, FoafResource.class});
        this.authorFactory = new ElmoEntityFactory(Person.class, ElmoPerson.class, new Class[]{Concept.class, DcResource.class, org.openrdf.concepts.foaf.Person.class});
        this.authorsByUri = new HashMap<String, Person>();
        this.entityResolver = new EntityResolver() {
            /**
             * Returns the bundled copy of the XHTML transitional DTD when that system identifier is requested.
             *
             * @return the bundled DTD, or {@code null} to let the parser resolve the entity itself
             */
            @Override
            public InputSource resolveEntity(String publicId, @Nonnull String systemId) throws SAXException, IOException {
                if (XHTML_TRANSITIONAL_DTD.equals(systemId)) {
                    // The resource is looked up relative to this class' package.
                    return new InputSource(getClass().getResourceAsStream("xhtml1-transitional.dtd"));
                }
                return null;
            }
        };
        this.xmlParser = xmlParser;
        this.xPathProvider = xPathProvider;
        this.xmlParser.setEntityResolver(this.entityResolver);
    }

    /**
     * Runs the superclass initialization, then creates the {@link #wikiPedia} organization
     * and labels it "Wikipedia".
     *
     * @throws IOException if the superclass initialization fails
     */
    protected void initialize() throws IOException {
        super.initialize();
        this.wikiPedia = (Organization) this.organizationFactory.create(new QName("http://wikipedia.org/#organization"));
        ((Concept) this.wikiPedia.getLookup().lookup(Concept.class)).setRdfsLabel("Wikipedia");
    }

    /**
     * Collects the image pages for a species and schedules their import.
     *
     * <p>The gallery page is downloaded immediately; the individual image pages are downloaded only when
     * the returned builder runs the registered {@link Initializer}. A failure on one image page is printed
     * to {@code System.err} and the remaining pages are still processed.</p>
     *
     * @param str      not used by this implementation
     * @param str2     the name used to build the gallery page address (spaces are replaced by underscores)
     * @param builder  the builder of the taxon being imported
     * @return         the builder, with the initializer that links the images to the taxon
     * @throws CatalogueOfLifeImporter.IgnoreException if the taxon rank is not {@code SPECIES}
     * @throws Exception if the gallery page cannot be downloaded or parsed
     */
    @Nonnull
    protected Taxon.Builder process(@Nonnull String str, @Nonnull String str2, @Nonnull Taxon.Builder builder) throws Exception {
        if (builder.getRank() != Taxon.Rank.SPECIES) {
            throw new CatalogueOfLifeImporter.IgnoreException();
        }

        final List<URI> imagePageUris;
        final InputStream galleryStream = new URL(COMMONS_BASE_URL + "/wiki/" + str2.replace(' ', '_')).openStream();
        try {
            imagePageUris = processPage(galleryStream);
        } finally {
            // Close the connection even when parsing fails.
            galleryStream.close();
        }

        return builder.withInitializer(new Initializer<Taxon>() {
            @Nonnull
            public Taxon initialize(@Nonnull Taxon taxon) {
                for (URI uri : imagePageUris) {
                    try {
                        System.err.printf("Processing: %s\n", uri);
                        final Description description;
                        final InputStream imageStream = uri.toURL().openStream();
                        try {
                            description = WikipediaPhotosImporter.this.processPage2(imageStream);
                        } finally {
                            imageStream.close();
                        }
                        ((Image) description.getLookup().lookup(Image.class)).getFoafDepicts().add(Wrapper.unwrap(taxon));
                    } catch (Exception e) {
                        // Best effort: report the broken page and go on with the next one.
                        e.printStackTrace();
                    }
                }
                return taxon;
            }
        });
    }

    /**
     * Parses a gallery page and returns the addresses of the pages it links.
     *
     * <p>One URI is produced for every node selected by {@link #XPATH_DIV}, by prefixing the node's link
     * target with {@code http://commons.wikimedia.org}. Each link is also printed to {@code System.err}
     * together with the thumbnail source.</p>
     *
     * @param inputStream  the content of the gallery page; it is not closed by this method
     * @return             the linked pages, in document order
     * @throws Exception   if the page cannot be parsed or a link is not a valid URI
     */
    protected List<URI> processPage(@Nonnull InputStream inputStream) throws Exception {
        final Object document = this.xmlParser.parse(inputStream);
        final XPath nodePath = this.xPathProvider.createXPath(document, XPATH_DIV);
        final XPath hrefPath = this.xPathProvider.createXPath(document, XPATH_A_HREF);
        final XPath thumbnailPath = this.xPathProvider.createXPath(document, XPATH_A_IMG_SRC);

        final List<URI> pageUris = new ArrayList<URI>();
        final List<?> nodes = nodePath.selectNodes(document);
        for (Object node : nodes) {
            final String pageUrl = COMMONS_BASE_URL + hrefPath.stringValueOf(node);
            System.err.println(pageUrl + "  " + thumbnailPath.stringValueOf(node));
            pageUris.add(new URI(pageUrl));
        }
        return pageUris;
    }

    /**
     * Parses an image page and creates the entity describing the image.
     *
     * <p>The entity is identified by the value selected by {@link #XPATH_2} and receives: the
     * {@link #DCMI_TYPE_STILL_IMAGE} type, the {@code image/jpeg} format, {@link #wikiPedia} as publisher,
     * its own identifier, and the copyright link ({@link #XPATH_3}) as rights. The author, when it can be
     * extracted from the raw markup, is added as creator; otherwise the image is returned without one.</p>
     *
     * @param inputStream  the content of the image page; it is not closed by this method
     * @return             the description of the image
     * @throws Exception   if the page cannot be read or parsed
     */
    protected Description processPage2(@Nonnull InputStream inputStream) throws Exception {
        // The page is buffered because it is consumed twice: by the XML parser and by the textual author search.
        final byte[] pageBytes = IOUtils.toByteArray(inputStream);
        final String html = new String(pageBytes, "UTF-8");
        final Object document = this.xmlParser.parse(new ByteArrayInputStream(pageBytes));

        final XPath imageLinkPath = this.xPathProvider.createXPath(document, XPATH_2);
        final XPath copyrightPath = this.xPathProvider.createXPath(document, XPATH_3);
        final String imageId = imageLinkPath.stringValueOf(document);

        final Description description = (Description) this.stillImageFactory.create(new QName(imageId));
        final DcResource dcResource = (DcResource) description.getLookup().lookup(DcResource.class);
        dcResource.getDcTypes().add(DCMI_TYPE_STILL_IMAGE);
        // NOTE(review): the format is hard-coded and does not depend on the actual file type.
        dcResource.getDcFormats().add("image/jpeg");
        dcResource.getDcPublishers().add(Wrapper.unwrap(this.wikiPedia));
        dcResource.getDcIdentifiers().add(imageId);
        dcResource.getDcRights().add(copyrightPath.stringValueOf(document));

        final Person author = extractAuthor(html);
        if (author != null) {
            dcResource.getDcCreators().add(Wrapper.unwrap(author));
        }
        return description;
    }

    /**
     * Searches the raw markup of an image page for the author, trying in order the markers
     * {@code "Creator:"}, {@code "Author:"} and {@code "Author</th>"}; only the first marker found is used.
     *
     * @param html  the raw markup of the page
     * @return      the author, or {@code null} when no marker is present or the markup after it
     *              cannot be parsed
     */
    private Person extractAuthor(@Nonnull String html) {
        final int creatorAt = html.indexOf(CREATOR_MARKER);
        if (creatorAt >= 0) {
            // Plain name running up to the next closing tag.
            final String name = sliceUntil(html, creatorAt + CREATOR_MARKER.length(), "</");
            return (name == null) ? authorNotParsed(CREATOR_MARKER) : namedContributor(name.trim());
        }

        final int authorAt = html.indexOf(AUTHOR_MARKER);
        if (authorAt >= 0) {
            final String paragraph = sliceUntil(html, authorAt + AUTHOR_MARKER.length(), "</p>");
            if (paragraph == null) {
                return authorNotParsed(AUTHOR_MARKER);
            }
            final String snippet = paragraph.trim();
            if (!snippet.contains("href")) {
                return namedContributor(snippet);
            }
            // The snippet holds a link: the first quoted value is its target, the text after the first '>' its label.
            final String anchor = snippet.replace("</b> ", "");
            final int quoteAt = anchor.indexOf('"');
            final int tagEndAt = anchor.indexOf('>');
            final String href = (quoteAt < 0) ? null : sliceUntil(anchor, quoteAt + 1, "\"");
            final String name = (tagEndAt < 0) ? null : sliceUntil(anchor, tagEndAt + 1, "<");
            return linkedAuthor(href, name, AUTHOR_MARKER);
        }

        final int headerAt = html.indexOf(AUTHOR_HEADER_MARKER);
        if (headerAt >= 0) {
            // Skip the marker and the single character that follows it.
            final int from = headerAt + AUTHOR_HEADER_MARKER.length() + 1;
            final int hrefAt = html.indexOf("href=\"", from);
            final String href = (hrefAt < 0) ? null : sliceUntil(html, hrefAt + "href=\"".length(), "\"");
            // The end of the opening tag is searched from the link onwards, so that a preceding tag is never matched.
            final int tagEndAt = (hrefAt < 0) ? -1 : html.indexOf("\">", hrefAt);
            final String name = (tagEndAt < 0) ? null : sliceUntil(html, tagEndAt + "\">".length(), "<");
            return linkedAuthor(href, name, AUTHOR_HEADER_MARKER);
        }

        return null;
    }

    /**
     * Returns the author known only by name, identified by the contributor prefix followed by the name.
     *
     * @param name  the author name
     * @return      the author
     */
    @Nonnull
    private Person namedContributor(@Nonnull String name) {
        System.err.println("AUTHOR: " + name);
        return findOrCreateAuthor(CONTRIBUTOR_BASE_URI + name, name);
    }

    /**
     * Returns the author described by a link, identified by the absolute address of the link target.
     *
     * @param href    the site-relative link target, or {@code null} if it could not be extracted
     * @param name    the link label, or {@code null} if it could not be extracted
     * @param marker  the marker that introduced the author, used for diagnostics only
     * @return        the author, or {@code null} when either piece is missing
     */
    private Person linkedAuthor(String href, String name, @Nonnull String marker) {
        if (href == null || name == null) {
            return authorNotParsed(marker);
        }
        System.err.println("AUTHOR: " + name);
        return findOrCreateAuthor(COMMONS_BASE_URL + href, name);
    }

    /**
     * Reports that the markup following an author marker did not have the expected shape.
     *
     * @param marker  the marker that was found
     * @return        always {@code null}, so that the image is kept without a creator
     */
    private static Person authorNotParsed(@Nonnull String marker) {
        System.err.println("AUTHOR: unable to parse markup after '" + marker + "'");
        return null;
    }

    /**
     * Returns the text between an offset and the next occurrence of a terminator.
     *
     * @param text        the text to slice
     * @param from        the offset of the first character of the slice
     * @param terminator  the string that ends the slice (excluded)
     * @return            the slice, or {@code null} when the offset is out of range or the terminator is absent
     */
    private static String sliceUntil(@Nonnull String text, int from, @Nonnull String terminator) {
        if (from < 0 || from > text.length()) {
            return null;
        }
        final int end = text.indexOf(terminator, from);
        return (end < 0) ? null : text.substring(from, end);
    }

    /**
     * Returns the author with the given identifier, creating and caching it on first use.
     * The given name is recorded only when the author is created.
     *
     * @param authorUri  the identifier of the author
     * @param givenName  the name stored on a newly created author
     * @return           the author
     */
    @Nonnull
    private Person findOrCreateAuthor(@Nonnull String authorUri, @Nonnull String givenName) {
        Person person = this.authorsByUri.get(authorUri);
        if (person == null) {
            person = (Person) this.authorFactory.create(new QName(authorUri));
            this.authorsByUri.put(authorUri, person);
            ((org.openrdf.concepts.foaf.Person) person.getLookup().lookup(org.openrdf.concepts.foaf.Person.class)).getFoafGivennames().add(givenName);
        }
        return person;
    }
}
