package net.vidageek.crawler.component;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import net.vidageek.crawler.Page;
import net.vidageek.crawler.Status;
import net.vidageek.crawler.config.http.Cookie;
import net.vidageek.crawler.exception.CrawlerException;
import net.vidageek.crawler.page.ErrorPage;
import net.vidageek.crawler.page.OkPage;
import net.vidageek.crawler.page.RejectedMimeTypePage;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.log4j.Logger;

/* loaded from: input_file:net/vidageek/crawler/component/WebDownloader.class */
/**
 * {@link Downloader} that fetches a URL over HTTP with Apache HttpClient and wraps the outcome
 * in a {@link Page}.
 *
 * <p>A response is turned into:
 * <ul>
 *   <li>a {@link RejectedMimeTypePage} when its {@code Content-Type} header is missing or does
 *       not contain any of the configured mime types;</li>
 *   <li>an {@link ErrorPage} when the status is not {@link Status#OK};</li>
 *   <li>an {@link OkPage} otherwise, with the body decoded using the charset detected by ICU.</li>
 * </ul>
 */
public class WebDownloader implements Downloader {
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Characters that {@link #encode(String)} copies to the output without escaping. */
    private static final String URL_CHARS_KEPT_VERBATIM = ":/.?&#=";

    private static final String CONTENT_TYPE_HEADER = "Content-Type";
    private static final int SOCKET_TIMEOUT_MILLIS = 15000;
    private static final int INITIAL_READ_BUFFER_SIZE = 1000;

    private final ConcurrentLinkedQueue<String> mimeTypesToInclude;
    private final Logger log;
    private final ConcurrentLinkedQueue<Cookie> cookies;

    /**
     * Creates a downloader that accepts the given mime types and sends no cookies.
     *
     * @param list mime types to accept; a response is accepted when its {@code Content-Type}
     *             value contains one of them
     */
    public WebDownloader(List<String> list) {
        this(list, new ArrayList<Cookie>());
    }

    /**
     * Creates a downloader that accepts the given mime types and sends the given cookies.
     *
     * @param list  mime types to accept; a response is accepted when its {@code Content-Type}
     *              value contains one of them
     * @param list2 cookies added to the cookie store of every client created by
     *              {@link #get(String)}
     */
    public WebDownloader(List<String> list, List<Cookie> list2) {
        this.log = Logger.getLogger(WebDownloader.class);
        this.cookies = new ConcurrentLinkedQueue<>(list2);
        this.mimeTypesToInclude = new ConcurrentLinkedQueue<>(list);
    }

    /** Creates a downloader that accepts only {@code text/html} and sends no cookies. */
    public WebDownloader() {
        this(Arrays.asList("text/html"));
    }

    /**
     * Downloads {@code str} with a freshly created HTTP client that carries the configured
     * cookies and a socket timeout of {@value #SOCKET_TIMEOUT_MILLIS} ms.
     *
     * @param str the URL to download
     * @return the page describing the outcome of the request
     * @throws CrawlerException if the request or the reading of the response fails
     */
    @Override // net.vidageek.crawler.component.Downloader
    public Page get(String str) {
        DefaultHttpClient client = new DefaultHttpClient();
        try {
            for (Cookie cookie : this.cookies) {
                String name = cookie.name();
                String value = cookie.value();
                // NOTE(review): this writes the cookie value to the debug log; confirm that is
                // acceptable for cookies carrying credentials.
                this.log.debug("Creating cookie [" + name + " = " + value + "] " + cookie.domain());
                BasicClientCookie clientCookie = new BasicClientCookie(name, value);
                clientCookie.setPath(cookie.path());
                clientCookie.setDomain(cookie.domain());
                client.getCookieStore().addCookie(clientCookie);
            }
            client.getParams().setIntParameter("http.socket.timeout", SOCKET_TIMEOUT_MILLIS);
            return get(client, str);
        } finally {
            // The client is private to this call; release its connection manager so that
            // repeated downloads do not accumulate open resources.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Downloads {@code str} using the supplied client. The request is always aborted before
     * this method returns or throws; the client itself is left for the caller to manage.
     *
     * @param httpClient the client used to execute the GET request
     * @param str        the URL to download; it is passed through {@link #encode(String)} first
     * @return the page describing the outcome of the request
     * @throws CrawlerException if the request or the reading of the response fails
     */
    public Page get(HttpClient httpClient, String str) {
        try {
            String encodedUrl = encode(str);
            this.log.debug("Requesting url: [" + encodedUrl + "]");
            HttpGet request = new HttpGet(encodedUrl);
            try {
                HttpResponse response = httpClient.execute(request);
                Status status = Status.fromHttpCode(response.getStatusLine().getStatusCode());
                Header contentType = response.getLastHeader(CONTENT_TYPE_HEADER);
                if (!acceptsMimeType(contentType)) {
                    // A missing header is reported like a header without a value: null.
                    String mimeType = contentType == null ? null : contentType.getValue();
                    return new RejectedMimeTypePage(str, status, mimeType);
                }
                if (!Status.OK.equals(status)) {
                    return new ErrorPage(str, status);
                }
                // A response may carry no entity at all; treat that as an empty body.
                byte[] body = response.getEntity() == null
                        ? new byte[0]
                        : read(response.getEntity().getContent());
                return new OkPage(str, decode(body));
            } finally {
                request.abort();
            }
        } catch (IOException e) {
            throw new CrawlerException("Could not retrieve data from " + str, e);
        }
    }

    /**
     * Converts the raw response body to text using the charset ICU detects for it, falling
     * back to UTF-8 when no charset can be detected.
     *
     * @param body the raw bytes of the response body
     * @return the decoded text
     * @throws IOException if ICU fails to produce the decoded string
     */
    private String decode(byte[] body) throws IOException {
        CharsetDetector detector = new CharsetDetector();
        detector.setText(body);
        CharsetMatch match = detector.detect();
        if (match == null) {
            this.log.debug("No charset detected, decoding as UTF-8");
            return new String(body, UTF_8);
        }
        this.log.debug("Detected charset: " + match.getName());
        // Round-trip through UTF-8 so that anything the encoder cannot represent is replaced.
        // Decoding the buffer itself (rather than its backing array) honours the buffer limit,
        // so unused capacity of the array does not leak into the text as trailing NUL chars.
        return UTF_8.decode(UTF_8.encode(match.getString())).toString();
    }

    /**
     * Tells whether the {@code Content-Type} header names one of the accepted mime types.
     *
     * @param header the header to inspect; may be {@code null} when the response has none
     * @return {@code true} if the header value contains an accepted mime type, {@code false}
     *         otherwise, including when the header or its value is missing
     */
    private boolean acceptsMimeType(Header header) {
        if (header == null) {
            return false;
        }
        String value = header.getValue();
        if (value == null) {
            return false;
        }
        for (String mimeType : this.mimeTypesToInclude) {
            if (value.contains(mimeType)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads the stream until end of input.
     *
     * @param inputStream the stream to drain; it is not closed here
     * @return all bytes read from the stream
     * @throws CrawlerException if reading fails
     */
    private byte[] read(InputStream inputStream) {
        byte[] data = new byte[INITIAL_READ_BUFFER_SIZE];
        int length = 0;
        try {
            while (true) {
                if (length == data.length) {
                    data = Arrays.copyOf(data, ((data.length * 3) / 2) + 1);
                }
                // The requested length is always > 0 here, so read returns -1 only at EOF.
                int count = inputStream.read(data, length, data.length - length);
                if (count == -1) {
                    break;
                }
                length += count;
            }
        } catch (IOException e) {
            // Propagate instead of retrying: a stream that keeps failing would loop forever.
            throw new CrawlerException("There was a problem reading stream.", e);
        }
        return Arrays.copyOf(data, length);
    }

    /**
     * Escapes a URL for use in a request: the characters {@code : / . ? & # =} are kept as
     * they are and every other character is encoded with {@link URLEncoder} using UTF-8.
     * The string is processed by code point so that a surrogate pair is encoded as one
     * character rather than as two unencodable halves.
     *
     * @param str the URL to escape
     * @return the escaped URL
     * @throws CrawlerException if the JVM does not support UTF-8
     */
    private String encode(String str) {
        StringBuilder encoded = new StringBuilder(str.length());
        int index = 0;
        while (index < str.length()) {
            int codePoint = str.codePointAt(index);
            int width = Character.charCount(codePoint);
            if (URL_CHARS_KEPT_VERBATIM.indexOf(codePoint) >= 0) {
                encoded.appendCodePoint(codePoint);
            } else {
                try {
                    encoded.append(URLEncoder.encode(str.substring(index, index + width), "UTF-8"));
                } catch (UnsupportedEncodingException e) {
                    throw new CrawlerException("There is something really wrong with your JVM. It could not find UTF-8 encoding.", e);
                }
            }
            index += width;
        }
        return encoded.toString();
    }
}
