package org.codehaus.bayesian;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:org/codehaus/bayesian/Extractor.class */
/**
 * Reads a single file in one of several mail/text layouts and feeds what it finds to a
 * {@link WordList}.
 *
 * <p>One instance wraps one file. Each {@code extract*} method reads the file to its end and
 * then closes the underlying reader; calling another {@code extract*} method afterwards sees
 * an empty input.
 */
public class Extractor {
    /**
     * Finds URL-like tokens: either {@code http:} or {@code www} followed by one arbitrary
     * character, then non-whitespace, ended by one whitespace character (which is part of
     * the match). Case-insensitive.
     */
    private static final Pattern URL_PATTERN =
            Pattern.compile("(http:[^\\s]*\\s)|(www.[^\\s]+\\s)", Pattern.CASE_INSENSITIVE);

    /** Text removed from the non-URL parts of an Outlook export. */
    private static final Pattern OUTLOOK_NOISE = Pattern.compile(
            "(\".*\")|(BEGIN[.*]*END)|(>)|(<)|(_)|(-)|(\\*)|(#)|(<.*>)([,\\.\\=\t\\*\\+\\\\])|(\\&nbsp\\;)|(\\<.*)|(href)");

    /** Text removed from the non-URL parts of a spam-archive file. */
    private static final Pattern SPAM_ARCHIVE_NOISE = Pattern.compile(
            "([,\\-\\.\\=\t\\*\\+\\\\])|(<[^>]*>)|(\\&nbsp\\;)|(\\<.*)|(href)");

    /** Characters removed from a matched URL before it is reduced by {@link #getUrl}. */
    private static final Pattern URL_PUNCTUATION = Pattern.compile("[\\>\"\\<,]");

    /** Line prefix that ends a message body in the mailbox-style formats. */
    private static final String MESSAGE_SEPARATOR = "From ";

    private final BufferedReader fileReader;
    private final WordList wordList;

    /** Set once the reader has been closed; later reads then report end of input. */
    private boolean closed;

    /**
     * Opens {@code file} for reading with the platform default charset.
     *
     * @param wordList receiver of the extracted text
     * @param file     file to read
     * @throws Exception if the file cannot be opened
     */
    public Extractor(WordList wordList, File file) throws Exception {
        this.wordList = wordList;
        this.fileReader = new BufferedReader(new FileReader(file));
    }

    /**
     * Processes an Outlook-style export.
     *
     * <p>The first line is skipped. Every following line that starts with a double quote is
     * counted and otherwise ignored; every other line (except a line equal to {@code "\r"})
     * is collected as text. The collected text is tokenised and passed to the word list
     * together with half the number of counted lines.
     * NOTE(review): the halving presumably reflects two quote-prefixed lines per exported
     * message - confirm against the export format.
     *
     * @throws Exception if reading fails or the word list update fails
     */
    public void extractOutlook() throws Exception {
        try {
            StringBuffer text = new StringBuffer();
            double quotedLines = 0.0d;
            nextLine(); // skip the first line of the export
            for (String line = nextLine(); line != null; line = nextLine()) {
                if (line.startsWith("\"")) {
                    quotedLines += 1.0d;
                } else if (!line.equals("\r")) {
                    text.append(line).append(' ');
                }
            }
            this.wordList.update(tokenize(text.toString(), OUTLOOK_NOISE), quotedLines / 2.0d);
        } finally {
            closeReader();
        }
    }

    /**
     * Processes a spam-archive file.
     *
     * <p>A line starting with {@code From:} opens a header section that runs up to the next
     * line starting with {@code to:}; reaching that line counts one message. Inside the
     * section only {@code Subject:} lines contribute text: each word after the first token is
     * emitted with a {@code Subject:} prefix. All lines outside header sections are collected
     * as text. If the file ends inside a header section the section is abandoned without
     * counting a message (previously this raised a {@code NullPointerException}).
     *
     * @throws Exception if reading fails or the word list update fails
     */
    public void extractSpamArchive() throws Exception {
        try {
            StringBuffer text = new StringBuffer();
            double messages = 0.0d;
            for (String line = nextLine(); line != null; line = nextLine()) {
                if (!line.startsWith("From:")) {
                    text.append(line).append(' ');
                    continue;
                }
                String headerLine = nextLine();
                while (headerLine != null && !headerLine.startsWith("to:")) {
                    if (headerLine.startsWith("Subject:")) {
                        appendSubjectTokens(text, headerLine);
                    }
                    headerLine = nextLine();
                }
                if (headerLine != null) {
                    messages += 1.0d;
                }
            }
            this.wordList.update(tokenize(text.toString(), SPAM_ARCHIVE_NOISE), messages);
        } finally {
            closeReader();
        }
    }

    /**
     * Passes every line of the file to the word list, each with a count of {@code 1.0}.
     *
     * @throws Exception if reading fails or the word list update fails
     */
    public void extractText() throws Exception {
        try {
            for (String line = nextLine(); line != null; line = nextLine()) {
                this.wordList.update(line, 1.0d);
            }
        } finally {
            closeReader();
        }
    }

    /**
     * Processes a file in which every message starts with a line beginning {@code "From "}.
     *
     * @throws Exception if reading fails or a message cannot be processed
     */
    public void extractMailspool() throws Exception {
        extractMailbox(MESSAGE_SEPARATOR);
    }

    /**
     * Processes a file in which every message starts with a line beginning
     * {@code "From ???@???"}.
     *
     * @throws Exception if reading fails or a message cannot be processed
     */
    public void extractEudora() throws Exception {
        extractMailbox("From ???@???");
    }

    /**
     * Splits the file into messages and hands each one to {@link #parseUpdate} exactly once.
     *
     * <p>A message begins at a line starting with {@code messageStart}. Its header is every
     * line up to (not including) the first empty line; its body is that empty line and
     * everything after it up to the next line starting with {@code "From "} or the end of the
     * file. Lines before the first message start are ignored.
     */
    private void extractMailbox(String messageStart) throws Exception {
        try {
            String line = nextLine();
            while (line != null) {
                if (!line.startsWith(messageStart)) {
                    line = nextLine();
                    continue;
                }
                ArrayList header = new ArrayList();
                ArrayList body = new ArrayList();
                while (line != null && !line.equals("")) {
                    header.add(line);
                    line = nextLine();
                }
                while (line != null && !line.startsWith(MESSAGE_SEPARATOR)) {
                    body.add(line);
                    line = nextLine();
                }
                parseUpdate(header, body);
                // 'line' is now either null or the separator that ended the body. It is
                // deliberately not advanced here so the outer loop can start the next
                // message from it instead of skipping that message.
            }
        } finally {
            closeReader();
        }
    }

    /** Builds a {@link Message} from header and body lines, parses it and updates the word list. */
    private void parseUpdate(ArrayList arrayList, ArrayList arrayList2) {
        Parser parser = new Parser();
        Message message = new Message();
        message.setHeader(arrayList);
        message.setBody(arrayList2);
        message.setDate(getMessageDate(arrayList));
        message.setFrom(getMessageFrom(arrayList));
        message.setSubject(getMessageSubject(arrayList));
        message.setContentType(getMessageContentType(arrayList));
        message.setContentTransferEncoding(getMessageContentTransferEncoding(arrayList));
        message.setBoundary(getMessageBoundary(arrayList));
        this.wordList.update(parser.parse(message), 1.0d);
    }

    /** Returns the next line of the file, or {@code null} at end of file or once the reader is closed. */
    private String nextLine() throws IOException {
        return this.closed ? null : this.fileReader.readLine();
    }

    /** Closes the reader once; later calls are no-ops. */
    private void closeReader() {
        if (this.closed) {
            return;
        }
        this.closed = true;
        try {
            this.fileReader.close();
        } catch (IOException ignored) {
            // The file was only read; a failure while closing cannot lose data and must not
            // mask an exception thrown by the extraction itself.
        }
    }

    /** Appends every word of a {@code Subject:} line except its first token, each prefixed with {@code Subject:}. */
    private void appendSubjectTokens(StringBuffer text, String subjectLine) {
        StringTokenizer words = new StringTokenizer(subjectLine);
        words.nextToken(); // drop the first token (the header name)
        while (words.hasMoreTokens()) {
            text.append("Subject:").append(words.nextToken()).append(' ');
        }
    }

    /**
     * Rewrites {@code text} as a space-separated token string: URL matches are replaced by
     * their {@link #getUrl} form and everything between them has {@code noise} removed.
     * Text following the last URL is included as well (it used to be dropped, so input
     * without any URL produced an empty result).
     */
    private String tokenize(String text, Pattern noise) {
        StringBuffer out = new StringBuffer();
        Matcher urls = URL_PATTERN.matcher(text);
        int consumed = 0;
        while (urls.find()) {
            String before = text.substring(consumed, urls.start());
            out.append(noise.matcher(before).replaceAll("")).append(' ');
            String url = URL_PUNCTUATION.matcher(urls.group()).replaceAll("");
            out.append(getUrl(url)).append(' ');
            consumed = urls.end();
        }
        if (consumed < text.length()) {
            out.append(noise.matcher(text.substring(consumed)).replaceAll("")).append(' ');
        }
        return out.toString();
    }

    /**
     * Reduces a URL to a {@code URL:}-prefixed token. When the input contains {@code "//"}
     * the token holds the text between it and the next {@code "/"} (or the end of the
     * input); otherwise the whole input is kept.
     */
    private String getUrl(String str) {
        int schemeEnd = str.indexOf("//");
        if (schemeEnd == -1) {
            return "URL:" + str;
        }
        int hostStart = schemeEnd + 2;
        int pathStart = str.indexOf("/", hostStart);
        String host = pathStart == -1 ? str.substring(hostStart) : str.substring(hostStart, pathStart);
        return "URL:" + host;
    }

    /** Returns the first header line starting with {@code prefix}, or {@code null} if none does. */
    private String findHeaderLine(ArrayList header, String prefix) {
        for (int i = 0; i < header.size(); i++) {
            String line = (String) header.get(i);
            if (line.startsWith(prefix)) {
                return line;
            }
        }
        return null;
    }

    /** Returns the complete first {@code "From: "} header line, or {@code null}. */
    private String getMessageFrom(ArrayList arrayList) {
        return findHeaderLine(arrayList, "From: ");
    }

    /**
     * Returns the value of the first {@code "Content-Type: "} header up to, but not
     * including, its first {@code ';'}; {@code "text/plain"} when the header is absent.
     */
    private String getMessageContentType(ArrayList arrayList) {
        String line = findHeaderLine(arrayList, "Content-Type: ");
        if (line == null) {
            return "text/plain";
        }
        int valueStart = line.indexOf(":") + 2;
        int parametersStart = line.indexOf(";");
        return parametersStart == -1 ? line.substring(valueStart) : line.substring(valueStart, parametersStart);
    }

    /**
     * Returns the trimmed value of the first {@code Content-Transfer-Encoding:} header, or
     * {@code null} when the header is absent. A header with nothing after the colon yields
     * an empty string.
     */
    private String getMessageContentTransferEncoding(ArrayList arrayList) {
        String line = findHeaderLine(arrayList, "Content-Transfer-Encoding:");
        if (line == null) {
            return null;
        }
        // Take everything after the colon and trim it rather than assuming exactly one
        // space follows; the fixed offset overran the line when the value was missing.
        return line.substring(line.indexOf(":") + 1).trim();
    }

    /** Returns the text after the first {@code "Subject: "} header's prefix, or {@code null}. */
    private String getMessageSubject(ArrayList arrayList) {
        String line = findHeaderLine(arrayList, "Subject: ");
        return line == null ? null : line.substring(9);
    }

    /** Returns the complete first {@code "Date: "} header line, or {@code null}. */
    private String getMessageDate(ArrayList arrayList) {
        return findHeaderLine(arrayList, "Date: ");
    }

    /**
     * Returns {@code "--"} followed by the quoted {@code boundary="..."} value of the first
     * header line that contains one, or {@code null} when no line does. If the closing quote
     * is missing, the rest of the line is used as the value.
     */
    private String getMessageBoundary(ArrayList arrayList) {
        final String marker = "boundary=\"";
        for (int i = 0; i < arrayList.size(); i++) {
            String line = (String) arrayList.get(i);
            int markerAt = line.indexOf(marker);
            if (markerAt == -1) {
                continue;
            }
            int valueStart = markerAt + marker.length();
            int valueEnd = line.indexOf("\"", valueStart);
            String value = valueEnd == -1 ? line.substring(valueStart) : line.substring(valueStart, valueEnd);
            return "--" + value;
        }
        return null;
    }
}
