package cc.twittertools.search;

import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:cc/twittertools/search/TrecTopicSet.class */
/**
 * An ordered collection of {@link TrecTopic}s read from a topics file.
 *
 * <p>Instances are only created through {@link #fromFile(File)}. Iteration yields the topics in
 * the order in which their {@code <top ...</top>} elements appear in the file.
 */
public class TrecTopicSet implements Iterable<TrecTopic> {
    // All element patterns use DOTALL so that "." also matches line terminators and an element
    // may therefore span several lines of the file.

    /** One complete {@code <top ...</top>} element (non-greedy, so topics are matched one by one). */
    private static final Pattern TOP_PATTERN = Pattern.compile("<top(.*?)</top>", Pattern.DOTALL);

    /** The topic number element; group 1 is the id, e.g. {@code MB001}. */
    private static final Pattern NUM_PATTERN =
            Pattern.compile("<num> Number: (MB\\d+) </num>", Pattern.DOTALL);

    /** The query text given as {@code <title>}; group 1 is the text without surrounding whitespace. */
    private static final Pattern TITLE_PATTERN =
            Pattern.compile("<title>\\s*(.*?)\\s*</title>", Pattern.DOTALL);

    /** Alternative spelling of the query text as {@code <query>}; used when no title is present. */
    private static final Pattern TITLE_PATTERN2 =
            Pattern.compile("<query>\\s*(.*?)\\s*</query>", Pattern.DOTALL);

    /** The {@code <querytweettime>} element; group 1 is the run of decimal digits. */
    private static final Pattern TWEETTIME_PATTERN =
            Pattern.compile("<querytweettime>\\s*(\\d+)\\s*</querytweettime>", Pattern.DOTALL);

    /** An id of exactly the form {@code MB0dd}; compiled once instead of once per topic. */
    private static final Pattern PADDED_ID_PATTERN = Pattern.compile("MB0\\d\\d");

    /** Parsed topics, in file order. */
    private final List<TrecTopic> queries = Lists.newArrayList();

    /** Not instantiable from outside; use {@link #fromFile(File)}. */
    private TrecTopicSet() {
    }

    /** Appends a topic to the end of this set. */
    private void add(TrecTopic trecTopic) {
        this.queries.add(trecTopic);
    }

    /**
     * Returns an iterator over the topics in the order they were read.
     *
     * @return iterator backed by the internal list
     */
    @Override
    public Iterator<TrecTopic> iterator() {
        return this.queries.iterator();
    }

    /**
     * Reads a topics file (decoded as UTF-8) and parses every {@code <top ...</top>} element in it.
     *
     * <p>For each topic the id is taken from {@code <num>}, the query text from {@code <title>}
     * (or {@code <query>} if there is no title) and the time from {@code <querytweettime>}. Ids
     * of the form {@code MB0dd} are rewritten to {@code MBdd}. A file containing no
     * {@code <top>} element yields an empty set.
     *
     * @param file the topics file; must not be null and must exist
     * @return the topics found in {@code file}, in file order
     * @throws IOException if the file cannot be read, or if a topic lacks one of the required
     *     elements or carries a tweet time that does not fit in a {@code long}
     * @throws NullPointerException if {@code file} is null
     * @throws IllegalArgumentException if {@code file} does not exist
     */
    public static TrecTopicSet fromFile(File file) throws IOException {
        Preconditions.checkNotNull(file, "file");
        Preconditions.checkArgument(file.exists(), "File does not exist: %s", file);

        // Re-join the lines with "\n" so the DOTALL patterns can match across line boundaries.
        String contents = Joiner.on("\n").join(Files.readLines(file, Charsets.UTF_8));

        TrecTopicSet topics = new TrecTopicSet();
        Matcher topMatcher = TOP_PATTERN.matcher(contents);
        while (topMatcher.find()) {
            topics.add(parseTopic(topMatcher.group(0), file));
        }
        return topics;
    }

    /**
     * Parses the text of a single {@code <top ...</top>} element into a {@link TrecTopic}.
     *
     * @param topicText the full text of one topic element
     * @param file the file being parsed; only used in error messages
     * @throws IOException if a required element is missing or the tweet time is out of range
     */
    private static TrecTopic parseTopic(String topicText, File file) throws IOException {
        Matcher numMatcher = NUM_PATTERN.matcher(topicText);
        if (!numMatcher.find()) {
            throw new IOException("Error parsing " + file + ": topic without a valid <num> element");
        }
        String id = normalizeId(numMatcher.group(1));

        // Prefer <title>; fall back to <query> when no title is present.
        Matcher textMatcher = TITLE_PATTERN.matcher(topicText);
        if (!textMatcher.find()) {
            textMatcher = TITLE_PATTERN2.matcher(topicText);
            if (!textMatcher.find()) {
                throw new IOException("Error parsing " + file + ": topic " + id
                        + " has neither a <title> nor a <query> element");
            }
        }
        String text = textMatcher.group(1);

        Matcher timeMatcher = TWEETTIME_PATTERN.matcher(topicText);
        if (!timeMatcher.find()) {
            throw new IOException("Error parsing " + file + ": topic " + id
                    + " has no valid <querytweettime> element");
        }

        long tweetTime;
        try {
            tweetTime = Long.parseLong(timeMatcher.group(1));
        } catch (NumberFormatException e) {
            // The pattern only admits digits, but a run too long for a long still fails to parse;
            // report it like every other malformed-topic condition instead of leaking an
            // unchecked exception.
            throw new IOException("Error parsing " + file + ": topic " + id
                    + " has an out-of-range <querytweettime>: " + timeMatcher.group(1), e);
        }
        return new TrecTopic(id, text, tweetTime);
    }

    /**
     * Rewrites an id of exactly the form {@code MB0dd} to {@code MBdd}; any other id is returned
     * unchanged.
     */
    private static String normalizeId(String id) {
        if (PADDED_ID_PATTERN.matcher(id).matches()) {
            return id.replace("MB0", "MB");
        }
        return id;
    }
}
