package net.sf.gluebooster.demos.pojo.languages.chinese;

import java.net.URL;
import java.util.List;

import net.sf.gluebooster.java.booster.basic.gui.DialogConfiguration;
import net.sf.gluebooster.java.booster.basic.gui.UserInteraction;
import net.sf.gluebooster.java.booster.basic.gui.UserInteractionBoostUtils;
import net.sf.gluebooster.java.booster.basic.gui.swing.UserInteractionWithSwing;
import net.sf.gluebooster.java.booster.essentials.eventsCommands.Callable;
import net.sf.gluebooster.java.booster.essentials.eventsCommands.CallableByConstant;
import net.sf.gluebooster.java.booster.essentials.utils.IoBoostUtils;
import net.sf.gluebooster.java.booster.essentials.utils.TextBoostUtils;

/**
 * Demo that asks the user for the url of the English-Hanzi wikibook category page, downloads every article linked from that page and prints one line per
 * article: the extracted paragraph (stripped of markup) followed by an attribution in wiki markup.
 */
public class WikibookEnglishHanziAnalyzer {

	/** Scheme and host that are prepended to the relative article links found in the category page. */
	private static final String WIKIBOOKS = "https://en.wikibooks.org";

	/** Value that is pre-filled in the url input dialog. */
	private static final String DEFAULT_CATEGORY_URL = WIKIBOOKS + "/wiki/Category:Book:English-Hanzi";

	/** Matches an anchor that points to an English-Hanzi article, e.g. {@code <a href="/wiki/English-Hanzi/Oxygen" ...>English-Hanzi/Oxygen</a>}. */
	private static final String ARTICLE_ANCHOR_REGEX = "<a.*?wiki/English-Hanzi/.*?</a>";

	/**
	 * Matches the opening tag of an anchor element. The word boundary keeps other elements whose name starts with 'a' (abbr, article, ...) untouched and
	 * {@code [^>]*} can never run past the end of the tag.
	 */
	private static final String OPENING_ANCHOR_REGEX = "<a\\b[^>]*>";

	/** Matches a paragraph whose first child element is a bold element. */
	private static final String BOLD_PARAGRAPH_REGEX = "<p>[^<]*?<b>.*?</p>";

	/**
	 * Asks for the category url, follows all English-Hanzi article links of that page and collects one output line per article.
	 *
	 * @return the concatenated output lines, each terminated by CR LF; empty if no article yielded a result
	 * @throws Exception
	 *             if the dialog or the download of the category page fails. Failures of single articles are reported on stderr and skipped.
	 */
	private static String extractSentences() throws Exception {
		DialogConfiguration dialogConfiguration = DialogConfiguration.input("Hanzi URL", "Enter the url of the hanzi category", DEFAULT_CATEGORY_URL);
		CallableByConstant<UserInteraction> userInteractionFactory = new CallableByConstant<UserInteraction>(new UserInteractionWithSwing());
		Callable dialog = UserInteractionBoostUtils.displayDialog("hanzi url", userInteractionFactory, dialogConfiguration, false, Object.class);

		Object answer = dialog.call(dialogConfiguration);
		if (answer == null) {
			// NOTE(review): presumably a cancelled dialog yields null - confirm against the dialog implementation.
			throw new IllegalStateException("no url of the hanzi category was entered");
		}
		String index = IoBoostUtils.getContent(new URL(answer.toString()));

		StringBuilder result = new StringBuilder();
		for (String href : TextBoostUtils.findAll(index, ARTICLE_ANCHOR_REGEX)) {
			try {
				String line = extractLine(href);
				if (line != null) {
					result.append(line);
				}
			} catch (Exception ex) {
				// Best effort: one broken article must not abort the whole run, but say which one failed.
				System.err.println("could not process " + href);
				ex.printStackTrace();
			}
		}

		return result.toString();
	}

	/**
	 * Downloads the article referenced by one anchor of the category page and builds its output line.
	 *
	 * @param href
	 *            the html of the anchor, e.g. {@code <a href="/wiki/English-Hanzi/Oxygen" title="English-Hanzi/Oxygen">English-Hanzi/Oxygen</a>}
	 * @return the output line including the trailing CR LF, or null if no suitable paragraph was found (a message is then written to stderr)
	 * @throws Exception
	 *             if the article can not be downloaded or parsed
	 */
	private static String extractLine(String href) throws Exception {
		String link = TextBoostUtils.find(href, "/wiki/[^\"]*", true);
		String text = IoBoostUtils.getContent(new URL(WIKIBOOKS + link));

		// Join all lines so that the paragraph regex may span what were several lines in the page source.
		text = text.replace("\r", "").replace("\n", "");
		// Drop the anchor markup but keep the link text: the paragraph regex allows no tag between <p> and <b>.
		text = text.replaceAll(OPENING_ANCHOR_REGEX, "").replace("</a>", "");

		String chinese = selectParagraph(TextBoostUtils.findAll(text, BOLD_PARAGRAPH_REGEX));
		if (chinese == null) {
			System.err.println("did not find chinese in " + link);
			return null;
		}

		// Remove all remaining tags.
		chinese = chinese.replaceAll("<.+?>", "");
		// NOTE(review): the brackets of the attribution markup are unbalanced ("([b:en:" is never closed); kept unchanged - confirm the intended format.
		return chinese + "   ([b:en:" + link.replace("/wiki/", "") + "| English-Hanzi von [[b:en:User:Efex3|Efex3]]\r\n";
	}

	/**
	 * Chooses the paragraph to be used from the matching paragraphs of an article.
	 *
	 * @param paragraphs
	 *            all paragraphs of the article that matched; presumably English first and Chinese second when both are present
	 * @return the only paragraph if there is exactly one, the second if there are exactly two, otherwise null
	 */
	private static String selectParagraph(List<String> paragraphs) {
		switch (paragraphs.size()) {
		case 1:
			return paragraphs.get(0);
		case 2:
			return paragraphs.get(1);
		default:
			return null;
		}
	}

	/**
	 * Runs the extraction and prints the result to stdout.
	 *
	 * @param ignored
	 *            command line arguments, not used
	 * @throws Exception
	 *             if the extraction fails as a whole
	 */
	public static void main(String[] ignored) throws Exception {
		System.out.println(extractSentences());
	}

}
