package net.sf.gluebooster.demos.pojo.languages.chinese;

import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import net.sf.gluebooster.java.booster.basic.gui.DialogConfiguration;
import net.sf.gluebooster.java.booster.basic.gui.UserInteraction;
import net.sf.gluebooster.java.booster.basic.gui.UserInteractionBoostUtils;
import net.sf.gluebooster.java.booster.basic.gui.swing.UserInteractionWithSwing;
import net.sf.gluebooster.java.booster.essentials.eventsCommands.Callable;
import net.sf.gluebooster.java.booster.essentials.eventsCommands.CallableByConstant;
import net.sf.gluebooster.java.booster.essentials.utils.IoBoostUtils;
import net.sf.gluebooster.java.booster.essentials.utils.TextBoostUtils;

/**
 * Analyzes the Chinese character pages of Reinaert Thomas Evert Godfried Albrecht.
 * 
 * @see <a href="http://www.rtega.be/chmn/index.php">http://www.rtega.be/chmn/index.php</a>
 * @author cbauer
 *
 */
public class RtegaAnalyzer {

	/** Base URL of the listing; the query parameters are appended to it. */
	private static final String BASE_URL = "http://www.rtega.be/chmn/index.php?";

	/** Value of the {@code subpage} query parameter for the primitives (page count 10). */
	private static final String SUBPAGE_PRIMITIVES = "64";

	/** Value of the {@code subpage} query parameter for the characters (page count 189). */
	private static final String SUBPAGE_CHARACTERS = "48";

	/** Interval of the links to the pages: the {@code start} query parameter grows by this amount per page. */
	private static final int PAGE_INTERVAL = 20;

	/** Text that marks the beginning of the table holding the entries. */
	private static final String TABLE_START = "<table class='chmn";

	/** Index of the table cell that contains the mnemonic (cell 2 is the translation and is not used). */
	private static final int MNEMONIC_COLUMN = 3;

	/**
	 * Processes the complete characters listing (189 pages, beginning at offset 0).
	 * 
	 * @return see {@link #extract(String, int, int)}
	 * @throws Exception
	 *             if a page could not be downloaded
	 */
	private static String extract() throws Exception {
		// use SUBPAGE_PRIMITIVES with a page count of 10 to process the primitives instead
		return extract(SUBPAGE_CHARACTERS, 0, 189);
	}

	/**
	 * Downloads the pages of one listing and prints one line {@code character ||| mnemonic} to standard out for each character found in the
	 * first two cells of each table row.
	 * 
	 * Pages in which the table cannot be located are reported on standard error and skipped, so that one unexpected page does not abort the
	 * whole run.
	 * 
	 * @param subpage
	 *            value of the {@code subpage} query parameter
	 * @param startpage
	 *            value of the {@code start} query parameter of the first page
	 * @param pagecount
	 *            number of pages to download
	 * @return the (currently never filled) alternatives and mnemonics buffers joined by a separator
	 * @throws Exception
	 *             if a page could not be downloaded
	 */
	private static String extract(String subpage, int startpage, int pagecount) throws Exception {
		int endpage = startpage + PAGE_INTERVAL * pagecount;
		StringBuilder alternatives = new StringBuilder();
		StringBuilder mnemonics = new StringBuilder();

		for (int start = startpage; start < endpage; start += PAGE_INTERVAL) {
			String url = BASE_URL + "start=" + start + "&subpage=" + subpage;
			String rows = extractTableRows(IoBoostUtils.getContent(new URL(url)));
			if (rows == null) {
				System.err.println("did not find the table in " + url);
				continue;
			}

			for (String row : rows.split("</tr>")) {
				printEntries(row);
			}
		}

		// NOTE(review): nothing is ever appended to the two buffers, so the result consists of the separator only.
		// The separator contains a literal backslash followed by 'r', which looks like a typo; it is kept unchanged.
		return alternatives.toString() + "\r\n\r\n\\r\n\r\n" + mnemonics;
	}

	/**
	 * Cuts the rows of the entry table out of a page.
	 * 
	 * @param page
	 *            the html of the complete page
	 * @return the text from the first {@code <tr} of the table up to (excluding) the closing {@code </table}; null if the table, its first
	 *         row or its end could not be found
	 */
	private static String extractTableRows(String page) {
		int tableStart = page.indexOf(TABLE_START);
		if (tableStart < 0) {
			return null;
		}
		int firstRow = page.indexOf("<tr", tableStart);
		if (firstRow < 0) {
			return null;
		}
		int tableEnd = page.indexOf("</table", firstRow);
		if (tableEnd < 0) {
			return null;
		}
		return page.substring(firstRow, tableEnd);
	}

	/**
	 * Prints {@code character ||| mnemonic} for each distinct character of the first two cells of a table row.
	 * 
	 * Rows that do not have at least four cells (for example the fragment that remains after the last {@code </tr>}) are ignored.
	 * 
	 * @param row
	 *            the html of one row without its closing {@code </tr>}
	 * @throws Exception
	 *             declared because {@link #asText(String)} declares it
	 */
	private static void printEntries(String row) throws Exception {
		// Example row:
		// <tr id="4198"><td style="width: 140px; " class="hanzi"><font><font uid="8b02" id="chanzilarge">謂</font><font uid="8b02"
		// id="jhanzilarge">謂</font></font><a href="?reversec=謂">R</a><a target="" href="index.php?submit=4198">S</a><hr><font uid="8c13"
		// id="chanzilarge">谓</font><font uid="8c13" id="jhanzilarge">谓</font></td><td><font style="color:#009999;" id="chanzilarge" uid="27b34"
		// href="?c=𧬴">𧬴</font></td><td id="chmn">say, name, be called, meaning, sense</td><td id="chmn"><i>say</i> <a style="color:#0000ff;"
		// id="jhanzitext" uid="8a00" href="?c=言">言</a> what the stomach <a style="color:#0000ff;" id="jhanzitext" uid="80c3" href="?c=胃">胃</a>
		// <i>senses</i></td><td></td>
		List<String> columns = splitLine(row);
		if (columns.size() <= MNEMONIC_COLUMN) {
			return;
		}

		String characters = asText(columns.get(0));
		String alternativeCharacters = asText(columns.get(1));
		String mnemonic = asText(columns.get(MNEMONIC_COLUMN));

		for (String character : distinctCharacters(characters + alternativeCharacters)) {
			System.out.println(character + " ||| " + mnemonic);
		}
	}

	/**
	 * Splits a text into its distinct characters.
	 * 
	 * The text is traversed by code point and not by char, because characters outside the basic multilingual plane (the alternative character
	 * of the example row has the uid 27b34) occupy two chars and must not be torn apart. Whitespace is not a character entry and is skipped.
	 * 
	 * @param text
	 *            the text to split
	 * @return the distinct non-whitespace characters in the order of their first appearance
	 */
	private static List<String> distinctCharacters(String text) {
		List<String> result = new ArrayList<String>();
		int index = 0;
		while (index < text.length()) {
			int codePoint = text.codePointAt(index);
			index += Character.charCount(codePoint);
			if (Character.isWhitespace(codePoint)) {
				continue;
			}
			String character = new String(Character.toChars(codePoint));
			// the lists are tiny (a handful of characters per row), so the linear contains is sufficient
			if (!result.contains(character)) {
				result.add(character);
			}
		}
		return result;
	}

	/**
	 * Splits a table row into its cells.
	 * 
	 * @param line
	 *            the html of one row without its closing {@code </tr>}
	 * @return the cells, each beginning with {@code <td} and without its closing {@code </td>}
	 */
	private static List<String> splitLine(String line) {
		List<String> result = new ArrayList<String>();
		String[] columns = line.split("</td>");
		for (int i = 0; i < columns.length; i++) {
			String column = columns[i];
			if (i == 0) {
				// the first cell is preceded by the opening tr (with or without attributes)
				column = column.replaceAll("<tr\\b[^>]*>", "");
			}
			for (String part : column.split("<td")) {
				// A real cell always continues with '>' or with attributes after "<td".
				// Blank parts are only the whitespace between the tags and would otherwise shift the cell indices.
				if (part.trim().length() > 0) {
					result.add("<td" + part);
				}
			}
		}
		return result;
	}

	/**
	 * Removes the markup of a table cell so that only its text remains.
	 * 
	 * Removed are the opening tr and td tags, font, a, i, hr, img, br and ul tags and the link texts R and S that follow the characters. The
	 * entity {@code &rarr;} and the arrow character are replaced by {@code ->}.
	 * 
	 * Html example (beginning of a row):
	 * 
	 * <pre>{@code
	 * <tr class='success' id='236'>
	 * <td style='width: 140px; ' class='hanzi'><font><font uid='9efd' id='chanzilarge'>黽</font><font uid='9efd' id='jhanzilarge'>黽</font></font>
	 * <a href='?reversec=黽'>R</a><a target='' href='index.php?submit=236'>S</a>
	 * <hr>
	 * <font uid='9efe' id='chanzilarge'>黾</font><font uid='9efe' id='jhanzilarge'>黾</font>
	 * }</pre>
	 * 
	 * @param html
	 *            the html of the cell; must not be null
	 * @return the text of the cell
	 * @throws Exception
	 *             never thrown by the current implementation; kept so that the signature stays unchanged
	 */
	public static String asText(String html) throws Exception {
		// The order of the replacements matters: the link texts R and S must be removed while the surrounding a tags still exist.
		html = html.replaceAll("<tr .*?>", "");
		html = html.replace("<td>", "");
		html = html.replaceAll("<td .*?>", "");
		html = html.replaceAll("<font.*?>", "");
		html = html.replace("</font>", "");
		html = html.replace(">R<", "><");
		html = html.replace(">S<", "><");
		html = html.replaceAll("<a .*?>", "");
		html = html.replace("</a>", "");
		html = html.replace("<i>", "");
		html = html.replace("</i>", "");
		html = html.replace("<hr>", "");
		html = html.replaceAll("<img .*?>", "");
		html = html.replace("&rarr;", "->");
		html = html.replace("→", "->");
		html = html.replace("<br>", "");
		html = html.replace("<ul>", "");
		html = html.replace("</ul>", "");

		return html;
	}

	/**
	 * Runs the extraction and prints its result.
	 * 
	 * @param ignored
	 *            not used
	 * @throws Exception
	 *             if the extraction fails
	 */
	public static void main(String[] ignored) throws Exception {
		System.out.println(extract());
	}

}
