package net.sf.gluebooster.demos.pojo.languages.chinese;

import java.io.BufferedReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;

import javax.swing.table.DefaultTableModel;

import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;

import net.sf.gluebooster.java.booster.basic.container.ComparatorBoostUtils;
import net.sf.gluebooster.java.booster.basic.container.ContainerBoostUtilsBasic;
import net.sf.gluebooster.java.booster.essentials.meta.BoostedComparator;
import net.sf.gluebooster.java.booster.essentials.objects.BoostedObject;
import net.sf.gluebooster.java.booster.essentials.utils.TextBoostUtils;

/**
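 * Helper methods for processing Chinese text: dictionary lookup, conversion of traditional to simplified characters, decomposition of characters into
 * their components and vocabulary extraction.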
 * 
 * @author CBauer
 * 
 */
public class ChineseHelper extends BoostedObject {

	/**
	 * The dictionary to be used. It is set by {@link #setDictionary(Collection)} and is null until then.
	 */
	private Collection<ChineseVocabularyEntry> dictionary;

	/**
	 * A map from the characters (traditional and simplified form) to the corresponding vocabulary entries.
	 * 
	 * It is derived from {@link #dictionary} by {@link #setDictionary(Collection)} and is null when no dictionary is set.
	 */
	private Map<String, ChineseVocabularyEntry> dictionaryMap;

	/**
	 * Drill down into the components of a character: maps a character to the list of its immediate components.
	 * 
	 * It is set by {@link #setCharacterComponents(Map)} and is null until then.
	 */
	private Map<String, List<String>> characterComponents;

	/**
	 * Builds a lookup table that uses only the traditional characters of the dictionary entries as keys.
	 * 
	 * @return a newly created map from the traditional characters to their vocabulary entry
	 */
	private Map<String, ChineseVocabularyEntry> getTradionalMap() {
		final boolean useTraditionalKeys = true;
		final boolean useSimplifiedKeys = false;
		return getDictionaryMap(useTraditionalKeys, useSimplifiedKeys);
	}

	/**
	 * Builds a lookup table that uses only the simplified characters of the dictionary entries as keys.
	 * 
	 * @return a newly created map from the simplified characters to their vocabulary entry
	 */
	private Map<String, ChineseVocabularyEntry> getSimplifiedMap() {
		final boolean useTraditionalKeys = false;
		final boolean useSimplifiedKeys = true;
		return getDictionaryMap(useTraditionalKeys, useSimplifiedKeys);
	}

	/**
	 * Indexes the entries of the current dictionary by their Chinese characters.
	 * 
	 * The two kinds of keys are treated differently: for a traditional key the first entry found wins, whereas a simplified key always points to the entry
	 * that was processed last.
	 * 
	 * @param addTraditional
	 *            should the traditional characters be used as key
	 * @param addSimplified
	 *            should the simplified characters be used as key
	 * @return the newly created map from characters to vocabulary entry
	 */
	private Map<String, ChineseVocabularyEntry> getDictionaryMap(boolean addTraditional, boolean addSimplified) {
		Map<String, ChineseVocabularyEntry> entriesByCharacters = new HashMap<String, ChineseVocabularyEntry>();

		for (ChineseVocabularyEntry vocabulary : dictionary) {
			if (addTraditional) {
				String traditionalKey = vocabulary.getTraditional();
				// never replace an entry that is already registered for this key
				boolean alreadyRegistered = entriesByCharacters.containsKey(traditionalKey);
				if (!alreadyRegistered) {
					entriesByCharacters.put(traditionalKey, vocabulary);
				}
			}

			if (addSimplified) {
				// unconditional: a later entry replaces an earlier one
				entriesByCharacters.put(vocabulary.getSimplified(), vocabulary);
			}
		}

		return entriesByCharacters;
	}

	/**
	 * Collects the conversion table from traditional to simplified characters out of all entries of the current dictionary.
	 * 
	 * @return the newly created map (key: traditional characters, value: simplified characters)
	 */
	private Map<String, String> getTradionalSimplifiedMap() {
		Map<String, String> simplifiedByTraditional = new HashMap<String, String>();

		for (ChineseVocabularyEntry vocabulary : dictionary) {
			addToTraditionalSimplifiedMap(simplifiedByTraditional, vocabulary.getTraditional(), vocabulary.getSimplified());
		}

		return simplifiedByTraditional;
	}

	/**
	 * Adds one entry (traditional to simplified character) into the map.
	 * 
	 * If the word consists of more than one character, every single character is added as well, paired with the character at the same position of the
	 * simplified form. Characters are counted as Unicode code points, so a character outside the basic multilingual plane is never split into its
	 * surrogates. The single characters are only added if both forms have the same number of characters, because otherwise the positions cannot be
	 * matched.
	 * 
	 * @param result
	 *            the entry will be added here
	 * @param traditional
	 *            the traditional character(s), must not be null
	 * @param simplified
	 *            the corresponding simplified character(s), must not be null
	 */
	private void addToTraditionalSimplifiedMap(Map<String, String> result,
			String traditional, String simplified) {

		// Add an entry with equal simplified and traditional value only if
		// there is no entry (with maybe different characters) yet
		if (!simplified.equals(traditional) || !result.containsKey(traditional)) {
			result.put(traditional, simplified);
		}

		int traditionalLength = traditional.length();
		int characterCount = traditional.codePointCount(0, traditionalLength);
		boolean sameCharacterCount = characterCount == simplified.codePointCount(0, simplified.length());

		if (characterCount > 1 && sameCharacterCount) {
			// add all single characters, including the last one
			int traditionalIndex = 0;
			int simplifiedIndex = 0;
			while (traditionalIndex < traditionalLength) {
				int nextTraditional = traditional.offsetByCodePoints(traditionalIndex, 1);
				int nextSimplified = simplified.offsetByCodePoints(simplifiedIndex, 1);
				// the recursion ends here, because a single character is not split any further
				addToTraditionalSimplifiedMap(result,
						traditional.substring(traditionalIndex, nextTraditional),
						simplified.substring(simplifiedIndex, nextSimplified));
				traditionalIndex = nextTraditional;
				simplifiedIndex = nextSimplified;
			}
		}

	}

	/**
	 * Changes the traditional characters of a text into simplified characters.
	 * 
	 * The text is converted character by character (one Unicode code point at a time); multi-character words are not looked up as a whole. Characters
	 * without an entry in the conversion table are copied unchanged. The conversion table is rebuilt from the current dictionary on every call.
	 * 
	 * @param chinesetext
	 *            the original text with traditional characters, must not be null
	 * @return the modified text
	 */
	public String changeTraditionalToSimplified(String chinesetext){

		if (chinesetext.isEmpty()) {
			// nothing to convert, so the conversion table is not needed
			return chinesetext;
		}

		Map<String, String> tradionalSimplified = getTradionalSimplifiedMap();

		int size = chinesetext.length();
		StringBuilder result = new StringBuilder(size);
		int i = 0;
		while (i < size) {
			// a character outside the basic multilingual plane occupies two chars
			// and must be looked up as one unit
			int next = i + Character.charCount(chinesetext.codePointAt(i));
			String character = chinesetext.substring(i, next);
			String simplified = tradionalSimplified.get(character);
			result.append(simplified == null ? character : simplified);
			i = next;
		}

		return result.toString();
	}

	/**
	 * Getter of the dictionary.
	 * 
	 * @return the dictionary passed to {@link #setDictionary(Collection)}, null if none has been set
	 */
	public Collection<ChineseVocabularyEntry> getDictionary() {
		return this.dictionary;
	}

	/**
	 * Setter of the dictionary. The lookup map of the characters is rebuilt (or reset, if there is no dictionary).
	 * 
	 * @param dictionary
	 *            the new dictionary, may be null
	 */
	public void setDictionary(Collection<ChineseVocabularyEntry> dictionary) {
		this.dictionary = dictionary;
		// the lookup map is derived data, both traditional and simplified characters are used as keys
		this.dictionaryMap = (dictionary != null) ? getDictionaryMap(true, true) : null;
	}

	/**
	 * Splits a text into its lines. Every line is trimmed; lines that are empty after trimming are left out.
	 * 
	 * @param text
	 *            the inspected text
	 * @return the trimmed, non-empty lines in their original order
	 * @throws Exception
	 *             if reading the text fails
	 */
	private List<String> extractNonEmptyLines(String text) throws Exception {
		List<String> nonEmptyLines = new ArrayList<String>();
		BufferedReader lineSource = new BufferedReader(new StringReader(text));

		for (String rawLine = lineSource.readLine(); rawLine != null; rawLine = lineSource.readLine()) {
			String trimmed = rawLine.trim();
			if (trimmed.length() > 0) {
				nonEmptyLines.add(trimmed);
			}
		}

		return nonEmptyLines;
	}

	/**
	 * Collects the distinct characters of a word.
	 * 
	 * @param word
	 *            the word to be inspected
	 * @return one string per distinct char of the word; strings and not characters, because handling and processing strings is easier
	 */
	private Set<String> getCharacters(String word) {
		Set<String> distinctCharacters = new HashSet<String>();

		// walk from the last char to the first one
		int index = word.length();
		while (index > 0) {
			index--;
			distinctCharacters.add(String.valueOf(word.charAt(index)));
		}

		return distinctCharacters;
	}

	/**
	 * Collects the distinct characters that occur in at least one of the given words.
	 * 
	 * @param words
	 *            the words to be inspected
	 * @return the union of the characters of all words
	 */
	private Set<String> getCharacters(Collection<String> words) {
		Set<String> allCharacters = new HashSet<String>();

		for (String currentWord : words) {
			Set<String> charactersOfWord = getCharacters(currentWord);
			allCharacters.addAll(charactersOfWord);
		}

		return allCharacters;
	}


	/**
	 * Finds the words of a dictionary that are written only with some given characters.
	 * 
	 * The simplified and the traditional form of an entry are checked independently of each other.
	 * 
	 * @param dictionary
	 *            the words are looked up here
	 * @param knownCharacters
	 *            the characters that may be used
	 * @return the found words (simplified and/or traditional forms)
	 */
	private Set<String> getWordsWithCharacters(
			Collection<ChineseVocabularyEntry> dictionary,
			Set<String> knownCharacters) throws Exception {
		Set<String> matchingWords = new HashSet<String>();

		for (ChineseVocabularyEntry candidate : dictionary) {
			// first the simplified, then the traditional form
			String[] forms = { candidate.getSimplified(), candidate.getTraditional() };
			for (String form : forms) {
				if (isContainedIn(form, knownCharacters)) {
					matchingWords.add(form);
				}
			}
		}

		return matchingWords;
	}

	/**
	 * Checks whether a word is written only with characters of a given collection.
	 * 
	 * @param word
	 *            the word to be inspected
	 * @param characters
	 *            the allowed characters
	 * @return true if every character of the word is in the collection (this is always the case for an empty word)
	 */
	private boolean isContainedIn(String word, Collection<String> characters) {
		Set<String> charactersOfWord = getCharacters(word);
		return characters.containsAll(charactersOfWord);
	}
	
	/**
	 * Adds the components (and subcomponents) of the characters.
	 * 
	 * The decomposition is taken from the character components of this helper. If no character components are set, or a character has no (or a null)
	 * component list, the character is treated as not decomposable. Every character is expanded only once, so cyclic decompositions terminate.
	 * 
	 * @param characters
	 *            the characters which components are to be added, must not be null and must not contain null
	 * @return a new set with the characters and their components
	 */
	public Set<String> addComponents(Set<String> characters) {
		HashSet<String> result = new HashSet<String>();
		// work list of the characters that still have to be expanded
		ConcurrentLinkedQueue<String> queue = new ConcurrentLinkedQueue<String>(
				characters);

		String character = queue.poll();
		while (character != null) {
			// add returns false if the character has been handled before
			if (result.add(character) && characterComponents != null) {
				List<String> components = characterComponents.get(character);
				if (components != null) {
					queue.addAll(components);
				}
			}
			character = queue.poll();
		}
		return result;
	}

	/**
	 * Creates the comparator used to order chinese characters according to their component structure.
	 * 
	 * The comparator is built by {@link ComparatorBoostUtils#createComparatorByImmediatePredecessors} from the character components of this helper.
	 * 
	 * @return the comparator.
	 */
	private BoostedComparator<String> getCharacterSorter() throws Exception {
		BoostedComparator<String> byComposition = ComparatorBoostUtils
				.createComparatorByImmediatePredecessors(characterComponents);
		return byComposition;
	}
	
	/**
	 * Sorts characters according to their composition.
	 * 
	 * The components of the characters take part in the sorting, but only the given characters are returned.
	 * 
	 * @param characters
	 *            the characters to be sorted.
	 * @return the sorted characters
	 */
	public List<String> sortCharacters(Set<String> characters) throws Exception {

		// the component characters are needed temporarily, so that an ordering is possible
		Set<String> charactersWithComponents = addComponents(characters);
		ArrayList<String> ordered = new ArrayList<String>(charactersWithComponents);

		BoostedComparator<String> sorter = getCharacterSorter();
		ContainerBoostUtilsBasic.sortPartialOrderedList(ordered, sorter);

		// drop the component characters that were not asked for
		ordered.retainAll(characters);

		return ordered;
	}

	/**
	 * Finds all words in the dictionary that consist of known characters and are not yet part of the known vocabulary.
	 * 
	 * Every non-empty line of the given text is one known word; the characters of these words are the known characters. The found words are ordered with
	 * {@link ContainerBoostUtilsBasic#sortPartialOrderedList}: a word is smaller than a word it is contained in (as substring or by its set of
	 * characters), single characters are compared by their component structure.
	 * 
	 * The comparator throws an IllegalStateException for words it cannot compare. NOTE(review): presumably this is handled by sortPartialOrderedList -
	 * confirm.
	 * 
	 * @param textWithKnownVocabulary
	 *            contains the known vocabulary, one word per line
	 * @return the vocabulary list
	 */
	public List<String> findAdditionalVocabulary(
			String textWithKnownVocabulary
			) throws Exception {

		Set<String> knownVocabulary = new HashSet<String>(
				extractNonEmptyLines(textWithKnownVocabulary));
		Set<String> knownCharacters = getCharacters(knownVocabulary);
		
		// created eagerly, even if the comparator below never uses it
		final BoostedComparator<String> characterSorter = getCharacterSorter();

		Set<String> allWordsWithKnownCharacters = getWordsWithCharacters(dictionary, knownCharacters) ;
 		
		allWordsWithKnownCharacters.removeAll(knownVocabulary);
		// add the characters to be able to use their composition hierarchy for sorting.
		// the characters that are known vocabulary are removed again after sorting
		allWordsWithKnownCharacters.addAll(knownCharacters);
		ArrayList<String> result = new ArrayList<String>(
				allWordsWithKnownCharacters);
		
		
		BoostedComparator<String> comparator = new BoostedComparator<String>() {

			@Override
			public int compare(String word1, String word2) {
				// identical words
				if (word1.equals(word2)) return 0;
				
				// a word is smaller than every word that contains it as substring
				if (word1.indexOf(word2) > -1) return 1;
				if (word2.indexOf(word1) > -1) return -1;

				Set<String> characters1 = getCharacters(word1);
				Set<String> characters2 = getCharacters(word2);
				// words are treated as equal if they consist of the same set of characters
				if (characters1.equals(characters2)) return 0;
				
				// a word is smaller if its characters are a subset of the characters of the other word
				if (characters1.containsAll(characters2)) return 1;
				if (characters2.containsAll(characters1)) return -1;

				// two single characters are compared by their component structure
				// (only possible if character components are available)
				if (characterComponents != null &&  word1.length() == 1 && word2.length() == 1){
					return characterSorter.compare(word1, word2);
				}
				
				
				// no rule applies: the words are not related
				throw new IllegalStateException("words not comparable " + word1 + ":" + word2);
				
			}
		};

		ContainerBoostUtilsBasic.sortPartialOrderedList(result, comparator);
		// remove the characters that are themselves known vocabulary;
		// the other known characters stay in the result
		knownCharacters.retainAll(knownVocabulary);
		result.removeAll(knownCharacters);

		return result;
	}

	/**
	 * Gets the components of a chinese character.
	 * 
	 * Parses a text of the structure of the edit part of
	 * 
	 * http://commons.wikimedia.org/w/index.php?title=Commons:Chinese_characters_decomposition&action=edit
	 *
	 * in the version of 16 February 2015
	 * 
	 * Only the text between &lt;pre&gt; and &lt;/pre&gt; is inspected. One non-empty line has the structure (columns are separated by tabs):
	 * 
	 * 1. Chinese characters.
	 * 
	 * 2. Number of strokes in the character
	 * 
	 * 3. Composition kind
	 * 
	 * 4. First character part (may be composed of several characters, if the composition does not exist as a single character).
	 * 
	 * 5. ...and Number of strokes in this first character part.
	 * 
	 * 6. Verification for the first part (empty = verification made; "?" = still to do).
	 * 
	 * 7. Second character part, "*" when no different second part (primitives, or repetitions).
	 * 
	 * 8. ...and Number of strokes in this second character part.
	 * 
	 * 9. Verification for the second part.
	 * 
	 * 10. Cangjie input method coding (for easy sorting)
	 * 
	 * 11. Radical (or "*" if the character itself is the key)
	 * 
	 * The columns 1, 4 and 7 are read from the indices 1, 4 and 7 of the split line, so every line is expected to start with a tab. A part is ignored if
	 * it is empty, "*" or the character itself; the parts are trimmed before this check.
	 *
	 * @param wikipediaEditText
	 *            the text of the edit section of the wikipedia article
	 * @return the mapping from a character to its components
	 * @throws IllegalStateException
	 *             if a section contains a nested &lt;pre&gt; or a line has too few columns
	 */
	public static Map<String, List<String>> parseWikipediaCharacterComponentsVersion2015_02_16(
			String wikipediaEditText) throws Exception {
		
		Map<String, List<String>> result = new HashMap<String, List<String>>();

		// remove everything outside the tables
		String PRE = "<pre>";
		List<String> parts = TextBoostUtils.getTextBetween(wikipediaEditText,
				PRE, "</pre>", false);

		// indices of the used columns after splitting a line at the tabs
		final int characterColumn = 1;
		final int[] compoundColumns = { 4, 7 };

		for (String part : parts) {
			if (part.contains(PRE)) {
				throw new IllegalStateException("No " + PRE
						+ " allowed in a part");
			}
			BufferedReader lineReader = TextBoostUtils
					.createBufferedReader(part);
			try {
				// a line is like: 丁 2 吕 一 1 亅 1 MN 一
				for (String line = lineReader.readLine(); line != null; line = lineReader.readLine()) {
					if (line.trim().isEmpty()) {
						continue;
					}
					String[] lineParts = line.split("\t");
					if (lineParts.length <= compoundColumns[compoundColumns.length - 1]) {
						// fail with the offending line instead of a bare index error
						throw new IllegalStateException("could not parse line:" + line);
					}

					String character = lineParts[characterColumn];
					String trimmedCharacter = character.trim();
					ArrayList<String> compounds = new ArrayList<String>();
					for (int column : compoundColumns) {
						// trim first, so that padded values are recognized as "*" or as the character itself
						String compound = lineParts[column].trim();
						if (!compound.isEmpty()
								&& !compound.equals(trimmedCharacter)
								&& !"*".equals(compound)) {
							compounds.add(compound);
						}
					}

					result.put(character, compounds);
				}
			} finally {
				lineReader.close();
			}
		}

		return result;
	}

	/**
	 * Getter of the character components.
	 * 
	 * @return the map from a character to its components as passed to {@link #setCharacterComponents(Map)}, null if none has been set
	 */
	public Map<String, List<String>> getCharacterComponents() {
		return this.characterComponents;
	}

	/**
	 * Setter of the character components. The map is stored as it is, no copy is made.
	 * 
	 * @param characterComponents
	 *            the map from a character to its components
	 */
	public void setCharacterComponents(Map<String, List<String>> characterComponents) {
		this.characterComponents = characterComponents;
	}

	/**
	 * Parses the text of the edit section of http://commons.wikimedia.org/w/index.php?title=Commons:Chinese_characters_decomposition&action=edit
	 * 
	 * The format is as of date 2016-04-28 (yyyy-mm-dd) in version
	 * https://commons.wikimedia.org/w/index.php?title=Commons:Chinese_characters_decomposition&oldid=178568869
	 * 
	 * The text contains the decomposition of chinese characters into sub components. Only the lines between &lt;pre&gt; and &lt;/pre&gt; are inspected.
	 * Of a line split at the tabs, index 1 is the character, the indices 4 and 7 are its two parts. A part is left out if it is "*" or the character
	 * itself.
	 * 
	 * @param text
	 *            of the edit section
	 * @return the decomposition (value) of the characters (key)
	 * @throws IllegalStateException
	 *             if a section contains a nested &lt;pre&gt; or a line can not be parsed
	 */
	public Map<String, List<String>> parseWikipediaTextToCharacterComponents(String text) throws Exception {
		final String openingTag = "<pre>";
		Map<String, List<String>> componentsByCharacter = new HashMap<String, List<String>>();

		// everything outside the tables is skipped
		for (String section : TextBoostUtils.getTextBetween(text, openingTag, "</pre>", false)) {
			if (section.contains(openingTag)) {
				throw new IllegalStateException("error in parsing: " + section + "\nof text\n" + text);
			}

			BufferedReader rows = TextBoostUtils.createBufferedReader(section);
			for (String row = rows.readLine(); row != null; row = rows.readLine()) {
				if (row.trim().isEmpty()) {
					continue;
				}

				try {
					String[] cells = row.split("\t");
					String key = cells[1];
					ArrayList<String> partsOfCharacter = new ArrayList<String>(2);
					// cell 4 is the first part, cell 7 is the second part
					for (int cellIndex = 4; cellIndex <= 7; cellIndex += 3) {
						String partOfCharacter = cells[cellIndex];
						boolean noRealPart = partOfCharacter.equals(key) || partOfCharacter.equals("*");
						if (!noRealPart) {
							partsOfCharacter.add(partOfCharacter);
						}
					}
					componentsByCharacter.put(key, partsOfCharacter);
				} catch (Exception ex) {
					throw new IllegalStateException("could not parse line:" + row, ex);
				}
			}
		}

		return componentsByCharacter;
	}

	/**
	 * Creates an empty table model (no rows) with the three columns "characters", "pinyin" and "translation".
	 * 
	 * @return the created model
	 */
	private DefaultTableModel createDefaultTableModel() {
		Object[] columnNames = { "characters", "pinyin", "translation" };
		int initialRowCount = 0;
		return new DefaultTableModel(columnNames, initialRowCount);
	}

	/**
	 * Extracts the vocabulary from a chinese text.
	 * 
	 * For every position of the text all substrings of up to 10 chars that start there are passed to the word translation, shortest first. In addition
	 * every non-empty line of the text is added as a row of its own (only the first column is filled) as soon as its end is reached, i.e. after the rows
	 * of the words that start within this line.
	 * 
	 * @param chineseText
	 *            the text to analyze, lines are separated by '\n'
	 * @return the vocabulary as table with the columns characters, pinyin and translation
	 */
	public DefaultTableModel createChineseVocabulary(String chineseText) throws Exception {
		DefaultTableModel result = createDefaultTableModel();

		// maximum length of the substrings that are looked up as a word
		final int maxWordLength = 10;
		int size = chineseText.length();
		int beginningOfLine = 0;
		HashSet<String> usedWords = new HashSet<String>();

		for (int i = 0; i < size; i++) {
			// append full lines
			boolean lineBreak = chineseText.charAt(i) == '\n';
			if (lineBreak || i == size - 1) {
				// the line break itself is not part of the line
				int endOfLine = lineBreak ? i : i + 1;
				if (endOfLine > beginningOfLine) {
					result.addRow(new Object[] { chineseText.substring(beginningOfLine, endOfLine) });
				}
				// always continue behind this position, also after an empty line
				beginningOfLine = i + 1;
			}

			// first the single character, then the longer words starting here
			int lastEnd = Math.min(size, i + maxWordLength);
			for (int end = i + 1; end <= lastEnd; end++) {
				addWordTranslation(result, usedWords, chineseText.substring(i, end));
			}
		}

		return result;
	}

	/**
	 * Adds the translation of one word.
	 * 
	 * The rows are added in this order: first (recursively) the components of the word, if the character components contain the word; then, if the word
	 * is found in the dictionary map and has not been used yet, its subwords and finally the word itself with its pinyin and translation.
	 * 
	 * Both the character components and the dictionary map must be set, they are accessed without a null check.
	 * 
	 * @param result
	 *            the translation will be added here
	 * @param usedWords
	 *            the words already used; words found in the dictionary are added to it
	 * @param word
	 *            the new word
	 */
	private void addWordTranslation(DefaultTableModel result, HashSet<String> usedWords, String word) {

		// should there be less memory used, but more computation time
		boolean minimizeMemory = true;

		// the components are handled before the check whether the word has already been used
		if (characterComponents.containsKey(word)) {
			for (String component : characterComponents.get(word)) {
				addWordTranslation(result, usedWords, component);
			}
		}

		if (!usedWords.contains(word)) {
			// mark the word before recursing into its subwords
			usedWords.add(word);

			// is the word found in the dictionary
			boolean foundWord = false;

			ChineseVocabularyEntry pinyinTranslation = null;
			if (dictionaryMap.containsKey(word)) {
				pinyinTranslation = dictionaryMap.get(word);
			}

			if (pinyinTranslation != null) {
				foundWord = true;
				// first add subwords
				if (word.length() > 1) {// only then subwords can occur
					for (String subword : TextBoostUtils.splitIntoAllSubstrings(word, false)) {
						addWordTranslation(result, usedWords, subword);
					}
				}
				// end of subwords

				Object[] row = { "", "", "" };
				// word, pinyin, translation

				row[0] = word.replace('\t', ' ');
				// todo consider lists with multiple entries; only the first pinyin/translation pair is used
				row[1] = pinyinTranslation.getPinyinTranslations().get(0).getLeft();
				String translation = "" + pinyinTranslation.getPinyinTranslations().get(0).getRight();
				// the first and the last char are cut off
				// NOTE(review): presumably the right value is a collection whose text is enclosed in [] - confirm
				translation = translation.substring(1, translation.length() - 1); // without []
				row[2] = translation;
				// todo add simplified form of traditional character

				result.addRow(row);
			}

			if (minimizeMemory && !foundWord) {
				usedWords.remove(word);
				// remove the word from the set to keep the set small.
				// duplicates of such words are handled again.
			}
		}
	}

	/**
	 * Sorts lines of a text into a given vocabulary. A line is put into the vocabulary text after all its characters are known.
	 * 
	 * The lines of the vocabulary text are copied in their order. The characters of a vocabulary line become known from its last to its first character;
	 * as soon as all characters of a (trimmed) line of the new text are known, this line is appended. Lines that become complete at the same time keep
	 * the order they have in the new text. Blank lines of the new text are ignored, duplicate lines are added only once.
	 * 
	 * Lines with characters that never become known are listed at the end in a section "Remaining", grouped by the number of their missing characters (1
	 * to 10, where 10 also contains the lines with more than 10 missing characters).
	 * 
	 * @param vocabularyText
	 *            contains the known vocabulary
	 * @param newText
	 *            the text to be sorted into the vocabularyText
	 * @return the combined text
	 */
	public static StringBuilder sortTextIntoVocabulary(String vocabularyText, String newText) {

		// mapping line -> the characters of the line that are not yet known.
		// Insertion ordered, so that the output does not depend on hash codes.
		Map<String, Set<String>> pendingLines = new LinkedHashMap<String, Set<String>>();
		for (String line : newText.split("\n")) {
			line = line.trim();// remove leading spaces and \r
			if (line.isEmpty()) {
				// a blank line has no characters to wait for and would have no valid group in the remaining section
				continue;
			}
			Set<String> characters = new HashSet<String>();
			for (int i = line.length(); i > 0; i--) {
				characters.add(line.substring(i - 1, i));
			}
			pendingLines.put(line, characters);
		}

		StringBuilder result = new StringBuilder();
		for (String line : vocabularyText.split("\n")) {
			result.append(line).append("\n");
			for (int i = line.length(); i > 0 && !pendingLines.isEmpty(); i--) {
				String knownCharacter = line.substring(i - 1, i);
				Iterator<Entry<String, Set<String>>> pending = pendingLines.entrySet().iterator();
				while (pending.hasNext()) {
					Entry<String, Set<String>> entry = pending.next();
					Set<String> missing = entry.getValue();
					missing.remove(knownCharacter);
					if (missing.isEmpty()) {
						// all characters are known now
						result.append(entry.getKey()).append("\n");
						pending.remove();
					}
				}
			}
		}

		if (!pendingLines.isEmpty()) {
			result.append("\nRemaining:\n");
			int maxRemainingSize = 10;
			// index = number of missing characters, index 0 is not used
			StringBuilder[] remainingSizeTexts = new StringBuilder[maxRemainingSize + 1];
			for (int remainingSize = 1; remainingSize <= maxRemainingSize; remainingSize++) {
				remainingSizeTexts[remainingSize] = new StringBuilder();
			}

			for (Entry<String, Set<String>> remainingText : pendingLines.entrySet()) {
				// at least 1, because complete lines have been removed above
				int size = Math.min(remainingText.getValue().size(), maxRemainingSize);
				remainingSizeTexts[size].append(remainingText.getKey()).append(" missing '").append(remainingText.getValue()).append("'").append("\n");
			}
			for (int remainingSize = 1; remainingSize <= maxRemainingSize; remainingSize++) {
				result.append("\nSize ").append(remainingSize).append("\n");
				result.append(remainingSizeTexts[remainingSize]);
			}
		}

		return result;

	}

	/**
	 * Looks up the vocabulary entry of some chinese characters. A dictionary must have been set.
	 * 
	 * @param chineseCharacter
	 *            the traditional or simplified characters to look up
	 * @return the entry of the dictionary map, null if there is none
	 */
	public ChineseVocabularyEntry getEntry(String chineseCharacter) {
		ChineseVocabularyEntry entry = dictionaryMap.get(chineseCharacter);
		return entry;
	}

	/**
	 * Looks up several characters (or words) in the dictionary. A dictionary must have been set.
	 * 
	 * @param chineseCharacters
	 *            the keys to look up
	 * @return the pairs (key, entry) of all keys that have an entry, in the iteration order of the given collection; keys without entry are left out
	 */
	public List<Pair<String, ChineseVocabularyEntry>> getSubdictionary(Collection<String> chineseCharacters) throws Exception {

		List<Pair<String, ChineseVocabularyEntry>> foundEntries = new ArrayList<Pair<String, ChineseVocabularyEntry>>();
		for (String key : chineseCharacters) {
			ChineseVocabularyEntry match = dictionaryMap.get(key);
			if (match == null) {
				// unknown keys are skipped silently
				continue;
			}
			foundEntries.add(new ImmutablePair<String, ChineseVocabularyEntry>(key, match));
		}

		return foundEntries;
	}
}
