package net.sf.gluebooster.demos.pojo.languages.chinese;

import java.io.Reader;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.junit.Assert;
import org.junit.Test;

import net.sf.gluebooster.java.booster.essentials.utils.IoBoostUtils;

/**
 * Unit tests for {@link ChineseHelper}.
 *
 * <p>
 * The tests use small in-memory fixtures: a hand-written dictionary that is
 * parsed by {@link CeDictParser}, and a short tab-separated character
 * composition table that is parsed by
 * {@code ChineseHelper.parseWikipediaCharacterComponentsVersion2015_02_16}.
 * No external files are read.
 */
public class ChineseHelperTest {

	/**
	 * Parses a two-entry dictionary from text and checks that the characters
	 * 貝瓦 embedded in otherwise non-Chinese text are converted to 贝瓦 while
	 * the rest of the text stays untouched.
	 *
	 * @throws Exception
	 *             if parsing the dictionary or the conversion fails
	 */
	@Test
	public void testChangeTraditionalToSimplifiedParseDictionary()
			throws Exception {
		// Comment line, blank lines and two entries, one of them with
		// trailing whitespace after the translation.
		String text = "#my dictionary\n\n貝 贝 [bei4] /example\n瓦 瓦 [wa4] / decken; \n\n";
		Collection<ChineseVocabularyEntry> dictionary = CeDictParser
				.parseDictionary(text, false);
		ChineseHelper helper = new ChineseHelper();
		helper.setDictionary(dictionary);

		String changed = helper.changeTraditionalToSimplified("my text is 貝瓦.");
		Assert.assertEquals("my text is 贝瓦.", changed);
	}

	/**
	 * Builds the sample dictionary shared by several tests.
	 *
	 * <p>
	 * The dictionary text is assembled with {@code CeDictParser.addEntry} and
	 * then parsed again. It contains the single characters 贝 and 瓦 and the
	 * words 贝贝, 贝瓦, 贝瓦贝瓦 and 贝瓦贝瓦贝瓦.
	 *
	 * @return the parsed dictionary entries
	 * @throws Exception
	 *             if building or parsing the dictionary text fails
	 */
	private Collection<ChineseVocabularyEntry> getSampleDictionary()
			throws Exception {
		// NOTE(review): the header has no trailing newline; presumably
		// addEntry starts each entry on a new line - confirm in CeDictParser.
		StringBuilder dictionaryText = new StringBuilder("#my dictionary");
		CeDictParser.addEntry(dictionaryText, "贝", "貝", "bei4", "example");
		CeDictParser.addEntry(dictionaryText, "瓦", "瓦", "wa4", "decken");
		CeDictParser.addEntry(dictionaryText, "贝贝", "貝貝", "bei4 bei4",
				"example2");
		CeDictParser.addEntry(dictionaryText, "贝瓦", "貝瓦", "bei4 wa4",
				"example3");
		CeDictParser.addEntry(dictionaryText, "贝瓦贝瓦", "貝瓦貝瓦",
				"bei4 wa4 bei4 wa4", "example4");
		CeDictParser.addEntry(dictionaryText, "贝瓦贝瓦贝瓦", "貝瓦貝瓦貝瓦",
				"bei4 wa4 bei4 wa4 bei4 wa4", "example5");

		return CeDictParser.parseDictionary(dictionaryText.toString(), false);
	}

	/**
	 * Checks {@code findAdditionalVocabulary} for the input lines 贝瓦 and 瓦:
	 * the result must contain 贝, 贝瓦贝瓦 and 贝贝, and must not contain 瓦,
	 * which is already part of the input.
	 *
	 * @throws Exception
	 *             if the fixtures cannot be created or the lookup fails
	 */
	@Test
	public void testFindAdditionalVocabulary() throws Exception {
		ChineseHelper helper = new ChineseHelper();
		helper.setDictionary(getSampleDictionary());
		helper.setCharacterComponents(getSampleCharacterComponents());

		List<String> newVocabulary = helper.findAdditionalVocabulary("贝瓦\n瓦");
		Assert.assertTrue(newVocabulary.contains("贝"));
		Assert.assertTrue(newVocabulary.contains("贝瓦贝瓦"));
		Assert.assertTrue(newVocabulary.contains("贝贝"));
		Assert.assertFalse(newVocabulary.contains("瓦"));
	}

	/**
	 * Builds the sample character composition table shared by several tests.
	 *
	 * <p>
	 * The text consists of two heading lines, an empty line and a
	 * {@code <pre>} section with one tab-separated row per character. It is
	 * parsed with
	 * {@code ChineseHelper.parseWikipediaCharacterComponentsVersion2015_02_16}.
	 *
	 * @return the parsed table; judging from the assertions in this class it
	 *         maps a character to the list of its components
	 * @throws Exception
	 *             if parsing the table fails
	 */
	private Map<String, List<String>> getSampleCharacterComponents()
			throws Exception {
		StringBuilder text = new StringBuilder();
		text.append("= Table =\n");
		text.append("==	一	 ==\n");
		// empty line between the headings and the data section
		text.append("\n");
		text.append("<pre>	\n");
		text.append("	幺	3	一	幺	3		*	0		VI	*").append("\n");
		text.append("	亠	2	一	亠	2		*	0		IM	*").append("\n");
		text.append("	玄	5	吕	亠	2		幺	3		YVI	*").append("\n");
		text.append("	甆	14	咒	玄	5		瓦	5		TVIN	瓦").append("\n");
		text.append("	贝	4	+	冂	2		人	2		BO	貝").append("\n");
		text.append("	贞	6	吕	卜	2		贝	4		YBO	貝").append("\n");
		text.append("	冂	2	一	冂	2		*	0		LS	*").append("\n");
		text.append("	人	2	一	人	2		*	0		O	*").append("\n");
		text.append("	瓦	5	一	瓦	5		*	0		MVNI	*").append("\n");
		text.append("	一	1	一	一	1		*	0		M	*").append("\n");
		text.append("	丁	2	吕	一	1		亅	1		MN	一").append("\n");
		text.append("	丂	2	一	丂	2		*	0		MVS	一").append("\n");
		text.append("	七	2	一	七	2		*	0		JU	一").append("\n");
		text.append("	丄	2	一	丄	2		*	0		LM	一").append("\n");
		text.append("	丅	2	一	丅	2		*	0		ML	一").append("\n");
		text.append("	丆	2	吕	一	2		丿	0		MH	一").append("\n");
		text.append("</pre>	\n");

		return ChineseHelper
				.parseWikipediaCharacterComponentsVersion2015_02_16(text
						.toString());
	}

	/**
	 * Checks the parsed sample table: 一 and 丁 must be present as keys, the
	 * components of 丁 must include 一 and 亅, and the components of 甆 must
	 * include 瓦.
	 *
	 * @throws Exception
	 *             if parsing the sample table fails
	 */
	@Test
	public void testParseWikipediaCharacterComponentsVersion2015_02_16()
			throws Exception {
		Map<String, List<String>> components = getSampleCharacterComponents();
		Assert.assertTrue(components.containsKey("一"));
		Assert.assertTrue(components.containsKey("丁"));
		Assert.assertTrue(components.get("丁").contains("一"));
		Assert.assertTrue(components.get("丁").contains("亅"));
		Assert.assertTrue(components.get("甆").contains("瓦"));
	}

	/**
	 * Checks {@code sortCharacters}: the result must have as many elements as
	 * the input set, and 人 must be placed before 贝 (the sample table lists
	 * 人 as a component of 贝).
	 *
	 * @throws Exception
	 *             if the fixtures cannot be created or sorting fails
	 */
	@Test
	public void testChineseWordSorting() throws Exception {
		Map<String, List<String>> components = getSampleCharacterComponents();

		ChineseHelper helper = new ChineseHelper();
		helper.setDictionary(getSampleDictionary());
		helper.setCharacterComponents(components);

		// sort the characters that have an entry in the table
		HashSet<String> characters = new HashSet<String>(components.keySet());
		List<String> sortedCharacters = helper.sortCharacters(characters);
		Assert.assertEquals(characters.size(), sortedCharacters.size());

		// sort all characters, including those that only occur as components
		for (List<String> componentList : components.values()) {
			characters.addAll(componentList);
		}
		sortedCharacters = helper.sortCharacters(characters);
		Assert.assertEquals(characters.size(), sortedCharacters.size());

		int componentIndex = sortedCharacters.indexOf("人");
		int compoundIndex = sortedCharacters.indexOf("贝");
		// indexOf returns -1 for a missing element, which would let the
		// ordering check below pass without proving anything.
		Assert.assertTrue("人 is missing from the sorted characters",
				componentIndex >= 0);
		Assert.assertTrue("贝 is missing from the sorted characters",
				compoundIndex >= 0);
		Assert.assertTrue("人 must be sorted before 贝",
				componentIndex < compoundIndex);
	}

}
