package jp.oarts.pirka.core.analyzer.html;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.io.StringReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Stack;
import java.util.TreeMap;

import jp.oarts.pirka.core.general.HtmlParts;
import jp.oarts.pirka.core.general.HtmlPartsType;

/**
 * Splits an HTML document into its parts (comments, start tags, end tags and plain text) and
 * arranges those parts into a nested list of {@link HtmlParts}.
 * 
 * @author ito
 * 
 */
public class HtmlSplitAnalyzer  implements Serializable{

	/**
	 * Serialization version identifier.
	 */
	private static final long serialVersionUID = 6298559473325517048L;

	/**
	 * Parses an HTML document held in a string and builds the list of HTML parts.
	 * 
	 * @param html
	 *            the HTML text to parse
	 * @return the top-level parts; nested parts are attached to their start tag as children
	 * @throws RuntimeException
	 *             if reading fails (not expected for an in-memory reader; the original
	 *             {@link IOException} is attached as the cause), or if the tag nesting is
	 *             inconsistent
	 */
	public static List<HtmlParts> getHtmlParts(String html) {
		// Typed as StringReader because its close() declares no checked exception, so the
		// finally block can never replace an exception thrown while parsing.
		StringReader r = new StringReader(html);
		try {
			return getHtmlParts(r);
		} catch (IOException e) {
			// Reading from a string should not fail; keep the cause in case it ever does.
			throw new RuntimeException("IOG[܂", e);
		} finally {
			r.close();
		}
	}

	/**
	 * Reads an HTML document from a reader and builds the list of HTML parts.
	 * <p>
	 * The reader is consumed but not closed by this method.
	 * 
	 * @param r
	 *            the reader supplying the HTML text
	 * @return the top-level parts; nested parts are attached to their start tag as children
	 * @throws IOException
	 *             if reading from {@code r} fails
	 * @throws RuntimeException
	 *             if the computed depth of the first part is not 0, i.e. the start and end tags
	 *             do not pair up
	 */
	public static ArrayList<HtmlParts> getHtmlParts(Reader r) throws IOException {

		String[] parts = split(r);
		HtmlParts[] htmlParts = new HtmlParts[parts.length];

		// Names of the end tags seen so far that have not yet met their start tag.
		Stack<String> pendingEndTags = new Stack<String>();
		int deep = 0;

		// Walk backwards so that every end tag is seen before the start tag it closes.
		for (int i = parts.length - 1; i >= 0; i--) {

			HtmlParts part = new HtmlParts();
			htmlParts[i] = part;

			HtmlPartsType type = getType(parts[i]);
			part.setType(type);
			part.setOrgString(parts[i]);

			// Only tags carry a tag name and attributes; other parts keep an empty tag name.
			String tagName = "";
			if (type == HtmlPartsType.TAG || type == HtmlPartsType.TAG_END) {
				tagName = getTagName(parts[i]);
				TreeMap<String, String> optionMap = HtmlTools.getOptionMap(parts[i]);
				part.setOption(optionMap);
				String name = optionMap.get("name");
				if (name != null && name.length() > 0) {
					part.setName(name);
				}
			}
			part.setTagName(tagName);

			if (part.getType() == HtmlPartsType.TAG_END) {
				// An end tag sits at the current depth; everything before it is one level deeper
				// until the matching start tag is found.
				pendingEndTags.push(tagName);
				part.setDeep(deep);
				deep++;
			} else {
				// A start tag matching the most recent pending end tag closes that level and
				// takes the shallower depth. Any other part stays at the current depth.
				if (part.getType() == HtmlPartsType.TAG && pendingEndTags.size() > 0 && pendingEndTags.peek().equals(tagName)) {
					deep--;
					pendingEndTags.pop();
				}
				part.setDeep(deep);
			}

		}

		// The first part must end up at depth 0; otherwise some end tag never met its start tag.
		if (htmlParts.length > 0 && htmlParts[0].getDeep() != 0) {
			throw new RuntimeException("HTML̉͂Ɏs܂B^O̊KwɌ肪܂B");
		}

		ArrayList<HtmlParts> list = new ArrayList<HtmlParts>();

		gatherHtmlParts(list, 0, 0, htmlParts);

		return list;

	}

	/**
	 * Collects the parts of one nesting level into {@code list}, recursing into deeper levels.
	 * <p>
	 * A start tag followed by a deeper part receives a child list (filled recursively) and the
	 * original string of the part that ends that level as its end tag. A start tag directly
	 * followed by an end tag at the same depth receives an empty child list and that end tag;
	 * the end tag itself is not added to {@code list}.
	 * 
	 * @param list
	 *            the list that receives the collected parts
	 * @param deep
	 *            the depth of the level being collected
	 * @param offset
	 *            the index in {@code baseList} at which collecting starts
	 * @param baseList
	 *            all parts of the document in document order, with their depth already set
	 * @return the index of the first part whose depth is less than {@code deep}, or {@code -1}
	 *         if the end of {@code baseList} was reached
	 */
	private static int gatherHtmlParts(ArrayList<HtmlParts> list, int deep, int offset, HtmlParts[] baseList) {

		for (int i = offset; i < baseList.length; i++) {
			HtmlParts part = baseList[i];
			if (part.getDeep() < deep) {
				// This part belongs to an outer level; hand control back to the caller.
				return i;
			}

			// Every part at this level is collected. This includes a start tag that is the very
			// last part of the document, which was previously dropped.
			list.add(part);

			if (part.getType() != HtmlPartsType.TAG || i >= baseList.length - 1) {
				continue;
			}

			HtmlParts next = baseList[i + 1];
			if (next.getDeep() > deep) {
				// The tag has content: gather it as children, then take the closing part.
				ArrayList<HtmlParts> childList = new ArrayList<HtmlParts>();
				part.setChild(childList);

				int nextOffset = gatherHtmlParts(childList, next.getDeep(), i + 1, baseList);
				if (nextOffset < 0) {
					// The document ended inside this tag; there is no closing part to record.
					return -1;
				}
				part.setEndTag(baseList[nextOffset].getOrgString());
				i = nextOffset;
			} else if (next.getDeep() == deep && next.getType() == HtmlPartsType.TAG_END) {
				// Empty element such as <a></a>: record the end tag and skip over it.
				part.setChild(new ArrayList<HtmlParts>());
				part.setEndTag(next.getOrgString());
				i = i + 1;
			}
		}
		return -1;

	}

	/**
	 * Classifies a split fragment as a comment, an end tag, a start tag or plain text.
	 * 
	 * @param str
	 *            the fragment to classify
	 * @return {@code COMMENT} for {@code <!-- ... -->}, {@code TAG_END} for {@code </x...>},
	 *         {@code TAG} for {@code <x...>} (where {@code x} is accepted by
	 *         {@code HtmlTools.isTagChar}), otherwise {@code STRING}
	 */
	private static HtmlPartsType getType(String str) {

		// The minimum length of 7 keeps "<!--" and "-->" from overlapping, e.g. in "<!-->".
		if (str.length() >= 7 && str.startsWith("<!--") && str.endsWith("-->")) {
			return HtmlPartsType.COMMENT;
		}

		boolean closed = str.length() > 0 && str.charAt(str.length() - 1) == '>';

		if (str.length() >= 4 && str.startsWith("</") && HtmlTools.isTagChar(str.charAt(2)) && closed) {
			return HtmlPartsType.TAG_END;
		}

		if (str.length() >= 3 && str.charAt(0) == '<' && HtmlTools.isTagChar(str.charAt(1)) && closed) {
			return HtmlPartsType.TAG;
		}

		return HtmlPartsType.STRING;

	}

	/**
	 * Extracts the tag name from a start tag or an end tag and converts it to lower case.
	 * <p>
	 * The conversion uses a fixed locale so that the result does not depend on the default
	 * locale of the JVM.
	 * 
	 * @param str
	 *            a start tag or end tag, at least two characters long
	 * @return the tag name in lower case
	 */
	private static String getTagName(String str) {

		// Skip "<" for a start tag, "</" for an end tag.
		int offset = (str.charAt(1) == '/') ? 2 : 1;

		// The name runs up to the first separator (as defined by HtmlTools) or the closing '>'.
		int end = offset;
		while (end < str.length()) {
			char c = str.charAt(end);
			if (HtmlTools.isTagInSeparator(c) || c == '>') {
				break;
			}
			end++;
		}

		return str.substring(offset, end).toLowerCase(Locale.ENGLISH);
	}

	/**
	 * Reads all characters from the reader and splits them into comments, start tags, end tags
	 * and the text between them.
	 * 
	 * @param r
	 *            the reader supplying the HTML text
	 * @return the fragments in document order
	 * @throws IOException
	 *             if reading from {@code r} fails
	 */
	private static String[] split(Reader r) throws IOException {

		// True while the previous character was a '<' that may start a comment or a tag.
		boolean afterOpenBracket = false;
		StringBuilder sb = new StringBuilder();
		ArrayList<String> parts = new ArrayList<String>();

		int cint;
		while ((cint = r.read()) >= 0) {
			char c = (char) cint;

			if (afterOpenBracket) {
				sb.append(c);
				// The character after '<' decides what is being read. Each helper either stores
				// the finished fragment in parts or leaves its characters in sb as plain text.
				if (c == '!') {
					commentStart(sb, r, parts);
				} else if (HtmlTools.isTagChar(c)) {
					tagStart(sb, r, parts);
				} else if (c == '/') {
					endTagStart(sb, r, parts);
				}
				afterOpenBracket = false;
			} else {
				if (c == '<') {
					// Flush the text collected so far before a possible tag begins.
					if (sb.length() > 0) {
						parts.add(sb.toString());
						sb.setLength(0);
					}
					afterOpenBracket = true;
				}
				sb.append(c);
			}
		}

		// Whatever is left (trailing text or an unterminated tag/comment) becomes the last fragment.
		if (sb.length() > 0) {
			parts.add(sb.toString());
			sb.setLength(0);
		}

		return parts.toArray(new String[0]);
	}

	/**
	 * Reads from the reader up to the end of a comment, appending to the string builder, and
	 * stores the finished comment in the list.
	 * <p>
	 * If the characters read turn out not to be a comment (the third or fourth character of the
	 * builder is not {@code '-'}), or the input ends first, nothing is stored and the characters
	 * read so far stay in the builder.
	 * 
	 * @param sb
	 *            the string builder holding the characters read so far ({@code "<!"} when called
	 *            from {@code split})
	 * @param r
	 *            the reader
	 * @param parts
	 *            the list that receives the finished comment
	 * @throws IOException
	 *             if reading from {@code r} fails
	 */
	private static void commentStart(StringBuilder sb, Reader r, ArrayList<String> parts) throws IOException {

		int cint;
		while ((cint = r.read()) >= 0) {
			char c = (char) cint;
			sb.append(c);

			int len = sb.length();
			if (len <= 4) {
				// Still inside the "<!--" opener: anything but '-' means this is not a comment.
				if (c != '-') {
					return;
				}
			} else if (c == '>' && len >= 7 && sb.charAt(len - 2) == '-' && sb.charAt(len - 3) == '-') {
				// Closing "-->" found; the length check keeps it from overlapping the opener.
				parts.add(sb.toString());
				sb.setLength(0);
				return;
			}
		}

	}

	/**
	 * Reads from the reader up to the closing {@code '>'} of a start tag, appending to the string
	 * builder, and stores the finished tag in the list.
	 * <p>
	 * If the input ends before a {@code '>'} is read, nothing is stored and the characters read
	 * so far stay in the builder.
	 * 
	 * @param sb
	 *            the string builder holding the characters read so far
	 * @param r
	 *            the reader
	 * @param parts
	 *            the list that receives the finished tag
	 * @throws IOException
	 *             if reading from {@code r} fails
	 */
	static void tagStart(StringBuilder sb, Reader r, ArrayList<String> parts) throws IOException {

		int cint;
		while ((cint = r.read()) >= 0) {
			char c = (char) cint;
			sb.append(c);

			if (c == '>') {
				parts.add(sb.toString());
				sb.setLength(0);
				return;
			}
			if (c == '\"' || c == '\'') {
				// NOTE(review): presumably reads the rest of the quoted value into sb so that a
				// '>' inside quotes does not end the tag - confirm against HtmlTools.stringStart.
				HtmlTools.stringStart(sb, r, c);
			}
		}

	}

	/**
	 * Reads from the reader up to the closing {@code '>'} of an end tag, appending to the string
	 * builder, and stores the finished tag in the list.
	 * <p>
	 * If the third character of the builder is not a tag character, or the input ends before a
	 * {@code '>'} is read, nothing is stored and the characters read so far stay in the builder.
	 * 
	 * @param sb
	 *            the string builder holding the characters read so far ({@code "</"} when called
	 *            from {@code split})
	 * @param r
	 *            the reader
	 * @param parts
	 *            the list that receives the finished tag
	 * @throws IOException
	 *             if reading from {@code r} fails
	 */
	static void endTagStart(StringBuilder sb, Reader r, ArrayList<String> parts) throws IOException {

		int cint;
		while ((cint = r.read()) >= 0) {
			char c = (char) cint;
			sb.append(c);

			// The character right after "</" must be able to start a tag name.
			if (sb.length() == 3 && !HtmlTools.isTagChar(c)) {
				return;
			}
			if (c == '>') {
				parts.add(sb.toString());
				sb.setLength(0);
				return;
			}
			if (c == '\"' || c == '\'') {
				// NOTE(review): presumably reads the rest of the quoted value into sb - confirm
				// against HtmlTools.stringStart.
				HtmlTools.stringStart(sb, r, c);
			}
		}

	}

	/**
	 * Manual test entry point: parses a fixed HTML resource from the class path and prints each
	 * top-level part with a running number.
	 * 
	 * @param args
	 *            not used
	 */
	public static void main(String[] args) {
		BufferedReader br = null;

		int no = 0;
		try {
			URL url = HtmlSplitAnalyzer.class.getResource("/jp/oarts/hbj/control/view/FatalErrorDefaultWindow.html");
			System.out.println("URL=" + url.toString());

			br = new BufferedReader(new InputStreamReader(url.openStream(), "JISAutoDetect"));

			ArrayList<HtmlParts> htmlParts = getHtmlParts(br);
			for (HtmlParts part : htmlParts) {
				no++;
				System.out.println(no + ":" + part.toString());
			}

		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			if (br != null) {
				try {
					br.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

}
