# coding: UTF-8

#=メール解析
#
# 最初の著者:: トゥイー
# リポジトリ情報:: $Id: m2w_mail_parser_html_text_type.rb 621 2012-03-17 02:36:35Z toy_dev $
# 著作権:: Copyright (C) Ownway.info, 2011. All rights reserved.
# ライセンス:: CPL(Common Public Licence)
# Parser for mail bodies written as HTML text.
#
# The body is cut into "lines" at HTML line-break constructs (closing
# <div>/<p> tags and <br>/<p> tags); the remaining methods recognise
# blank lines, "key<separator>value" header lines and separation lines
# inside that line-oriented view.
#
# Relies on two constants that are not defined in this file:
# M2W_LOGGER (must respond to #debug) and
# M2W_FORMAT_PLUGIN_CONF_HEADER_PREFIX.
class Mail2WeblogMailParserHtmlTextType

	# Splits an HTML mail body into an array of line strings.
	#
	# All CR/LF characters are removed, the inside of <body>...</body> is
	# extracted when such a pair exists, the content is normalised with
	# #arrange_content and then cut after every closing </div> / </p> and
	# after every <br ...> / <p ...> tag. The tag that ended a line stays
	# attached to the end of that line; the text in front of it is trimmed.
	#
	# content:: HTML string of the mail body.
	# Returns an Array of non-empty Strings.
	def split(content)
		M2W_LOGGER.debug("Start  #{self.class}#split ... content = #{content}")

		content = content.gsub(/\r|\n/, '')

		# Only the inside of the body element is of interest when present.
		if %r!<body[^>]*?>(.+)</body>!i =~ content
			content = $1
		end

		content = arrange_content(content)

		lines = []
		loop do
			left, sep, right = content.partition(%r!</(div|p)>|<(br|p)[^>]*?/?>!i)
			# sep is "" for the final piece, so appending it is harmless there.
			lines.push(__clean_html_line(left) + sep)
			break if sep.empty?
			content = right
		end

		result = lines.reject { |line| line.empty? }

		M2W_LOGGER.debug("Finish #{self.class}#split ... result = #{result}")
		return result
	end

	# Tells whether a line carries no visible text.
	#
	# A line is blank when it is either
	# * zero or more opening <div>/<p> tags, each optionally followed by
	#   whitespace / &nbsp;, closed by a single </div> or </p>, or
	# * optional whitespace / &nbsp;, one <br ...> or <p ...> tag, and
	#   nothing but whitespace / &nbsp; after it.
	#
	# The second form is anchored at the end of the line: a line that merely
	# starts with such a tag and then carries text is NOT blank.
	#
	# line:: one line as produced by #split (or one "\n"-separated line).
	# Returns true or false.
	def is_space_line(line)
		return true if %r!^(<(div|p)[^>]*?>(\s|&nbsp;)*?)*?</(div|p)>$!i =~ line
		return true if %r!^(\s|&nbsp;)*?<(br|p)[^>]*/?>(\s|&nbsp;)*$!i =~ line

		return false
	end

	# Parses a "key<separator>value" header line.
	#
	# Three layouts are recognised:
	# * <div|p ...>key SEP value</div|p>
	# * key SEP value<br|p ...>
	# * <div|p ...>key SEP value<br|p ...>
	# The key consists of ASCII letters, digits and underscores.
	#
	# NOTE(review): subject_separator is interpolated into the pattern as-is,
	# so regexp metacharacters in it are interpreted as regexp syntax --
	# confirm with the callers whether that is intended.
	#
	# line:: one line as produced by #split.
	# subject_separator:: separator between key and value.
	# Returns [true, key, value] on success, [false, nil, nil] otherwise.
	def parse_header(line, subject_separator)
		if %r!^<(div|p)[^>]*?>([0-9a-zA-Z_]+?)#{subject_separator}(.*?)</(div|p)>$!i =~ line
			[true, $2, $3]
		elsif %r!^([0-9a-zA-Z_]+?)#{subject_separator}(.*?)<(br|p)[^>]*/?>!i =~ line
			[true, $1, $2]
		elsif %r!^<(div|p)[^>]*?>([0-9a-zA-Z_]+?)#{subject_separator}(.*?)<(br|p)[^>]*/?>!i =~ line
			[true, $2, $3]
		else
			[false, nil, nil]
		end
	end

	# Checks whether a header key starts with
	# M2W_FORMAT_PLUGIN_CONF_HEADER_PREFIX and extracts the remainder.
	#
	# key:: header key.
	# Returns [true, remainder] when the key is the prefix followed by one or
	# more ASCII letters, digits or underscores, [false, nil] otherwise.
	def parse_format_plugin_header(key)
		if /^#{M2W_FORMAT_PLUGIN_CONF_HEADER_PREFIX}([0-9a-zA-Z_]+)$/ =~ key
			[true, $1]
		else
			[false, nil]
		end
	end

	# Parses a separation line: a key followed by the separator repeated
	# four times in a row (one occurrence plus three back-referenced
	# repetitions).
	#
	# Recognised layouts:
	# * <div|p ...>key SEPx4</div|p>  (closing tag name must equal the opening one)
	# * key SEPx4<br|p ...>
	#
	# line:: one line as produced by #split.
	# subject_separator:: separator string (interpolated into the pattern as-is).
	# Returns [true, key] on success, [false, nil] otherwise.
	def parse_subject_separation(line, subject_separator)
		if %r!^<(div|p)[^>]*?>([0-9a-zA-Z_]+?)(#{subject_separator})\3{3}</\1>$!i =~ line
			[true, $2]
		elsif %r!^([0-9a-zA-Z_]+?)(#{subject_separator})\2{3}<(br|p)[^>]*/?>!i =~ line
			[true, $1]
		else
			[false, nil]
		end
	end

	# Normalises the HTML content before it is split into lines.
	#
	# Collapses nested leading <div> tags and nested trailing </div> tags,
	# balances the number of <div> / </div> tags, and drops blank lines
	# (see #is_space_line) from the beginning and the end of the
	# "\n"-separated content.
	#
	# content:: HTML string.
	# Returns the normalised String.
	def arrange_content(content)
		M2W_LOGGER.debug("Start  #{self.class}#arrange_content ... content = #{content}")

		result = __balance_div_tags(__delete_suffix_tags(__delete_prefix_tags(content)))

		buffers = result.split(/\n/)

		buffers.shift while !buffers.empty? && is_space_line(buffers.first)
		buffers.pop while !buffers.empty? && is_space_line(buffers.last)

		# Every line is terminated with "\n" (this is what gets logged); the
		# trailing terminator is removed from the returned value.
		result = buffers.map { |line| "#{line}\n" }.join

		M2W_LOGGER.debug("Finish #{self.class}#arrange_content ... result = #{result}")
		return result.chomp
	end

	# Collapses a run of nested opening <div ...> tags at the very beginning
	# of the content into one plain "<div>".
	#
	# The pattern is anchored to the start and end of the whole string and
	# "." also matches newlines, so everything following the collapsed tags
	# is kept even when the content spans several lines.
	#
	# content:: HTML string.
	# Returns the String with at most one leading <div> of the nested run.
	def __delete_prefix_tags(content)
		while %r!\A\s*<div[^>]*>\s*<div[^>]*>(.+)\z!mi =~ content
			content = "<div>#{$1}"
		end

		return content
	end

	# Collapses a run of nested closing </div> tags at the end of the content
	# into a single "</div>".
	#
	# Works from the last "</div>...</div>" pair (the two tags may be
	# separated by whitespace / &nbsp;). Processing stops as soon as anything
	# other than whitespace / &nbsp; follows that pair; a blank remainder is
	# discarded together with the pair.
	#
	# content:: HTML string.
	# Returns the String with the trailing </div> run collapsed.
	def __delete_suffix_tags(content)
		while (pos = content.rindex(%r!</div>(\s|&nbsp;)*?</div>(\s|&nbsp;)*?!mi)) != nil
			if content.slice(pos, content.length - pos) =~ %r!</div>(\s|&nbsp;)*?</div>(.+)!mi
				rest = $2.gsub(/(\r|\n)/, "")
				# Real content after the pair: the pair is not a trailing wrapper.
				break if rest !~ %r!^(\s|&nbsp;)*$!i
			end
			content = content.slice(0, pos) + "</div>"
		end

		return content
	end

	# Removes leading and trailing whitespace from a line; whitespace inside
	# the text is left untouched.
	#
	# line:: String.
	# Returns a new trimmed String.
	def __clean_html_line(line)
		return line.strip
	end

	# Counts how many positions in content the pattern matches at. The
	# search restarts one character after each match start, so overlapping
	# matches are counted as well.
	#
	# content:: String to search.
	# pattern:: Regexp or String.
	# Returns the number of matches as an Integer.
	def __count_pattern_matched(content, pattern)
		count = 0

		pos = 0
		while (pos = content.index(pattern, pos)) != nil
			count += 1
			pos += 1
		end

		return count
	end

	# Makes the number of "<div" and "</div" occurrences equal by appending
	# missing "</div>" tags or prepending missing "<div>" tags.
	#
	# content:: HTML string.
	# Returns the balanced String (content itself when already balanced).
	def __balance_div_tags(content)
		surplus = __count_pattern_matched(content, %r!<div!i) -
			__count_pattern_matched(content, %r!</div!i)

		if surplus > 0
			return content + ("</div>" * surplus)
		elsif surplus < 0
			return ("<div>" * -surplus) + content
		else
			return content
		end
	end

end
