#
# referenciate.rb:
# a library to mark up text with HTML entity references
#
# $Id: referenciate.rb,v 1.7 2007/01/28 08:34:55 zunda Exp $
#
# Copyright:: Copyright (C) 2007 zunda <zunda at freeshell.org>
# License:: GPL
#
require 'entity-reference/references.rb'

# Replaces characters of a string with the entity references listed in the
# tables of the References constant (loaded through
# entity-reference/references.rb).
module EntityReference
	# Supported charsets (upper case) => names of the References tables that
	# are combined for that charset. When a character is listed in more than
	# one table, the entry from the table named later is used.
	Lists_to_apply = {
		'ISO-8859-1' => ['iso-8859-1', 'internationalization'],
		'UTF-8' => ['utf-8'],
	}
	# Characters that are left as they are when the encoding is 'html'
	Not_on_html = {	# We will not convert
		"\x22" => true,	# "
		"\x26" => true,	# &
		"\x3c" => true,	# <
		"\x3e" => true,	# >
	}

	# Per-charset lookup tables, built once when this file is loaded:
	# the list of supported charsets, character => replacement, and a regexp
	# that matches any character that has a replacement.
	@@entityreferece_charsets = Lists_to_apply.keys
	@@entityreferece_hash = Hash.new
	@@entityreferece_regexp = Hash.new

	@@entityreferece_charsets.each do |charset|
		table_names = Lists_to_apply[charset]

		# Work on a copy of the first table: merge! modifies its receiver, and
		# the tables held by References must stay as they were loaded.
		table = References[table_names[0]].dup
		table_names[1..-1].each do |src|
			table.merge!(References[src])
		end
		@@entityreferece_hash[charset] = table

		# Third argument of Regexp.new: 'u' for UTF-8, 'n' for none.
		# NOTE(review): recent Ruby releases appear to no longer accept a third
		# argument to Regexp.new -- confirm against the Ruby version in use.
		re_opt = (charset == 'UTF-8') ? 'u' : 'n'
		@@entityreferece_regexp[charset] = Regexp.union(
			*(table.keys.map do |c|
				Regexp.new(c, 0, re_opt)
			end)
		)
	end

	# Returns a copy of +string+ in which every character that has an entry
	# in the table for +charset+ is replaced with that entry.
	#
	# string::   text to convert
	# encoding:: when 'html', the characters in Not_on_html are kept as they
	#            are; with any other value no character is exempted
	# charset::  name of the charset of +string+, compared case-insensitively
	#            with the keys of Lists_to_apply
	#
	# When +charset+ is not supported, +string+ itself is returned unchanged.
	def referenciate(string, encoding = 'html', charset = 'iso-8859-1')
		charset_upper = charset.upcase
		return string unless @@entityreferece_charsets.include?(charset_upper)

		no_conv = ('html' == encoding) ? Not_on_html : {}
		table = @@entityreferece_hash[charset_upper]

		string.gsub(@@entityreferece_regexp[charset_upper]) do |matched|
			no_conv[matched] ? matched : table[matched]
		end
	end
	module_function :referenciate

end
