#
# sgml-to-hash.rb:
# a library to read SGML files and store the entity definitions as Hash
#
# $Id: sgml-list.rb,v 1.8 2007/01/26 03:15:17 zunda Exp $
#
# Copyright:: Copyright (C) 2007 zunda <zunda at freeshell.org>
# License:: GPL
#

module EntityReference
	module SgmlList
		# Error raised by this module when an entity declaration cannot be
		# parsed or when an entity duplicates one that is already registered.
		class SgmlListError < StandardError
		end

		# A single SGML entity declaration: the reference name, the character
		# code it stands for, and an optional descriptive comment.
		class Entity
			# Matches one whole declaration, e.g.
			#   <!ENTITY amp CDATA "&#38;" -- ampersand, U+0026 ISOnum -->
			# Captures: (1) the reference name, (2) the text between `&#' and
			# `;', (3) the comment text, if any. The trailing `-- comment --'
			# (or a bare `--') is optional, so a declaration that simply ends
			# with `>' right after the quoted value is accepted as well.
			TAG_REGEXP = /\A<!ENTITY\s*([^\s]+)\s*CDATA\s*"&#(.+);"\s*(?:(?:--(.+))?--)?>\z/
			attr_reader :reference, :code, :comment

			# Returns an Entity read from a tag: `<!ENTITY ...  -->'
			#
			# tag_string:: one declaration; it may span several lines, line
			#              breaks and the whitespace around them are collapsed
			#              into a single space before matching
			#
			# Raises SgmlListError when the tag does not match TAG_REGEXP and
			# ArgumentError when the character code is not a valid integer.
			def self.parse(tag_string)
				flattened = tag_string.gsub(/\s*\n+\s*/, ' ').strip
				matched = TAG_REGEXP.match(flattened)
				unless matched
					raise SgmlListError, "could not parse #{tag_string.inspect}"
				end
				ref, code_str, comment = matched.captures
				comment = comment.strip if comment
				# Numeric character references are decimal. Integer() would
				# treat a leading zero as an octal prefix ("010" => 8, "038" =>
				# error), so digit-only codes are converted with to_i instead.
				# Any other spelling still goes through Integer() as before.
				if code_str =~ /\A\d+\z/
					code = code_str.to_i
				else
					code = Integer(code_str)
				end
				return Entity.new(ref, code, comment)
			end

			# reference:: entity name without `&' and `;', e.g. amp
			# code:: character code, e.g. 38
			# comment:: description, e.g. ampersand, U+0026 ISOnum; may be nil
			def initialize(reference, code, comment)
				@reference = reference
				@code = code
				@comment = comment
			end
		end

		# An ordered list of entities together with lookup tables in both
		# directions between reference names and character codes.
		class Entities
			# One `<!ENTITY ... >' declaration, up to the first `>'.
			ENTITY_REGEXP = /<!ENTITY.*?>/m
			# One `<!-- ... -->' comment; removed before declarations are scanned.
			COMMENT_REGEXP = /<!--.*?-->/m

			# entities:: Array of the pushed entities, in the order pushed
			# reference:: Hash from code to reference name
			# code:: Hash from reference name to code
			attr_reader :entities, :reference, :code

			# Builds a new Entities and fills it from sgml_string.
			def self.parse(sgml_string)
				list = Entities.new
				list.parse(sgml_string)
				list
			end

			def initialize
				@entities = []
				@reference = {}
				@code = {}
			end

			# Strips comments from sgml_string, then parses and pushes every
			# entity declaration found in what remains. Errors from
			# Entity.parse and from push propagate to the caller.
			def parse(sgml_string)
				without_comments = sgml_string.gsub(COMMENT_REGEXP, '')
				declarations = without_comments.scan(ENTITY_REGEXP)
				declarations.each { |declaration| push(Entity.parse(declaration)) }
			end

			# Registers entity in the list and in both lookup tables.
			# Raises SgmlListError if its reference name or its code has
			# already been registered.
			def push(entity)
				name = entity.reference
				raise SgmlListError, "duplicate entries for &#{name};" if @code.key?(name)
				number = entity.code
				raise SgmlListError, "duplicate entries for &##{number};" if @reference.key?(number)
				@entities.push(entity)
				@code[name] = number
				@reference[number] = name
			end

			# Returns the text of a Ruby Hash literal with one line per entity,
			# mapping a string literal for the code to "&reference;" and
			# carrying the entity's comment as a trailing `#' comment.
			#
			# comment:: when given, put as a `#' comment after the opening brace
			# prefix:: string put in front of every entry line and of the
			#          closing brace
			def reference_hash_literal(comment = nil, prefix = '')
				opening = comment ? "{\t# #{comment}\n" : "{\n"
				rows = @entities.map do |entity|
					key = key_literal(entity.code)
					%Q|#{prefix}\t#{key} => "&#{entity.reference};",\t# #{entity.comment}\n|
				end
				opening + rows.join('') + prefix + "}"
			end

			private

			# Source text of the string literal used as a hash key for code.
			# A code whose to_s accepts a radix is written as "\x" followed by
			# its hexadecimal digits; otherwise (to_s(16) raises ArgumentError,
			# as String#to_s does) the dump of the code is used.
			# NOTE(review): the hex digits are neither padded nor limited in
			# number, so codes above 0xff give more than two digits after
			# `\x' -- confirm that whatever reads the literal expects this.
			def key_literal(code)
				'"\x' + code.to_s(16) + '"'
			rescue ArgumentError
				code.dump
			end

		end

	end
end
