# test cases for conversions between UCS and UTF-8
# taken from http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8

$:.unshift('lib')
require 'entity-reference/ucs-transformation'
require 'test/unit'

# Exercises ucs_to_utf8 and utf8_to_ucs, which are mixed in from
# EntityReference::UcsTransformation: round trips for valid values, and
# UcsTransformationError for malformed or out-of-range input.
class TestUcsTransformation < Test::Unit::TestCase
	include EntityReference::UcsTransformation

	# Valid code points must encode to the expected UTF-8 byte sequences.
	def test_ucs_to_utf8
		{
			0x00 => "\x00",
			0x0a => "\x0a",
			0xa9 => "\xc2\xa9",	# copyright sign
			0x2260 => "\xe2\x89\xa0",	# not equal to
		}.each_pair do |ucs, utf8|
			# The message identifies which table entry failed.
			assert_equal(utf8, ucs_to_utf8(ucs), "U+#{ucs.to_s(16)}")
		end
	end

	# Valid UTF-8 byte sequences must decode to the expected code points.
	def test_utf8_to_ucs
		{
			"\x00" => 0x00,
			"\x0a" => 0x0a,
			"\xc2\xa9" => 0xa9,	# copyright sign
			"\xe2\x89\xa0" => 0x2260,	# not equal to
		}.each_pair do |utf8, ucs|
			# The message identifies which table entry failed.
			assert_equal(ucs, utf8_to_ucs(utf8), utf8.inspect)
		end
	end

	# Overlong encodings of U+000A (two to six bytes) must be rejected.
	def test_overlong_utf8
		[
			"\xc0\x8a",
			"\xe0\x80\x8a",
			"\xf0\x80\x80\x8a",
			"\xf8\x80\x80\x80\x8a",
			"\xfc\x80\x80\x80\x80\x8a",
		].each do |utf8|
			assert_raise(UcsTransformationError, utf8.inspect){utf8_to_ucs(utf8)}
		end
	end

	# Sequences that end before all continuation bytes are present
	# must be rejected.
	def test_overshort_utf8
		[
			"\xc2",	# two-byte lead, no continuation byte
			"\xe2\x89",	# three-byte lead, only one continuation byte
		].each do |utf8|
			assert_raise(UcsTransformationError, utf8.inspect){utf8_to_ucs(utf8)}
		end
	end

	# Values that ucs_to_utf8 is expected to refuse: a negative number, both
	# ends of the surrogate range, U+FFFE, U+FFFF and the first value above
	# the 16-bit range.
	def test_overrange_ucs
		[ -1, 0xd800, 0xdfff, 0xfffe, 0xffff, 0x10000 ].each do |ucs|
			assert_raise(UcsTransformationError, "U+#{ucs.to_s(16)}"){ucs_to_utf8(ucs)}
		end
	end

	# UTF-8 byte streams corresponding to the values rejected in
	# test_overrange_ucs (-1 has no UTF-8 form, so it has no counterpart here).
	# NOTE(review): assumes utf8_to_ucs rejects the same code points that
	# ucs_to_utf8 does -- confirm against the library.
	def test_overrange_utf8
		[
			"\xed\xa0\x80",	# U+D800
			"\xed\xbf\xbf",	# U+DFFF
			"\xef\xbf\xbe",	# U+FFFE
			"\xef\xbf\xbf",	# U+FFFF
			"\xf0\x90\x80\x80",	# U+10000
		].each do |utf8|
			assert_raise(UcsTransformationError, utf8.inspect){utf8_to_ucs(utf8)}
		end
	end

end
