#!/usr/bin/ruby
#
# blazer-ja-proxy: a proxy server for Palm/Blazer
#
# $Id: blazer-ja-proxy,v 1.50 2007/08/27 19:08:24 zunda Exp $
#
# Copyright:: Copyright (C) 2006 zunda <zunda at freeshell.org>
# License:: GPL
#

#
# Entity reference list
#
# The following is generated by pre-setup.rb in ruby-entity-reference,
# with definitions taken from http://www.w3.org/TR/html401/sgml/entities.html
#

module EntityReference
	References = {
		"internationalization" => {
			"\x22" => "&quot;",	# quotation mark = APL quote, U+0022 ISOnum
			"\x26" => "&amp;",	# ampersand, U+0026 ISOnum
			"\x3c" => "&lt;",	# less-than sign, U+003C ISOnum
			"\x3e" => "&gt;",	# greater-than sign, U+003E ISOnum
			"\x152" => "&OElig;",	# latin capital ligature OE, U+0152 ISOlat2
			"\x153" => "&oelig;",	# latin small ligature oe, U+0153 ISOlat2
			"\x160" => "&Scaron;",	# latin capital letter S with caron, U+0160 ISOlat2
			"\x161" => "&scaron;",	# latin small letter s with caron, U+0161 ISOlat2
			"\x178" => "&Yuml;",	# latin capital letter Y with diaeresis, U+0178 ISOlat2
			"\x2c6" => "&circ;",	# modifier letter circumflex accent, U+02C6 ISOpub
			"\x2dc" => "&tilde;",	# small tilde, U+02DC ISOdia
			"\x2002" => "&ensp;",	# en space, U+2002 ISOpub
			"\x2003" => "&emsp;",	# em space, U+2003 ISOpub
			"\x2009" => "&thinsp;",	# thin space, U+2009 ISOpub
			"\x200c" => "&zwnj;",	# zero width non-joiner, U+200C NEW RFC 2070
			"\x200d" => "&zwj;",	# zero width joiner, U+200D NEW RFC 2070
			"\x200e" => "&lrm;",	# left-to-right mark, U+200E NEW RFC 2070
			"\x200f" => "&rlm;",	# right-to-left mark, U+200F NEW RFC 2070
			"\x2013" => "&ndash;",	# en dash, U+2013 ISOpub
			"\x2014" => "&mdash;",	# em dash, U+2014 ISOpub
			"\x2018" => "&lsquo;",	# left single quotation mark, U+2018 ISOnum
			"\x2019" => "&rsquo;",	# right single quotation mark, U+2019 ISOnum
			"\x201a" => "&sbquo;",	# single low-9 quotation mark, U+201A NEW
			"\x201c" => "&ldquo;",	# left double quotation mark, U+201C ISOnum
			"\x201d" => "&rdquo;",	# right double quotation mark, U+201D ISOnum
			"\x201e" => "&bdquo;",	# double low-9 quotation mark, U+201E NEW
			"\x2020" => "&dagger;",	# dagger, U+2020 ISOpub
			"\x2021" => "&Dagger;",	# double dagger, U+2021 ISOpub
			"\x2030" => "&permil;",	# per mille sign, U+2030 ISOtech
			"\x2039" => "&lsaquo;",	# single left-pointing angle quotation mark, U+2039 ISO proposed
			"\x203a" => "&rsaquo;",	# single right-pointing angle quotation mark, U+203A ISO proposed
			"\x20ac" => "&euro;",	# euro sign, U+20AC NEW
		},
		"iso-8859-1" => {
			"\xa0" => "&nbsp;",	# no-break space = non-breaking space, U+00A0 ISOnum
			"\xa1" => "&iexcl;",	# inverted exclamation mark, U+00A1 ISOnum
			"\xa2" => "&cent;",	# cent sign, U+00A2 ISOnum
			"\xa3" => "&pound;",	# pound sign, U+00A3 ISOnum
			"\xa4" => "&curren;",	# currency sign, U+00A4 ISOnum
			"\xa5" => "&yen;",	# yen sign = yuan sign, U+00A5 ISOnum
			"\xa6" => "&brvbar;",	# broken bar = broken vertical bar, U+00A6 ISOnum
			"\xa7" => "&sect;",	# section sign, U+00A7 ISOnum
			"\xa8" => "&uml;",	# diaeresis = spacing diaeresis, U+00A8 ISOdia
			"\xa9" => "&copy;",	# copyright sign, U+00A9 ISOnum
			"\xaa" => "&ordf;",	# feminine ordinal indicator, U+00AA ISOnum
			"\xab" => "&laquo;",	# left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
			"\xac" => "&not;",	# not sign, U+00AC ISOnum
			"\xad" => "&shy;",	# soft hyphen = discretionary hyphen, U+00AD ISOnum
			"\xae" => "&reg;",	# registered sign = registered trade mark sign, U+00AE ISOnum
			"\xaf" => "&macr;",	# macron = spacing macron = overline = APL overbar, U+00AF ISOdia
			"\xb0" => "&deg;",	# degree sign, U+00B0 ISOnum
			"\xb1" => "&plusmn;",	# plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
			"\xb2" => "&sup2;",	# superscript two = superscript digit two = squared, U+00B2 ISOnum
			"\xb3" => "&sup3;",	# superscript three = superscript digit three = cubed, U+00B3 ISOnum
			"\xb4" => "&acute;",	# acute accent = spacing acute, U+00B4 ISOdia
			"\xb5" => "&micro;",	# micro sign, U+00B5 ISOnum
			"\xb6" => "&para;",	# pilcrow sign = paragraph sign, U+00B6 ISOnum
			"\xb7" => "&middot;",	# middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
			"\xb8" => "&cedil;",	# cedilla = spacing cedilla, U+00B8 ISOdia
			"\xb9" => "&sup1;",	# superscript one = superscript digit one, U+00B9 ISOnum
			"\xba" => "&ordm;",	# masculine ordinal indicator, U+00BA ISOnum
			"\xbb" => "&raquo;",	# right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
			"\xbc" => "&frac14;",	# vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
			"\xbd" => "&frac12;",	# vulgar fraction one half = fraction one half, U+00BD ISOnum
			"\xbe" => "&frac34;",	# vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
			"\xbf" => "&iquest;",	# inverted question mark = turned question mark, U+00BF ISOnum
			"\xc0" => "&Agrave;",	# latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
			"\xc1" => "&Aacute;",	# latin capital letter A with acute, U+00C1 ISOlat1
			"\xc2" => "&Acirc;",	# latin capital letter A with circumflex, U+00C2 ISOlat1
			"\xc3" => "&Atilde;",	# latin capital letter A with tilde, U+00C3 ISOlat1
			"\xc4" => "&Auml;",	# latin capital letter A with diaeresis, U+00C4 ISOlat1
			"\xc5" => "&Aring;",	# latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
			"\xc6" => "&AElig;",	# latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
			"\xc7" => "&Ccedil;",	# latin capital letter C with cedilla, U+00C7 ISOlat1
			"\xc8" => "&Egrave;",	# latin capital letter E with grave, U+00C8 ISOlat1
			"\xc9" => "&Eacute;",	# latin capital letter E with acute, U+00C9 ISOlat1
			"\xca" => "&Ecirc;",	# latin capital letter E with circumflex, U+00CA ISOlat1
			"\xcb" => "&Euml;",	# latin capital letter E with diaeresis, U+00CB ISOlat1
			"\xcc" => "&Igrave;",	# latin capital letter I with grave, U+00CC ISOlat1
			"\xcd" => "&Iacute;",	# latin capital letter I with acute, U+00CD ISOlat1
			"\xce" => "&Icirc;",	# latin capital letter I with circumflex, U+00CE ISOlat1
			"\xcf" => "&Iuml;",	# latin capital letter I with diaeresis, U+00CF ISOlat1
			"\xd0" => "&ETH;",	# latin capital letter ETH, U+00D0 ISOlat1
			"\xd1" => "&Ntilde;",	# latin capital letter N with tilde, U+00D1 ISOlat1
			"\xd2" => "&Ograve;",	# latin capital letter O with grave, U+00D2 ISOlat1
			"\xd3" => "&Oacute;",	# latin capital letter O with acute, U+00D3 ISOlat1
			"\xd4" => "&Ocirc;",	# latin capital letter O with circumflex, U+00D4 ISOlat1
			"\xd5" => "&Otilde;",	# latin capital letter O with tilde, U+00D5 ISOlat1
			"\xd6" => "&Ouml;",	# latin capital letter O with diaeresis, U+00D6 ISOlat1
			"\xd7" => "&times;",	# multiplication sign, U+00D7 ISOnum
			"\xd8" => "&Oslash;",	# latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
			"\xd9" => "&Ugrave;",	# latin capital letter U with grave, U+00D9 ISOlat1
			"\xda" => "&Uacute;",	# latin capital letter U with acute, U+00DA ISOlat1
			"\xdb" => "&Ucirc;",	# latin capital letter U with circumflex, U+00DB ISOlat1
			"\xdc" => "&Uuml;",	# latin capital letter U with diaeresis, U+00DC ISOlat1
			"\xdd" => "&Yacute;",	# latin capital letter Y with acute, U+00DD ISOlat1
			"\xde" => "&THORN;",	# latin capital letter THORN, U+00DE ISOlat1
			"\xdf" => "&szlig;",	# latin small letter sharp s = ess-zed, U+00DF ISOlat1
			"\xe0" => "&agrave;",	# latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
			"\xe1" => "&aacute;",	# latin small letter a with acute, U+00E1 ISOlat1
			"\xe2" => "&acirc;",	# latin small letter a with circumflex, U+00E2 ISOlat1
			"\xe3" => "&atilde;",	# latin small letter a with tilde, U+00E3 ISOlat1
			"\xe4" => "&auml;",	# latin small letter a with diaeresis, U+00E4 ISOlat1
			"\xe5" => "&aring;",	# latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
			"\xe6" => "&aelig;",	# latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
			"\xe7" => "&ccedil;",	# latin small letter c with cedilla, U+00E7 ISOlat1
			"\xe8" => "&egrave;",	# latin small letter e with grave, U+00E8 ISOlat1
			"\xe9" => "&eacute;",	# latin small letter e with acute, U+00E9 ISOlat1
			"\xea" => "&ecirc;",	# latin small letter e with circumflex, U+00EA ISOlat1
			"\xeb" => "&euml;",	# latin small letter e with diaeresis, U+00EB ISOlat1
			"\xec" => "&igrave;",	# latin small letter i with grave, U+00EC ISOlat1
			"\xed" => "&iacute;",	# latin small letter i with acute, U+00ED ISOlat1
			"\xee" => "&icirc;",	# latin small letter i with circumflex, U+00EE ISOlat1
			"\xef" => "&iuml;",	# latin small letter i with diaeresis, U+00EF ISOlat1
			"\xf0" => "&eth;",	# latin small letter eth, U+00F0 ISOlat1
			"\xf1" => "&ntilde;",	# latin small letter n with tilde, U+00F1 ISOlat1
			"\xf2" => "&ograve;",	# latin small letter o with grave, U+00F2 ISOlat1
			"\xf3" => "&oacute;",	# latin small letter o with acute, U+00F3 ISOlat1
			"\xf4" => "&ocirc;",	# latin small letter o with circumflex, U+00F4 ISOlat1
			"\xf5" => "&otilde;",	# latin small letter o with tilde, U+00F5 ISOlat1
			"\xf6" => "&ouml;",	# latin small letter o with diaeresis, U+00F6 ISOlat1
			"\xf7" => "&divide;",	# division sign, U+00F7 ISOnum
			"\xf8" => "&oslash;",	# latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
			"\xf9" => "&ugrave;",	# latin small letter u with grave, U+00F9 ISOlat1
			"\xfa" => "&uacute;",	# latin small letter u with acute, U+00FA ISOlat1
			"\xfb" => "&ucirc;",	# latin small letter u with circumflex, U+00FB ISOlat1
			"\xfc" => "&uuml;",	# latin small letter u with diaeresis, U+00FC ISOlat1
			"\xfd" => "&yacute;",	# latin small letter y with acute, U+00FD ISOlat1
			"\xfe" => "&thorn;",	# latin small letter thorn, U+00FE ISOlat1
			"\xff" => "&yuml;",	# latin small letter y with diaeresis, U+00FF ISOlat1
		},
		"symbols-and-greek-letters" => {
			"\x192" => "&fnof;",	# latin small f with hook = function = florin, U+0192 ISOtech
			"\x391" => "&Alpha;",	# greek capital letter alpha, U+0391
			"\x392" => "&Beta;",	# greek capital letter beta, U+0392
			"\x393" => "&Gamma;",	# greek capital letter gamma, U+0393 ISOgrk3
			"\x394" => "&Delta;",	# greek capital letter delta, U+0394 ISOgrk3
			"\x395" => "&Epsilon;",	# greek capital letter epsilon, U+0395
			"\x396" => "&Zeta;",	# greek capital letter zeta, U+0396
			"\x397" => "&Eta;",	# greek capital letter eta, U+0397
			"\x398" => "&Theta;",	# greek capital letter theta, U+0398 ISOgrk3
			"\x399" => "&Iota;",	# greek capital letter iota, U+0399
			"\x39a" => "&Kappa;",	# greek capital letter kappa, U+039A
			"\x39b" => "&Lambda;",	# greek capital letter lambda, U+039B ISOgrk3
			"\x39c" => "&Mu;",	# greek capital letter mu, U+039C
			"\x39d" => "&Nu;",	# greek capital letter nu, U+039D
			"\x39e" => "&Xi;",	# greek capital letter xi, U+039E ISOgrk3
			"\x39f" => "&Omicron;",	# greek capital letter omicron, U+039F
			"\x3a0" => "&Pi;",	# greek capital letter pi, U+03A0 ISOgrk3
			"\x3a1" => "&Rho;",	# greek capital letter rho, U+03A1
			"\x3a3" => "&Sigma;",	# greek capital letter sigma, U+03A3 ISOgrk3
			"\x3a4" => "&Tau;",	# greek capital letter tau, U+03A4
			"\x3a5" => "&Upsilon;",	# greek capital letter upsilon, U+03A5 ISOgrk3
			"\x3a6" => "&Phi;",	# greek capital letter phi, U+03A6 ISOgrk3
			"\x3a7" => "&Chi;",	# greek capital letter chi, U+03A7
			"\x3a8" => "&Psi;",	# greek capital letter psi, U+03A8 ISOgrk3
			"\x3a9" => "&Omega;",	# greek capital letter omega, U+03A9 ISOgrk3
			"\x3b1" => "&alpha;",	# greek small letter alpha, U+03B1 ISOgrk3
			"\x3b2" => "&beta;",	# greek small letter beta, U+03B2 ISOgrk3
			"\x3b3" => "&gamma;",	# greek small letter gamma, U+03B3 ISOgrk3
			"\x3b4" => "&delta;",	# greek small letter delta, U+03B4 ISOgrk3
			"\x3b5" => "&epsilon;",	# greek small letter epsilon, U+03B5 ISOgrk3
			"\x3b6" => "&zeta;",	# greek small letter zeta, U+03B6 ISOgrk3
			"\x3b7" => "&eta;",	# greek small letter eta, U+03B7 ISOgrk3
			"\x3b8" => "&theta;",	# greek small letter theta, U+03B8 ISOgrk3
			"\x3b9" => "&iota;",	# greek small letter iota, U+03B9 ISOgrk3
			"\x3ba" => "&kappa;",	# greek small letter kappa, U+03BA ISOgrk3
			"\x3bb" => "&lambda;",	# greek small letter lambda, U+03BB ISOgrk3
			"\x3bc" => "&mu;",	# greek small letter mu, U+03BC ISOgrk3
			"\x3bd" => "&nu;",	# greek small letter nu, U+03BD ISOgrk3
			"\x3be" => "&xi;",	# greek small letter xi, U+03BE ISOgrk3
			"\x3bf" => "&omicron;",	# greek small letter omicron, U+03BF NEW
			"\x3c0" => "&pi;",	# greek small letter pi, U+03C0 ISOgrk3
			"\x3c1" => "&rho;",	# greek small letter rho, U+03C1 ISOgrk3
			"\x3c2" => "&sigmaf;",	# greek small letter final sigma, U+03C2 ISOgrk3
			"\x3c3" => "&sigma;",	# greek small letter sigma, U+03C3 ISOgrk3
			"\x3c4" => "&tau;",	# greek small letter tau, U+03C4 ISOgrk3
			"\x3c5" => "&upsilon;",	# greek small letter upsilon, U+03C5 ISOgrk3
			"\x3c6" => "&phi;",	# greek small letter phi, U+03C6 ISOgrk3
			"\x3c7" => "&chi;",	# greek small letter chi, U+03C7 ISOgrk3
			"\x3c8" => "&psi;",	# greek small letter psi, U+03C8 ISOgrk3
			"\x3c9" => "&omega;",	# greek small letter omega, U+03C9 ISOgrk3
			"\x3d1" => "&thetasym;",	# greek small letter theta symbol, U+03D1 NEW
			"\x3d2" => "&upsih;",	# greek upsilon with hook symbol, U+03D2 NEW
			"\x3d6" => "&piv;",	# greek pi symbol, U+03D6 ISOgrk3
			"\x2022" => "&bull;",	# bullet = black small circle, U+2022 ISOpub
			"\x2026" => "&hellip;",	# horizontal ellipsis = three dot leader, U+2026 ISOpub
			"\x2032" => "&prime;",	# prime = minutes = feet, U+2032 ISOtech
			"\x2033" => "&Prime;",	# double prime = seconds = inches, U+2033 ISOtech
			"\x203e" => "&oline;",	# overline = spacing overscore, U+203E NEW
			"\x2044" => "&frasl;",	# fraction slash, U+2044 NEW
			"\x2118" => "&weierp;",	# script capital P = power set = Weierstrass p, U+2118 ISOamso
			"\x2111" => "&image;",	# blackletter capital I = imaginary part, U+2111 ISOamso
			"\x211c" => "&real;",	# blackletter capital R = real part symbol, U+211C ISOamso
			"\x2122" => "&trade;",	# trade mark sign, U+2122 ISOnum
			"\x2135" => "&alefsym;",	# alef symbol = first transfinite cardinal, U+2135 NEW
			"\x2190" => "&larr;",	# leftwards arrow, U+2190 ISOnum
			"\x2191" => "&uarr;",	# upwards arrow, U+2191 ISOnum
			"\x2192" => "&rarr;",	# rightwards arrow, U+2192 ISOnum
			"\x2193" => "&darr;",	# downwards arrow, U+2193 ISOnum
			"\x2194" => "&harr;",	# left right arrow, U+2194 ISOamsa
			"\x21b5" => "&crarr;",	# downwards arrow with corner leftwards = carriage return, U+21B5 NEW
			"\x21d0" => "&lArr;",	# leftwards double arrow, U+21D0 ISOtech
			"\x21d1" => "&uArr;",	# upwards double arrow, U+21D1 ISOamsa
			"\x21d2" => "&rArr;",	# rightwards double arrow, U+21D2 ISOtech
			"\x21d3" => "&dArr;",	# downwards double arrow, U+21D3 ISOamsa
			"\x21d4" => "&hArr;",	# left right double arrow, U+21D4 ISOamsa
			"\x2200" => "&forall;",	# for all, U+2200 ISOtech
			"\x2202" => "&part;",	# partial differential, U+2202 ISOtech
			"\x2203" => "&exist;",	# there exists, U+2203 ISOtech
			"\x2205" => "&empty;",	# empty set = null set = diameter, U+2205 ISOamso
			"\x2207" => "&nabla;",	# nabla = backward difference, U+2207 ISOtech
			"\x2208" => "&isin;",	# element of, U+2208 ISOtech
			"\x2209" => "&notin;",	# not an element of, U+2209 ISOtech
			"\x220b" => "&ni;",	# contains as member, U+220B ISOtech
			"\x220f" => "&prod;",	# n-ary product = product sign, U+220F ISOamsb
			"\x2211" => "&sum;",	# n-ary sumation, U+2211 ISOamsb
			"\x2212" => "&minus;",	# minus sign, U+2212 ISOtech
			"\x2217" => "&lowast;",	# asterisk operator, U+2217 ISOtech
			"\x221a" => "&radic;",	# square root = radical sign, U+221A ISOtech
			"\x221d" => "&prop;",	# proportional to, U+221D ISOtech
			"\x221e" => "&infin;",	# infinity, U+221E ISOtech
			"\x2220" => "&ang;",	# angle, U+2220 ISOamso
			"\x2227" => "&and;",	# logical and = wedge, U+2227 ISOtech
			"\x2228" => "&or;",	# logical or = vee, U+2228 ISOtech
			"\x2229" => "&cap;",	# intersection = cap, U+2229 ISOtech
			"\x222a" => "&cup;",	# union = cup, U+222A ISOtech
			"\x222b" => "&int;",	# integral, U+222B ISOtech
			"\x2234" => "&there4;",	# therefore, U+2234 ISOtech
			"\x223c" => "&sim;",	# tilde operator = varies with = similar to, U+223C ISOtech
			"\x2245" => "&cong;",	# approximately equal to, U+2245 ISOtech
			"\x2248" => "&asymp;",	# almost equal to = asymptotic to, U+2248 ISOamsr
			"\x2260" => "&ne;",	# not equal to, U+2260 ISOtech
			"\x2261" => "&equiv;",	# identical to, U+2261 ISOtech
			"\x2264" => "&le;",	# less-than or equal to, U+2264 ISOtech
			"\x2265" => "&ge;",	# greater-than or equal to, U+2265 ISOtech
			"\x2282" => "&sub;",	# subset of, U+2282 ISOtech
			"\x2283" => "&sup;",	# superset of, U+2283 ISOtech
			"\x2284" => "&nsub;",	# not a subset of, U+2284 ISOamsn
			"\x2286" => "&sube;",	# subset of or equal to, U+2286 ISOtech
			"\x2287" => "&supe;",	# superset of or equal to, U+2287 ISOtech
			"\x2295" => "&oplus;",	# circled plus = direct sum, U+2295 ISOamsb
			"\x2297" => "&otimes;",	# circled times = vector product, U+2297 ISOamsb
			"\x22a5" => "&perp;",	# up tack = orthogonal to = perpendicular, U+22A5 ISOtech
			"\x22c5" => "&sdot;",	# dot operator, U+22C5 ISOamsb
			"\x2308" => "&lceil;",	# left ceiling = apl upstile, U+2308 ISOamsc
			"\x2309" => "&rceil;",	# right ceiling, U+2309 ISOamsc
			"\x230a" => "&lfloor;",	# left floor = apl downstile, U+230A ISOamsc
			"\x230b" => "&rfloor;",	# right floor, U+230B ISOamsc
			"\x2329" => "&lang;",	# left-pointing angle bracket = bra, U+2329 ISOtech
			"\x232a" => "&rang;",	# right-pointing angle bracket = ket, U+232A ISOtech
			"\x25ca" => "&loz;",	# lozenge, U+25CA ISOpub
			"\x2660" => "&spades;",	# black spade suit, U+2660 ISOpub
			"\x2663" => "&clubs;",	# black club suit = shamrock, U+2663 ISOpub
			"\x2665" => "&hearts;",	# black heart suit = valentine, U+2665 ISOpub
			"\x2666" => "&diams;",	# black diamond suit, U+2666 ISOpub
		},
		"utf-8" => {
			"\"" => "&quot;",	# quotation mark = APL quote, U+0022 ISOnum
			"&" => "&amp;",	# ampersand, U+0026 ISOnum
			"<" => "&lt;",	# less-than sign, U+003C ISOnum
			">" => "&gt;",	# greater-than sign, U+003E ISOnum
			"\305\222" => "&OElig;",	# latin capital ligature OE, U+0152 ISOlat2
			"\305\223" => "&oelig;",	# latin small ligature oe, U+0153 ISOlat2
			"\305\240" => "&Scaron;",	# latin capital letter S with caron, U+0160 ISOlat2
			"\305\241" => "&scaron;",	# latin small letter s with caron, U+0161 ISOlat2
			"\305\270" => "&Yuml;",	# latin capital letter Y with diaeresis, U+0178 ISOlat2
			"\313\206" => "&circ;",	# modifier letter circumflex accent, U+02C6 ISOpub
			"\313\234" => "&tilde;",	# small tilde, U+02DC ISOdia
			"\342\200\202" => "&ensp;",	# en space, U+2002 ISOpub
			"\342\200\203" => "&emsp;",	# em space, U+2003 ISOpub
			"\342\200\211" => "&thinsp;",	# thin space, U+2009 ISOpub
			"\342\200\214" => "&zwnj;",	# zero width non-joiner, U+200C NEW RFC 2070
			"\342\200\215" => "&zwj;",	# zero width joiner, U+200D NEW RFC 2070
			"\342\200\216" => "&lrm;",	# left-to-right mark, U+200E NEW RFC 2070
			"\342\200\217" => "&rlm;",	# right-to-left mark, U+200F NEW RFC 2070
			"\342\200\223" => "&ndash;",	# en dash, U+2013 ISOpub
			"\342\200\224" => "&mdash;",	# em dash, U+2014 ISOpub
			"\342\200\230" => "&lsquo;",	# left single quotation mark, U+2018 ISOnum
			"\342\200\231" => "&rsquo;",	# right single quotation mark, U+2019 ISOnum
			"\342\200\232" => "&sbquo;",	# single low-9 quotation mark, U+201A NEW
			"\342\200\234" => "&ldquo;",	# left double quotation mark, U+201C ISOnum
			"\342\200\235" => "&rdquo;",	# right double quotation mark, U+201D ISOnum
			"\342\200\236" => "&bdquo;",	# double low-9 quotation mark, U+201E NEW
			"\342\200\240" => "&dagger;",	# dagger, U+2020 ISOpub
			"\342\200\241" => "&Dagger;",	# double dagger, U+2021 ISOpub
			"\342\200\260" => "&permil;",	# per mille sign, U+2030 ISOtech
			"\342\200\271" => "&lsaquo;",	# single left-pointing angle quotation mark, U+2039 ISO proposed
			"\342\200\272" => "&rsaquo;",	# single right-pointing angle quotation mark, U+203A ISO proposed
			"\342\202\254" => "&euro;",	# euro sign, U+20AC NEW
			"\302\240" => "&nbsp;",	# no-break space = non-breaking space, U+00A0 ISOnum
			"\302\241" => "&iexcl;",	# inverted exclamation mark, U+00A1 ISOnum
			"\302\242" => "&cent;",	# cent sign, U+00A2 ISOnum
			"\302\243" => "&pound;",	# pound sign, U+00A3 ISOnum
			"\302\244" => "&curren;",	# currency sign, U+00A4 ISOnum
			"\302\245" => "&yen;",	# yen sign = yuan sign, U+00A5 ISOnum
			"\302\246" => "&brvbar;",	# broken bar = broken vertical bar, U+00A6 ISOnum
			"\302\247" => "&sect;",	# section sign, U+00A7 ISOnum
			"\302\250" => "&uml;",	# diaeresis = spacing diaeresis, U+00A8 ISOdia
			"\302\251" => "&copy;",	# copyright sign, U+00A9 ISOnum
			"\302\252" => "&ordf;",	# feminine ordinal indicator, U+00AA ISOnum
			"\302\253" => "&laquo;",	# left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
			"\302\254" => "&not;",	# not sign, U+00AC ISOnum
			"\302\255" => "&shy;",	# soft hyphen = discretionary hyphen, U+00AD ISOnum
			"\302\256" => "&reg;",	# registered sign = registered trade mark sign, U+00AE ISOnum
			"\302\257" => "&macr;",	# macron = spacing macron = overline = APL overbar, U+00AF ISOdia
			"\302\260" => "&deg;",	# degree sign, U+00B0 ISOnum
			"\302\261" => "&plusmn;",	# plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
			"\302\262" => "&sup2;",	# superscript two = superscript digit two = squared, U+00B2 ISOnum
			"\302\263" => "&sup3;",	# superscript three = superscript digit three = cubed, U+00B3 ISOnum
			"\302\264" => "&acute;",	# acute accent = spacing acute, U+00B4 ISOdia
			"\302\265" => "&micro;",	# micro sign, U+00B5 ISOnum
			"\302\266" => "&para;",	# pilcrow sign = paragraph sign, U+00B6 ISOnum
			"\302\267" => "&middot;",	# middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
			"\302\270" => "&cedil;",	# cedilla = spacing cedilla, U+00B8 ISOdia
			"\302\271" => "&sup1;",	# superscript one = superscript digit one, U+00B9 ISOnum
			"\302\272" => "&ordm;",	# masculine ordinal indicator, U+00BA ISOnum
			"\302\273" => "&raquo;",	# right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
			"\302\274" => "&frac14;",	# vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
			"\302\275" => "&frac12;",	# vulgar fraction one half = fraction one half, U+00BD ISOnum
			"\302\276" => "&frac34;",	# vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
			"\302\277" => "&iquest;",	# inverted question mark = turned question mark, U+00BF ISOnum
			"\303\200" => "&Agrave;",	# latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
			"\303\201" => "&Aacute;",	# latin capital letter A with acute, U+00C1 ISOlat1
			"\303\202" => "&Acirc;",	# latin capital letter A with circumflex, U+00C2 ISOlat1
			"\303\203" => "&Atilde;",	# latin capital letter A with tilde, U+00C3 ISOlat1
			"\303\204" => "&Auml;",	# latin capital letter A with diaeresis, U+00C4 ISOlat1
			"\303\205" => "&Aring;",	# latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
			"\303\206" => "&AElig;",	# latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
			"\303\207" => "&Ccedil;",	# latin capital letter C with cedilla, U+00C7 ISOlat1
			"\303\210" => "&Egrave;",	# latin capital letter E with grave, U+00C8 ISOlat1
			"\303\211" => "&Eacute;",	# latin capital letter E with acute, U+00C9 ISOlat1
			"\303\212" => "&Ecirc;",	# latin capital letter E with circumflex, U+00CA ISOlat1
			"\303\213" => "&Euml;",	# latin capital letter E with diaeresis, U+00CB ISOlat1
			"\303\214" => "&Igrave;",	# latin capital letter I with grave, U+00CC ISOlat1
			"\303\215" => "&Iacute;",	# latin capital letter I with acute, U+00CD ISOlat1
			"\303\216" => "&Icirc;",	# latin capital letter I with circumflex, U+00CE ISOlat1
			"\303\217" => "&Iuml;",	# latin capital letter I with diaeresis, U+00CF ISOlat1
			"\303\220" => "&ETH;",	# latin capital letter ETH, U+00D0 ISOlat1
			"\303\221" => "&Ntilde;",	# latin capital letter N with tilde, U+00D1 ISOlat1
			"\303\222" => "&Ograve;",	# latin capital letter O with grave, U+00D2 ISOlat1
			"\303\223" => "&Oacute;",	# latin capital letter O with acute, U+00D3 ISOlat1
			"\303\224" => "&Ocirc;",	# latin capital letter O with circumflex, U+00D4 ISOlat1
			"\303\225" => "&Otilde;",	# latin capital letter O with tilde, U+00D5 ISOlat1
			"\303\226" => "&Ouml;",	# latin capital letter O with diaeresis, U+00D6 ISOlat1
			"\303\227" => "&times;",	# multiplication sign, U+00D7 ISOnum
			"\303\230" => "&Oslash;",	# latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
			"\303\231" => "&Ugrave;",	# latin capital letter U with grave, U+00D9 ISOlat1
			"\303\232" => "&Uacute;",	# latin capital letter U with acute, U+00DA ISOlat1
			"\303\233" => "&Ucirc;",	# latin capital letter U with circumflex, U+00DB ISOlat1
			"\303\234" => "&Uuml;",	# latin capital letter U with diaeresis, U+00DC ISOlat1
			"\303\235" => "&Yacute;",	# latin capital letter Y with acute, U+00DD ISOlat1
			"\303\236" => "&THORN;",	# latin capital letter THORN, U+00DE ISOlat1
			"\303\237" => "&szlig;",	# latin small letter sharp s = ess-zed, U+00DF ISOlat1
			"\303\240" => "&agrave;",	# latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
			"\303\241" => "&aacute;",	# latin small letter a with acute, U+00E1 ISOlat1
			"\303\242" => "&acirc;",	# latin small letter a with circumflex, U+00E2 ISOlat1
			"\303\243" => "&atilde;",	# latin small letter a with tilde, U+00E3 ISOlat1
			"\303\244" => "&auml;",	# latin small letter a with diaeresis, U+00E4 ISOlat1
			"\303\245" => "&aring;",	# latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
			"\303\246" => "&aelig;",	# latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
			"\303\247" => "&ccedil;",	# latin small letter c with cedilla, U+00E7 ISOlat1
			"\303\250" => "&egrave;",	# latin small letter e with grave, U+00E8 ISOlat1
			"\303\251" => "&eacute;",	# latin small letter e with acute, U+00E9 ISOlat1
			"\303\252" => "&ecirc;",	# latin small letter e with circumflex, U+00EA ISOlat1
			"\303\253" => "&euml;",	# latin small letter e with diaeresis, U+00EB ISOlat1
			"\303\254" => "&igrave;",	# latin small letter i with grave, U+00EC ISOlat1
			"\303\255" => "&iacute;",	# latin small letter i with acute, U+00ED ISOlat1
			"\303\256" => "&icirc;",	# latin small letter i with circumflex, U+00EE ISOlat1
			"\303\257" => "&iuml;",	# latin small letter i with diaeresis, U+00EF ISOlat1
			"\303\260" => "&eth;",	# latin small letter eth, U+00F0 ISOlat1
			"\303\261" => "&ntilde;",	# latin small letter n with tilde, U+00F1 ISOlat1
			"\303\262" => "&ograve;",	# latin small letter o with grave, U+00F2 ISOlat1
			"\303\263" => "&oacute;",	# latin small letter o with acute, U+00F3 ISOlat1
			"\303\264" => "&ocirc;",	# latin small letter o with circumflex, U+00F4 ISOlat1
			"\303\265" => "&otilde;",	# latin small letter o with tilde, U+00F5 ISOlat1
			"\303\266" => "&ouml;",	# latin small letter o with diaeresis, U+00F6 ISOlat1
			"\303\267" => "&divide;",	# division sign, U+00F7 ISOnum
			"\303\270" => "&oslash;",	# latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
			"\303\271" => "&ugrave;",	# latin small letter u with grave, U+00F9 ISOlat1
			"\303\272" => "&uacute;",	# latin small letter u with acute, U+00FA ISOlat1
			"\303\273" => "&ucirc;",	# latin small letter u with circumflex, U+00FB ISOlat1
			"\303\274" => "&uuml;",	# latin small letter u with diaeresis, U+00FC ISOlat1
			"\303\275" => "&yacute;",	# latin small letter y with acute, U+00FD ISOlat1
			"\303\276" => "&thorn;",	# latin small letter thorn, U+00FE ISOlat1
			"\303\277" => "&yuml;",	# latin small letter y with diaeresis, U+00FF ISOlat1
			"\306\222" => "&fnof;",	# latin small f with hook = function = florin, U+0192 ISOtech
			"\316\221" => "&Alpha;",	# greek capital letter alpha, U+0391
			"\316\222" => "&Beta;",	# greek capital letter beta, U+0392
			"\316\223" => "&Gamma;",	# greek capital letter gamma, U+0393 ISOgrk3
			"\316\224" => "&Delta;",	# greek capital letter delta, U+0394 ISOgrk3
			"\316\225" => "&Epsilon;",	# greek capital letter epsilon, U+0395
			"\316\226" => "&Zeta;",	# greek capital letter zeta, U+0396
			"\316\227" => "&Eta;",	# greek capital letter eta, U+0397
			"\316\230" => "&Theta;",	# greek capital letter theta, U+0398 ISOgrk3
			"\316\231" => "&Iota;",	# greek capital letter iota, U+0399
			"\316\232" => "&Kappa;",	# greek capital letter kappa, U+039A
			"\316\233" => "&Lambda;",	# greek capital letter lambda, U+039B ISOgrk3
			"\316\234" => "&Mu;",	# greek capital letter mu, U+039C
			"\316\235" => "&Nu;",	# greek capital letter nu, U+039D
			"\316\236" => "&Xi;",	# greek capital letter xi, U+039E ISOgrk3
			"\316\237" => "&Omicron;",	# greek capital letter omicron, U+039F
			"\316\240" => "&Pi;",	# greek capital letter pi, U+03A0 ISOgrk3
			"\316\241" => "&Rho;",	# greek capital letter rho, U+03A1
			"\316\243" => "&Sigma;",	# greek capital letter sigma, U+03A3 ISOgrk3
			"\316\244" => "&Tau;",	# greek capital letter tau, U+03A4
			"\316\245" => "&Upsilon;",	# greek capital letter upsilon, U+03A5 ISOgrk3
			"\316\246" => "&Phi;",	# greek capital letter phi, U+03A6 ISOgrk3
			"\316\247" => "&Chi;",	# greek capital letter chi, U+03A7
			"\316\250" => "&Psi;",	# greek capital letter psi, U+03A8 ISOgrk3
			"\316\251" => "&Omega;",	# greek capital letter omega, U+03A9 ISOgrk3
			"\316\261" => "&alpha;",	# greek small letter alpha, U+03B1 ISOgrk3
			"\316\262" => "&beta;",	# greek small letter beta, U+03B2 ISOgrk3
			"\316\263" => "&gamma;",	# greek small letter gamma, U+03B3 ISOgrk3
			"\316\264" => "&delta;",	# greek small letter delta, U+03B4 ISOgrk3
			"\316\265" => "&epsilon;",	# greek small letter epsilon, U+03B5 ISOgrk3
			"\316\266" => "&zeta;",	# greek small letter zeta, U+03B6 ISOgrk3
			"\316\267" => "&eta;",	# greek small letter eta, U+03B7 ISOgrk3
			"\316\270" => "&theta;",	# greek small letter theta, U+03B8 ISOgrk3
			"\316\271" => "&iota;",	# greek small letter iota, U+03B9 ISOgrk3
			"\316\272" => "&kappa;",	# greek small letter kappa, U+03BA ISOgrk3
			"\316\273" => "&lambda;",	# greek small letter lambda, U+03BB ISOgrk3
			"\316\274" => "&mu;",	# greek small letter mu, U+03BC ISOgrk3
			"\316\275" => "&nu;",	# greek small letter nu, U+03BD ISOgrk3
			"\316\276" => "&xi;",	# greek small letter xi, U+03BE ISOgrk3
			"\316\277" => "&omicron;",	# greek small letter omicron, U+03BF NEW
			"\317\200" => "&pi;",	# greek small letter pi, U+03C0 ISOgrk3
			"\317\201" => "&rho;",	# greek small letter rho, U+03C1 ISOgrk3
			"\317\202" => "&sigmaf;",	# greek small letter final sigma, U+03C2 ISOgrk3
			"\317\203" => "&sigma;",	# greek small letter sigma, U+03C3 ISOgrk3
			"\317\204" => "&tau;",	# greek small letter tau, U+03C4 ISOgrk3
			"\317\205" => "&upsilon;",	# greek small letter upsilon, U+03C5 ISOgrk3
			"\317\206" => "&phi;",	# greek small letter phi, U+03C6 ISOgrk3
			"\317\207" => "&chi;",	# greek small letter chi, U+03C7 ISOgrk3
			"\317\210" => "&psi;",	# greek small letter psi, U+03C8 ISOgrk3
			"\317\211" => "&omega;",	# greek small letter omega, U+03C9 ISOgrk3
			"\317\221" => "&thetasym;",	# greek small letter theta symbol, U+03D1 NEW
			"\317\222" => "&upsih;",	# greek upsilon with hook symbol, U+03D2 NEW
			"\317\226" => "&piv;",	# greek pi symbol, U+03D6 ISOgrk3
			"\342\200\242" => "&bull;",	# bullet = black small circle, U+2022 ISOpub
			"\342\200\246" => "&hellip;",	# horizontal ellipsis = three dot leader, U+2026 ISOpub
			"\342\200\262" => "&prime;",	# prime = minutes = feet, U+2032 ISOtech
			"\342\200\263" => "&Prime;",	# double prime = seconds = inches, U+2033 ISOtech
			"\342\200\276" => "&oline;",	# overline = spacing overscore, U+203E NEW
			"\342\201\204" => "&frasl;",	# fraction slash, U+2044 NEW
			"\342\204\230" => "&weierp;",	# script capital P = power set = Weierstrass p, U+2118 ISOamso
			"\342\204\221" => "&image;",	# blackletter capital I = imaginary part, U+2111 ISOamso
			"\342\204\234" => "&real;",	# blackletter capital R = real part symbol, U+211C ISOamso
			"\342\204\242" => "&trade;",	# trade mark sign, U+2122 ISOnum
			"\342\204\265" => "&alefsym;",	# alef symbol = first transfinite cardinal, U+2135 NEW
			"\342\206\220" => "&larr;",	# leftwards arrow, U+2190 ISOnum
			"\342\206\221" => "&uarr;",	# upwards arrow, U+2191 ISOnum
			"\342\206\222" => "&rarr;",	# rightwards arrow, U+2192 ISOnum
			"\342\206\223" => "&darr;",	# downwards arrow, U+2193 ISOnum
			"\342\206\224" => "&harr;",	# left right arrow, U+2194 ISOamsa
			"\342\206\265" => "&crarr;",	# downwards arrow with corner leftwards = carriage return, U+21B5 NEW
			"\342\207\220" => "&lArr;",	# leftwards double arrow, U+21D0 ISOtech
			"\342\207\221" => "&uArr;",	# upwards double arrow, U+21D1 ISOamsa
			"\342\207\222" => "&rArr;",	# rightwards double arrow, U+21D2 ISOtech
			"\342\207\223" => "&dArr;",	# downwards double arrow, U+21D3 ISOamsa
			"\342\207\224" => "&hArr;",	# left right double arrow, U+21D4 ISOamsa
			"\342\210\200" => "&forall;",	# for all, U+2200 ISOtech
			"\342\210\202" => "&part;",	# partial differential, U+2202 ISOtech
			"\342\210\203" => "&exist;",	# there exists, U+2203 ISOtech
			"\342\210\205" => "&empty;",	# empty set = null set = diameter, U+2205 ISOamso
			"\342\210\207" => "&nabla;",	# nabla = backward difference, U+2207 ISOtech
			"\342\210\210" => "&isin;",	# element of, U+2208 ISOtech
			"\342\210\211" => "&notin;",	# not an element of, U+2209 ISOtech
			"\342\210\213" => "&ni;",	# contains as member, U+220B ISOtech
			"\342\210\217" => "&prod;",	# n-ary product = product sign, U+220F ISOamsb
			"\342\210\221" => "&sum;",	# n-ary sumation, U+2211 ISOamsb
			"\342\210\222" => "&minus;",	# minus sign, U+2212 ISOtech
			"\342\210\227" => "&lowast;",	# asterisk operator, U+2217 ISOtech
			"\342\210\232" => "&radic;",	# square root = radical sign, U+221A ISOtech
			"\342\210\235" => "&prop;",	# proportional to, U+221D ISOtech
			"\342\210\236" => "&infin;",	# infinity, U+221E ISOtech
			"\342\210\240" => "&ang;",	# angle, U+2220 ISOamso
			"\342\210\247" => "&and;",	# logical and = wedge, U+2227 ISOtech
			"\342\210\250" => "&or;",	# logical or = vee, U+2228 ISOtech
			"\342\210\251" => "&cap;",	# intersection = cap, U+2229 ISOtech
			"\342\210\252" => "&cup;",	# union = cup, U+222A ISOtech
			"\342\210\253" => "&int;",	# integral, U+222B ISOtech
			"\342\210\264" => "&there4;",	# therefore, U+2234 ISOtech
			"\342\210\274" => "&sim;",	# tilde operator = varies with = similar to, U+223C ISOtech
			"\342\211\205" => "&cong;",	# approximately equal to, U+2245 ISOtech
			"\342\211\210" => "&asymp;",	# almost equal to = asymptotic to, U+2248 ISOamsr
			"\342\211\240" => "&ne;",	# not equal to, U+2260 ISOtech
			"\342\211\241" => "&equiv;",	# identical to, U+2261 ISOtech
			"\342\211\244" => "&le;",	# less-than or equal to, U+2264 ISOtech
			"\342\211\245" => "&ge;",	# greater-than or equal to, U+2265 ISOtech
			"\342\212\202" => "&sub;",	# subset of, U+2282 ISOtech
			"\342\212\203" => "&sup;",	# superset of, U+2283 ISOtech
			"\342\212\204" => "&nsub;",	# not a subset of, U+2284 ISOamsn
			"\342\212\206" => "&sube;",	# subset of or equal to, U+2286 ISOtech
			"\342\212\207" => "&supe;",	# superset of or equal to, U+2287 ISOtech
			"\342\212\225" => "&oplus;",	# circled plus = direct sum, U+2295 ISOamsb
			"\342\212\227" => "&otimes;",	# circled times = vector product, U+2297 ISOamsb
			"\342\212\245" => "&perp;",	# up tack = orthogonal to = perpendicular, U+22A5 ISOtech
			"\342\213\205" => "&sdot;",	# dot operator, U+22C5 ISOamsb
			"\342\214\210" => "&lceil;",	# left ceiling = apl upstile, U+2308 ISOamsc
			"\342\214\211" => "&rceil;",	# right ceiling, U+2309 ISOamsc
			"\342\214\212" => "&lfloor;",	# left floor = apl downstile, U+230A ISOamsc
			"\342\214\213" => "&rfloor;",	# right floor, U+230B ISOamsc
			"\342\214\251" => "&lang;",	# left-pointing angle bracket = bra, U+2329 ISOtech
			"\342\214\252" => "&rang;",	# right-pointing angle bracket = ket, U+232A ISOtech
			"\342\227\212" => "&loz;",	# lozenge, U+25CA ISOpub
			"\342\231\240" => "&spades;",	# black spade suit, U+2660 ISOpub
			"\342\231\243" => "&clubs;",	# black club suit = shamrock, U+2663 ISOpub
			"\342\231\245" => "&hearts;",	# black heart suit = valentine, U+2665 ISOpub
			"\342\231\246" => "&diams;",	# black diamond suit, U+2666 ISOpub
		},
	}
end

#
# referenciate.rb:
# a library to markup text with HTML references
#
# copied from referenciate.rb,v 1.7 in ruby-entity-reference-0.1.4
#
# Copyright:: Copyright (C) 2007 zunda <zunda at freeshell.org>
# License:: GPL
#

module EntityReference
	# Names of the entity lists in References which are applied, in this
	# order, to text in each supported character set
	Lists_to_apply = {
		'ISO-8859-1' => ['iso-8859-1', 'internationalization'],
		'UTF-8' => ['utf-8'],
	}
	# Characters that are left as they are when the text is HTML
	Not_on_html = {	# We will not convert
		"\x22" => true,	# "
		"\x26" => true,	# &
		"\x3c" => true,	# <
		"\x3e" => true,	# >
	}

	@@entityreferece_charsets = Lists_to_apply.keys
	@@entityreferece_hash = Hash.new
	@@entityreferece_regexp = Hash.new

	@@entityreferece_charsets.each do |charset|
		# collect the lists into a fresh Hash so that the Hashes stored in
		# References are not modified; a later list overrides an earlier one
		table = Hash.new
		Lists_to_apply[charset].each do |src|
			table.update(References[src])
		end
		@@entityreferece_hash[charset] = table

		re_opt = (charset == 'UTF-8') ? 'u' : 'n'
		@@entityreferece_regexp[charset] = Regexp.union(
			*(table.keys.map do |c|
				# keys are literal characters, not patterns
				Regexp.new(Regexp.escape(c), 0, re_opt)
			end)
		)
	end

	# Returns a copy of string in which the characters listed for charset
	# are replaced with their entity references.
	#
	# string:: text to be marked up
	# encoding:: when 'html', the characters in Not_on_html are kept
	# charset:: name of the character set of string, case insensitive
	#
	# string itself is returned when charset is nil or not supported.
	def referenciate(string, encoding = 'html', charset = 'iso-8859-1')
		return string unless charset
		charset_upper = charset.upcase
		return string unless @@entityreferece_charsets.include?(charset_upper)

		no_conv = ('html' == encoding) ? Not_on_html : {}
		table = @@entityreferece_hash[charset_upper]

		string.gsub(@@entityreferece_regexp[charset_upper]) do |found|
			no_conv[found] ? found : table[found]
		end
	end
	module_function :referenciate

end

#
# blazer-ja-proxy: a proxy server for Palm/Blazer
#

require 'webrick'
require 'webrick/httpproxy'

require 'nkf'
require 'uri'
require 'zlib'
require 'stringio'
require 'thread'
require 'cgi'

require 'socket'

module BlazerProxy

	#
	# configuration file
	#
	# Raised by Configuration#[] for an unknown keyword and by
	# Configuration#check when errors were recorded while parsing.
	class ConfigurationError < StandardError; end

	# Settings of the proxy: the defaults below, overridden by the first
	# configuration file that can be opened among the candidate paths.
	#
	# Each line of a configuration file reads `keyword: value'. Text after
	# a `#' is dropped and keywords are matched case-insensitively.
	class Configuration
		# values used for keywords that are not set in the file
		@@defaults = {
			'bind address' => Socket.gethostname,
			'bind port' => 8080,
			'proxy user' => Hash.new,
			'gzip' => true,
			'devel log' => nil,
			'server log' => nil,
			'server log depth' => WEBrick::Log::DEBUG,
			'browser charset' => NKF::EUC,
			'proxy via' => false,
		}

		# keywords whose values are not stored as the plain string: values
		# for a Module are looked up as upper-cased constants in that Module
		@@specials = {
			'proxy user' => 'proxy user',
			'server log depth' => WEBrick::Log,
			# selection: fatal, error, warn, info, and debug, see webrick/log.rb
			'browser charset' => NKF,
			# selection: euc, sjis, jis, utf8, and utf16
		}

		@@paths = [
			'/etc/blazer-ja-proxy.conf',
			'~/.blazer-ja-proxy.conf',
			'./blazer-ja-proxy.conf',
		]

		# returns a copy of the list of paths searched by default
		def self::default_paths
			@@paths.dup
		end

		# items read from configuration file
		# raises ConfigurationError for a keyword which is not known
		def [](entry)
			unless @confs.has_key?(entry) then
				raise ConfigurationError, "keyword `#{entry}' not known"
			end
			@confs[entry]
		end

		# verbosity:
		# 1: report unfatal errors
		# 2: report configuration file path
		# 3: report configurations
		attr_reader :verbosity
		attr_writer :verbosity

		# Array of error messages, check method raises
		attr_reader :errors

		# Full path of the configuration file read, or nil
		attr_reader :path

		# paths:: candidate configuration files, the first one found is read
		# verbosity:: see above
		def initialize(paths = @@paths, verbosity = 0)
			@confs = _fresh_defaults
			@path = nil
			@verbosity = verbosity
			@errors = Array.new
			_read(paths)
		end

		# Parses the first file in paths which exists and remembers its full
		# path in @path. Files which do not exist are skipped.
		def _read(paths = @@paths)
			@path = nil
			paths.each do |path|
				fullpath = File.expand_path(path)
				begin
					File.open(fullpath) do |f|
						parse(f)
					end
					@path = fullpath
					if @verbosity > 1 then
						$stderr.puts "read configuration from #{fullpath}"
					end
					break
				rescue Errno::ENOENT
					if @verbosity > 0 then
						$stderr.puts "configuration file #{fullpath} not found"
					end
				end
			end
		end

		# Resets the settings to the defaults and reads them from file, which
		# has to respond to each_line, path, and lineno. Problems are not
		# raised but recorded into @errors.
		#
		# NOTE(review): values of keywords not listed in @@specials are kept
		# as Strings, so `gzip: false' results in the true value "false";
		# only an empty value gives nil - confirm this is intended.
		def parse(file)
			@confs = _fresh_defaults
			@errors = Array.new
			file.each_line do |l|
				l.sub!(/#.*/, '')
				l.strip!
				next if l.empty?
				k, v = l.split(/\s*:\s*/, 2)
				k.downcase!
				unless @@defaults.has_key?(k) then
					@errors.push("#{file.path}:#{file.lineno}: keyword `#{k}' not known")
					next
				end
				vv = (v and not v.empty?) ? _untaint(v) : nil
				unless @@specials.has_key?(k) then
					@confs[_untaint(k)] = vv
					if @verbosity > 2 then
						$stderr.puts "set configuration `#{k}' as #{vv ? "`#{vv}'" : 'nil'}"
					end
					next
				end

				unless vv then
					@errors.push("#{file.path}:#{file.lineno}: keyword `#{k}' does not have a value")
					next
				end
				if 'proxy user' == k then
					# value is `name:crypted password', the password is optional
					name, p = vv.split(/:/, 2)
					crypt_pass = (p and not p.empty?) ? p : nil
					@confs['proxy user'][name] = crypt_pass
					if @verbosity > 2 then
						$stderr.puts "added user:`#{name}' to configuration `proxy user'"
					end
				else
					begin
						@confs[k] = @@specials[k].const_get(vv.upcase)
						if @verbosity > 2 then
							$stderr.puts "set configuration `#{k}' as #{vv ? "`#{vv}'" : 'nil'}"
						end
					rescue NameError
						@errors.push("#{file.path}:#{file.lineno}: value `#{v}' not accepted for keyword `#{k}'")
					end
				end
			end
		end

		# raises ConfigurationError carrying all the recorded error messages
		def check
			unless @errors.empty? then
				raise ConfigurationError, @errors.join("\n")
			end
		end

		# Returns a copy of the defaults that can safely be modified: the
		# Hash of proxy users is copied as well, otherwise users read from a
		# file would be added to the Hash shared through @@defaults.
		def _fresh_defaults
			confs = @@defaults.dup
			confs['proxy user'] = @@defaults['proxy user'].dup
			confs
		end
		private :_fresh_defaults

		# untaints obj where the Ruby in use still provides untaint
		def _untaint(obj)
			obj.respond_to?(:untaint) ? obj.untaint : obj
		end
		private :_untaint

	end

	#
	# character encodings
	#
	# patterns for names of charsets and the NKF::* consts they stand for
	@@charset_to_nkf = {
		/\Aiso-8859-\d\Z/i => NKF::ASCII,
		/\Aeuc-jp\Z/i => NKF::EUC,
		/\Ax-sjis\Z/i => NKF::SJIS,
		/\Ashift[_-]jis\Z/i => NKF::SJIS,
		/\Aiso-2022-jp\Z/i => NKF::JIS,
		/\Autf-8\Z/i => NKF::UTF8,
		/\Autf-16\Z/i => NKF::UTF16,
	}

	# looks up the NKF::* const for the name of a charset,
	# nil when none of the patterns matches
	def self::charset_to_nkf(charset_string)
		hit = @@charset_to_nkf.find{|pattern, _nkf| pattern =~ charset_string}
		hit ? hit[1] : nil
	end

	# names of charsets to be shown for NKF::* consts,
	# nil where there is no name to show
	@@nkf_to_charset = {
		NKF::ASCII => 'ISO-8859-1',
		NKF::EUC => 'EUC-JP',
		NKF::JIS => 'ISO-2022-JP',
		NKF::SJIS => 'Shift-JIS',
		NKF::UTF8 => 'UTF-8',
		NKF::UTF16 => 'UTF-16',
		NKF::BINARY => nil,
		nil => nil,
	}

	# name of the charset for an NKF::* const, or nil
	# raises RuntimeError for a value which is not listed above
	def self::nkf_to_charset(nkf_const)
		@@nkf_to_charset.fetch(nkf_const) do
			raise RuntimeError, "Unknown NKF:: constant #{nkf_const}"
		end
	end

	# returns NKF::* const for content type
	#
	# The value of the first charset parameter decides the result, which is
	# nil when the charset is not known. The value may be quoted and may be
	# surrounded by white spaces. Without a charset parameter, NKF::BINARY
	# is returned for images and nil for everything else.
	def self::content_type_to_nkf(content_type)
		if content_type then
			content_type.split(/\s*;\s*/).each do |part|
				a = part.strip.split(/\s*=\s*/)
				next unless a[1] and /charset/i =~ a[0]
				# parameter values may be given as quoted strings
				value = a[1].sub(/\A(["'])(.*)\1\z/, '\2')
				return charset_to_nkf(value)
			end
			#return charset_to_nkf('iso-8859-1') if /\Atext/i =~ content_type
			# violation of RFC-2616 Section 3.7.1 but we want to look in the body
			return NKF::BINARY if /\Aimage/i =~ content_type
		end
		return nil
	end

	# detects multibyte chars
	# the third byte of a three-byte UTF-8 sequence is any continuation
	# byte, i.e. the whole range from 0x80 to 0xbf
	@@regexp_multibyte = Regexp.union(
		/[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]/n,	# shift-jis
		/[\xa1-\xfe][\xa1-\xfe]/n,	# euc-jp
		/[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]/n	# utf-8
	)	# Ruby recipe book
	@@regexp_7bit_jis = /\x1b\$B|\x1b\$\(D|\x0e\x1b\(I/	# jiskanji (5)

	# returns a true value when string contains a byte sequence which looks
	# like a multibyte character or an escape sequence of 7-bit JIS
	def self::have_multibyte?(string)
		@@regexp_multibyte =~ string || @@regexp_7bit_jis =~ string
	end

	# sanitize a string
	# Strings declared as NKF::JIS or with a charset name beginning with
	# ISO-2022 are returned as they are. From other strings, each ESC
	# followed by `(' or `$' is removed along with the one char after it.
	def self::sanitize(string, charset_in)
		if NKF::JIS === charset_in or /\AISO-2022/i === charset_in then
			string
		else
			string.gsub(/\x1b[\(\$]./n, '')
		end
	end

	# converts a string
	# string is returned as it is when have_multibyte? finds nothing in it
	# or when charset_out is none of the NKF::* consts listed below.
	# Otherwise the sanitized string is passed to NKF. charset_in is passed
	# on as the input code only when it is one of the listed consts.
	def self::convert(string, charset_out, charset_in = nil)
		return string unless have_multibyte?(string)

		# output code
		out_opt = case charset_out
			when NKF::EUC then '-e'
			when NKF::SJIS then '-s'
			when NKF::JIS then '-j'
			when NKF::UTF8 then '-w'
			when NKF::UTF16 then '-w16'
			else nil
			end
		return string unless out_opt

		# input code
		in_opt = case charset_in
			when NKF::EUC then '-E'
			when NKF::SJIS then '-S'
			when NKF::JIS then '-J'
			when NKF::UTF8 then '-W'
			when NKF::UTF16 then '-W16'
			else nil
			end

		# -m0: no mime decoding
		nkf_opts = [out_opt, in_opt, '-m0'].compact.join(' ')
		NKF.nkf(nkf_opts, self::sanitize(string, charset_in))
	end

	# Converts the names and the values in an URL-encoded string into
	# charset_out. The separators `&' and `;' and the first `=' of each
	# pair are kept; everything else is unescaped, converted, and escaped.
	def self::convert_urlencoded(str, charset_out)
		recode = lambda{|piece|
			CGI.escape(BlazerProxy::convert(CGI.unescape(piece), charset_out))
		}
		# HTTPUtils::parse_query can't be used. Sequence has to be conserved.
		pieces = str.split(/(&)|(;)/).map do |chunk|
			m = chunk.match(/=/)
			if m then
				recode.call(m.pre_match) + '=' + recode.call(m.post_match)
			elsif /\A(?:&|;)\Z/ =~ chunk then
				chunk
			else
				recode.call(chunk)
			end
		end
		pieces.join('')
	end

	@@crlf = "\x0d\x0a"

	# Converts the bodies of the text/plain parts in a multipart/form-data
	# string into charset_out. Parts with other content types, the headers,
	# and the boundary lines are passed through.
	#
	# str:: the form data
	# boundary:: the boundary without the leading `--'
	# charset_out:: NKF::* const to convert to
	def self::convert_form_data(str, boundary, charset_out)
		# boundary is data: it may contain chars which are special in a regexp
		delimiter = /^(--#{Regexp.escape(boundary)}(?:--)?#{@@crlf})/
		# HTTPUtils::parse_form_data can't be used. Sequence has to be conserved.
		str.split(delimiter).map{|chunk|
			m = chunk.match(/#{@@crlf}#{@@crlf}/)
			if m then
				head = m.pre_match
				body = m.post_match

				# extract content-type
				headers = head.split(/#{@@crlf}/).map{|l| l.split(/:\s*/, 2)}
				if a = headers.find{|e| /\Acontent-type\z/i =~ e[0]} then
					content_type = a[1]
				else
					content_type = 'text/plain'
					# RFC2388: content type defaults to text/plain
				end

				# convert
				case content_type
				when /text\/plain/i
					body = BlazerProxy::convert(body, charset_out)
					chunk = head + @@crlf + @@crlf + body
				end
			end
			chunk
		}.join('')
	end

	#
	# URL
	#

	# returns canonical URL for URI
	# that is the normalized URI as a String without its query, opaque
	# part, and fragment; nil when uri is missing or has an userinfo or
	# a registry
	def self::canonical_url(uri)
		if uri and not uri.userinfo and not uri.registry then
			stripped = uri.dup
			stripped.query = nil
			stripped.opaque = nil
			stripped.fragment = nil
			stripped.normalize.to_s
		else
			nil
		end
	end

	#
	# registry of character encodings
	#

	# Remembers the charsets of recently seen URLs, keyed by the canonical
	# URL as a Symbol.
	#
	# NOTE(review): canonical URLs from the network are interned with
	# to_sym; on Rubys which never collect Symbols this grows without
	# bound regardless of max - confirm whether String keys are acceptable.
	class CharsetRegistry
		# charsets that are worth remembering
		@@need_register = {
			NKF::ASCII => true,
			NKF::EUC => true,
			NKF::JIS => true,
			NKF::SJIS => true,
			NKF::UTF8 => true,
			NKF::UTF16 => true,
		}

		# Hash of canonical URLs as Symbols to NKF::* consts
		attr_reader :registry

		# remembers charsets for latest about max URLs
		def initialize(max = 1000)
			@registry = Hash.new
			@sequence = Array.new	# keys of @registry, oldest first
			@max = max
			@mutex = Mutex.new
		end

		# returns the charset remembered for uri, or nil
		def [](uri)
			canonical = BlazerProxy::canonical_url(uri)
			return nil unless canonical
			@mutex.synchronize do
				@registry[canonical.to_sym]
			end
		end

		# Remembers charset for uri and returns charset. Nothing is stored
		# and nil is returned when uri has no canonical URL or when charset
		# is not listed in @@need_register.
		def []=(uri, charset)
			canonical = BlazerProxy::canonical_url(uri)
			return nil unless canonical and charset and @@need_register[charset]

			key = canonical.to_sym
			@mutex.synchronize do
				# register as the newest entry
				@sequence.delete(key) if @registry.has_key?(key)
				@registry[key] = charset
				@sequence.push(key)

				# clean up old entries down to half of the maximum; done
				# within the same lock so that the size checked is the size
				# which is trimmed
				if @sequence.size > @max then
					halfmax = @max / 2
					while @sequence.size > halfmax
						@registry.delete(@sequence.shift)
					end
				end
			end

			charset
		end

	end

	#
	# proxy server - we want to add proxy name to Via header
	#
	# WEBrick::HTTPProxyServer which appends the name of this proxy to @via
	# once the superclass has been initialized with config
	class BlazerProxyServer < WEBrick::HTTPProxyServer
		def initialize(config)
			super(config)
			@via = @via + ' (blazer-ja-proxy)'
		end
	end

	#
	# proxy instance
	#
	# The proxy: converts requests into the charset remembered for the
	# page and responses into the charset of the browser.
	class Proxy
		# Proc which authenticates the users of the proxy
		attr_reader :auth_proc

		# conf:: source of the settings; values are read with conf[keyword]
		#
		# Opens the development log for writing when `devel log' is set.
		def initialize(conf)
			@bind_address = conf['bind address']
			@bind_port = conf['bind port']
			@proxy_pass = conf['proxy user']
			@gzip = conf['gzip']
			@devel_log_filename = conf['devel log']
			@server_log_filename = conf['server log']
			@server_log_depth = conf['server log depth']
			@proxy_via = conf['proxy via']
			@browser_charset = conf['browser charset']

			# charset registry
			@charsets = BlazerProxy::CharsetRegistry.new

			# development log file
			@devel_log_file = @devel_log_filename ? File.open(@devel_log_filename, 'w') : nil

			@auth_proc = Proc.new{|req, res|
				WEBrick::HTTPAuth.proxy_basic_auth(req, res, File.basename($0)) {|user, pass|
					begin
						@proxy_pass.has_key?(user) and pass.crypt(@proxy_pass[user]) == @proxy_pass[user]
					rescue ArgumentError	# salt too short
						false
					rescue TypeError	# can't convert nil into String
						# the user is listed without a password
						true
					end
				}
			}

			@request_filter = Proc.new{|req, res|
				# record the request for debug
				logging = _logging?(req)
				if logging then
					@devel_log_file.puts "\nREQUEST:"
					@devel_log_file.puts "original: #{req.request_uri}"
					@devel_log_file.puts "content-type: #{req.content_type}"
					@devel_log_file.puts "body: #{req.body[0..65].inspect}" if req.body
				end

				# the charset remembered for the referer, or for the URL itself
				referer = begin
					URI.parse(req['referer'])
				rescue URI::InvalidURIError
					nil
				end
				charset_out = (referer and @charsets[referer]) || @charsets[req.request_uri]
				if logging then
					@devel_log_file.puts "charset recorded in proxy: #{BlazerProxy::nkf_to_charset(charset_out) || 'none'}"
				end

				# convert
				req.convert_to!(charset_out)

				# record the result
				if logging then
					@devel_log_file.puts "converted: #{req.request_uri}"
					@devel_log_file.puts "content-type: #{req.content_type}"
					@devel_log_file.puts "body: #{req.body[0..65].inspect}" if req.body
				end
			}

			@response_filter = Proc.new{|req, res|
				# record the response header for debug
				logging = _logging?(req)
				if logging then
					@devel_log_file.puts "\nORIGINAL RESPONSE HEADER for\n  #{res.request_uri}"
					res.each do |head, val|
						@devel_log_file.puts "#{head}: #{val}"
					end
				end

				case res.content_type
				when %r{\A(?:text/html|application/.*xhtml\+xml|text/plain)}
					# record charset
					@charsets[res.request_uri] = res.charset
					if logging then
						@devel_log_file.puts "charset detected in proxy: #{BlazerProxy::nkf_to_charset(res.charset)}"
					end
					# decode
					res.decode!
					# convert charset
					res.convert_to!(@browser_charset)
					if logging then
						if res.body then
							@devel_log_file.puts "\nbody after charset conversion:"
							@devel_log_file.puts res.body[0..195].inspect + '...'
						else
							@devel_log_file.puts "\nno body received."
						end
					end
					# gzip
					res.gzip! if @gzip
				when /\Aimage\//
					# shrink to fit display
				end

				# record the converted response for debug
				if logging then
					@devel_log_file.puts "\nCONVERTED RESPONSE:"
					res.each do |head, val|
						@devel_log_file.puts "#{head}: #{val}"
					end
					@devel_log_file.puts "body:"
					if res.body then
						@devel_log_file.puts res.body[0..65].inspect + '...'
					end
					@devel_log_file.flush
				end
			}

			@server_config = {
				:BindAddress => @bind_address,
				:Port => @bind_port,
				:Logger => @server_log_filename ? WEBrick::Log::new(@server_log_filename, @server_log_depth) : nil,
				:ProxyAuthProc => @auth_proc,
				:RequestHandler => @request_filter,
				:ProxyContentHandler => @response_filter,
				:ProxyVia => @proxy_via,
			}
		end

		# start the server and block
		# SIGINT shuts the server down and closes the development log
		def start
			s = BlazerProxyServer.new(@server_config)
			Signal.trap('INT') do
				s.shutdown
				@devel_log_file.close if @devel_log_file
			end
			s.start
		end

		# Tells if req is to be recorded into the development log: the log
		# has to be open and the request needs an URI which is not https.
		# Written with && so that the whole condition is the result; in
		# `x = a and b' only a is assigned to x.
		def _logging?(req)
			@devel_log_file && req.request_uri && 'https' != req.request_uri.scheme
		end
		private :_logging?
	end

end

#
# extensions to WEBrick
#
module WEBrick
	# Additions to WEBrick::HTTPResponse: decoding of compressed bodies,
	# detection of the charset, and conversion of the body to another charset.
	class HTTPResponse

		# decode if encoded
		# Returns the body with the content-encoding `gzip' or `deflate'
		# undone. The result is kept in @decoded_body and computed only once.
		def decoded_body
			unless defined?(@decoded_body)
				# decode if encoded
				@decoded_body = body
				if body then
					if not body.empty? then
						case self['content-encoding']
						when 'gzip'
							gzfile = Zlib::GzipReader.new(StringIO.new(body))
							@decoded_body = gzfile.read
							gzfile.close
						when 'deflate'
							@decoded_body = Zlib::Inflate.inflate(body)
						end
					end
				end
			end
			@decoded_body
		end

		# returns NKF::* const
		# The value is detected on the first call and kept in @charset;
		# convert_to! overwrites it.
		def charset
			@charset = _charset unless defined?(@charset)
			@charset
		end

		# Detects the charset, in this order: the Content-type header, a meta
		# tag with http-equiv in the body, the XML declaration, byte order
		# marks when the content type mentions xml, and finally a guess by
		# NKF. Returns nil when nothing is found.
		def _charset
			# check Content-type: header
			r = ::BlazerProxy::content_type_to_nkf(content_type) and return r
			if body and not body.empty? then
				# check HTML header
				decoded_body.scan(/<meta\s+(.*?)>/i).map{|e| e[0]}.each do |meta|
					if /http-equiv=["']?content-type["']?/i =~ meta and /content=["'](.*?)["']/i =~ meta then
						r = ::BlazerProxy::content_type_to_nkf($1) and return r
					end
				end
				# check XML header
				# http://suika.fam.cx/~wakaba/-temp/wiki/wiki?XML%2F%2Fcharset
				if /\A(?:\xEF\xBB\xBF)?<\?xml/i =~ decoded_body then
					if /\A(?:\xEF\xBB\xBF)?<\?xml[^>]*\bencoding=["'](.*?)["'][^>]*>/i =~ decoded_body then
						r = ::BlazerProxy::charset_to_nkf($1) and return r
					end
					# an XML declaration without a known encoding: take it as UTF-8
					return NKF::UTF8
				end
				if content_type and /\bxml\b/ =~ content_type then
					# byte order marks: UTF-16 in either byte order, then UTF-8
					return NKF::UTF16 if 0xFEFF ==  decoded_body.unpack('S1')[0]
					return NKF::UTF16 if 0xFFFE ==  decoded_body.unpack('S1')[0]
					return NKF::UTF8 if [0xEF, 0xBB, 0xBF] ==  decoded_body.unpack('C3')
				end
				# guess from body
				r = NKF::guess(decoded_body)
				return r unless NKF::UNKNOWN == r || NKF::BINARY == r
			end
			return nil
		end
		private :_charset

		# decode
		# Replaces the body with the decoded body, clears the
		# content-encoding header, and updates content-length. Returns self.
		def decode!
			self.body = decoded_body
			self['content-encoding'] = nil
			self['content-length'] = @body ? @body.size : 0
			self
		end

		# encode with gzip
		# Nothing is done when a content-encoding is already set.
		def gzip!
			if not self['content-encoding'] or self['content-encoding'].empty? then
				bodyio = StringIO.new
				gzfile = Zlib::GzipWriter.new(bodyio)
				gzfile.print body
				gzfile.close
				self.body = bodyio.string
				self['content-encoding'] = 'gzip'
				self['content-length'] = @body ? @body.size : 0
			end
		end

		# convert character set
		# Rewrites the charset in the Content-type header, in the meta tag of
		# the HTML head, and in the XML declaration to charset_out (an
		# NKF::* const), then converts the body and updates content-length.
		# Only the header is rewritten when there is no body. Returns self.
		def convert_to!(charset_out)
			# rewrite internal state
			charset_in = charset
			@charset = charset_out
			charset_str = BlazerProxy::nkf_to_charset(charset_out)

			# HTTP header
			s = content_type.sub(/;\s*charset\s*=\s*[^\s;]*/i, '') + "; charset=#{charset_str}"
			self.content_type = s
			return self if not body or body.empty?

			s = self.body

			# HTML header
			have_meta = false
			s = s.sub(/(<head\b(.*?)<\/head>)/im) do
				$1.gsub(/(<meta\s+(.*?)>)/im) do
					r = $1
					meta = $2
					# NOTE(review): `(:?' below looks like a typo for `(?:' - confirm
					if /http-equiv=["']?content-(:?.*?)type["']?/im =~ meta and /(content=["'].*?;(\s*.*?)["'])/im =~ meta then
						have_meta = true
						content_in = $1
						charset_from = $2
						# replace what follows the `;' in the content attribute
						content_out = content_in.sub(Regexp.new(Regexp.escape(charset_from)), " charset=#{charset_str}")
						r = r.sub(Regexp.new(Regexp.escape(content_in)), content_out)
					end
					r
				end
			end

			# insert meta tag if missing
			if /\Atext\/html/i =~ content_type and not have_meta then
				# create a space for <head>...</head>
				# sub! returns nil when nothing matched, so the next place is tried
				unless /<head\b.*<\/head>/im =~ s then
					s.sub!(/(<body>)/i){"<head></head>#{$1}"} or \
					s.sub!(/(<html>)/i){"#{$1}<head></head>"} or \
					s.sub!(/\A/){'<head></head>'}
				end
				# insert the meta tag
				s = s.sub(/(<\/head>)/i){
					end_head = $1
					# close the tag with `/' when the document starts like XML or XHTML
					need_post = (/\A<\?xml/ =~ s) || (/\A<!DOCTYPE.*XHTML/ =~ s)
					%Q|<meta http-equiv="content-type" content="text/html; charset=#{charset_str}"#{need_post ? '/' : ''}>#{end_head}|
				}

			end

			# XML header
			s = s.gsub(/(\A(?:\xEF\xBB\xBF)?<\?xml[^>]*\bencoding=["'](.*?)["'][^>]*>)/i) do
				r = $1
				charset_in_str = $2
				# the encoding declared here replaces the charset detected earlier
				charset_in = ::BlazerProxy::charset_to_nkf(charset_in_str)
				r = r.sub(charset_in_str, charset_str)
				r
			end

			# convert body
			if /\Atext\/html/i =~ content_type then
				s = EntityReference::referenciate(s, 'html', BlazerProxy::nkf_to_charset(charset_in))
			end
			s = BlazerProxy::convert(s, charset_out, charset_in)

			self.body = s
			self['content-length'] = body.size.to_s

			self
		end
	end

	# Additions to WEBrick::HTTPRequest: conversion of the query and of the
	# body of a request into another charset.
	class HTTPRequest
		# Replaces the URI of the request and everything derived from it.
		# raises HTTPStatus::BadRequest when uri can not be handled
		def unparsed_uri=(uri)
			@unparsed_uri = uri
			# The code below is copied from webrick/httprequest.rb,v 1.64
			# came with ruby 1.8.3 (2005-09-21)
			# Copyright (c) 2000, 2001 TAKAHASHI Masayoshi, GOTOU Yuuzou
			# Copyright (c) 2002 Internet Programming with Ruby writers.
			begin
				@request_uri = parse_uri(@unparsed_uri)
				@path = HTTPUtils::unescape(@request_uri.path)
				@path = HTTPUtils::normalize_path(@path)
				@host = @request_uri.host
				@port = @request_uri.port
				@query_string = @request_uri.query
				@script_name = ""
				@path_info = @path.dup
			rescue
				raise HTTPStatus::BadRequest, "bad URI `#{@unparsed_uri}'."
			end
		end

		# Converts the query of the URI into charset_out (an NKF::* const).
		# Nothing is changed when the URI has no query.
		def convert_query_to!(charset_out)
			a = URI.split(self.unparsed_uri)
			# a[7] is the query
			if a[7] and not a[7].empty? then
				a[7] = BlazerProxy::convert_urlencoded(a[7], charset_out)
				self.unparsed_uri = URI::Generic.build(a).to_s
			end
		end

		# Converts the body of an URL-encoded or a multipart form into
		# charset_out (an NKF::* const) and brings the content-length header
		# in line with the converted body. Bodies of other content types and
		# empty bodies are left alone.
		def convert_body_to!(charset_out)
			return unless body and not body.empty?

			# convert body
			if /^application\/x-www-form-urlencoded/i =~ self['content-type'] then
				@body = BlazerProxy::convert_urlencoded(body, charset_out)
			elsif /^multipart\/form-data;\s*boundary=(.+)/i =~ self['content-type'] then
				boundary = HTTPUtils::dequote($1)
				@body = BlazerProxy::convert_form_data(body, boundary, charset_out)
			else
				return
			end

			# update content-length, for both kinds of forms
			@raw_header = @raw_header.map do |h|
				if /content-length/i =~ h then
					h.sub(/:(\s*).*/){':' + $1 + body.size.to_s}
				else
					h
				end
			end
			@header = HTTPUtils::parse_header(@raw_header)
		end

		# converts the query and the body into charset_out
		def convert_to!(charset_out)
			self.convert_query_to!(charset_out)
			self.convert_body_to!(charset_out)
		end
	end
end

# When run as a script: read the options and the configuration, then start
# the proxy. Exits with status 1 when the configuration has errors or when
# no configuration file was found.
if __FILE__ == $0 then
	require 'optparse'

	verbosity = 0
	confpath = BlazerProxy::Configuration::default_paths

	# command line options
	opt = OptionParser.new
	opt.on('-c PATH', '--conf', 'path to configuration file') do |v|
		confpath = [v]
	end
	opt.on('-v', '--verbose', 'increase verbosity') do
		verbosity += 1
	end
	opt.parse!(ARGV)

	# configuration: errors in the file are reported first
	conf = BlazerProxy::Configuration.new(confpath, verbosity)
	begin
		conf.check
	rescue BlazerProxy::ConfigurationError => e
		$stderr.puts e
		exit 1
	end
	if not conf.path then
		$stderr.puts "config files: #{confpath.join(' ')} not found"
		exit 1
	end

	# start the proxy and block
	BlazerProxy::Proxy.new(conf).start
end
