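/*
 * $Id: expatJP.c,v 1.3 2003/11/27 05:45:17 sugoroku Exp $
 * Japanese character encoding support for expat.
 * Makes it possible to process XML documents whose encoding is declared as SHIFT_JIS or EUC-JP.
 * Also provides conversion routines from UTF-8 to SHIFT_JIS and EUC-JP.
 */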

#include <string.h>
#include <expat.h>

#include <expatJP.h>

/* JIS <-> Unicode conversion tables */
#include "jutable.c"

/**
 * Converts one JIS X 0208 character to Unicode.
 * Bit 7 of each byte is masked off before the lookup, so the 7-bit JIS form
 * (0x2121..0x7e7e) and the 8-bit EUC form (0xa1a1..0xfefe) are both accepted.
 * @param ch   JIS X 0208 character code (first byte in bits 8-15, second byte in bits 0-7).
 * @return the Unicode code read from the j2u table, or 0 when either byte is
 *         outside 0x21..0x7e after masking.
 */
static int JIS208toUTF16(int ch) {
	/* Zero-based row / cell indices into j2u. */
	const int row = ((ch >> 8) & 0x7f) - 0x21;
	const int cell = (ch & 0x7f) - 0x21;
	if (row < 0 || row > 0x7e - 0x21)
		return 0;
	if (cell < 0 || cell > 0x7e - 0x21)
		return 0;
	return j2u[row][cell];
}

/**
 * Looks up the JIS X 0208 code for a Unicode character by bisecting u2j.
 * u2j is read as a flat array of pairs: element 2*i is a Unicode code and
 * element 2*i+1 the matching JIS X 0208 code. The bisection only works if the
 * pairs are ordered by ascending Unicode code (assumed - the table lives in jutable.c).
 * @param ch    Unicode character code.
 * @param start index of the first pair of the search range.
 * @param end   index one past the last pair of the search range.
 * @return the corresponding JIS X 0208 code, or 0 when none was found.
 */
static int searchJIS208(int ch, int start, int end) {
	for (;;) {
		const int mid = (start + end) / 2;
		if (u2j[mid * 2] == ch)
			return u2j[mid * 2 + 1];
		/* The range has shrunk to a single pair that did not match. */
		if (mid == start)
			return 0;
		if (u2j[mid * 2] > ch)
			end = mid;
		else
			start = mid;
	}
}

/**
 * Converts one Unicode character to JIS X 0208.
 * @param ch Unicode character code.
 * @return the corresponding JIS X 0208 code, or 0 when there is none.
 */
static int UTF16toJIS208(int ch) {
	/* u2j stores two elements per entry, hence the division by 2. */
	const int pairCount = sizeof(u2j) / sizeof(u2j[0]) / 2;
	return searchJIS208(ch, 0, pairCount);
}

/**
 * Converts a JIS X 0201 katakana byte to Unicode.
 * Bytes 0xa1..0xdf map linearly onto U+FF61..U+FF9F.
 * @param ch JIS X 0201 katakana code.
 * @return the corresponding Unicode code, or 0 when ch is outside 0xa1..0xdf.
 */
static int JIS201KANAtoUTF16(int ch) {
	const int offset = 0xff61 - 0xa1;
	return (ch < 0xa1 || ch > 0xdf) ? 0 : ch + offset;
}

/**
 * Converts a Unicode character to a JIS X 0201 katakana byte.
 * Only the low 16 bits of ch are considered; U+FF61..U+FF9F map linearly
 * onto 0xa1..0xdf.
 * @param ch Unicode character code.
 * @return the corresponding JIS X 0201 katakana code, or 0 when there is none.
 */
static int UTF16toJIS201KANA(int ch) {
	const int low16 = ch & 0xffff;
	if (low16 < 0xff61 || low16 > 0xff9f)
		return 0;
	return low16 - 0xfec0;
}

/**
 * Extracts one character from an EUC-JP string and converts it to Unicode.
 * Handles single-byte ASCII, the two-byte 0x8e + JIS X 0201 katakana form and
 * two-byte JIS X 0208 characters. Any other sequence is treated as unconvertible.
 * @param s    EUC-JP string.
 * @param next address that receives the next scan position; nothing is stored when NULL.
 *             The position is only advanced when a character was converted.
 * @return the corresponding Unicode code, or 0 when there is none.
 */
static int EUCJPtoUTF16(const char* s, const char** next) {
	int wc = 0;
	const int c1 = (unsigned char) *s;
	if ((c1 & 0x80) == 0) {
		wc = c1;
		s++;
	}else if (c1 == 0x8e) {
		if ((wc = JIS201KANAtoUTF16((unsigned char) *(s + 1))) != 0)
			s += 2;
	}else{
		const int c2 = (unsigned char) *(s + 1);
		/*
		 * JIS208toUTF16 ignores bit 7 of both bytes, so the trail byte has to be
		 * validated here; otherwise a lead byte followed by plain ASCII (for
		 * example 0xa4 'A') would be decoded as if it were 0xa4 0xc1.
		 */
		if (0xa1 <= c2 && c2 <= 0xfe
				&& (wc = JIS208toUTF16((c1 << 8) | c2)) != 0)
			s += 2;
	}
	if (next != NULL)
		*next = s;
	return wc;
}

/**
 * Extracts one character from a SHIFT_JIS string and converts it to Unicode.
 * Handles single-byte ASCII, single-byte JIS X 0201 katakana (0xa1..0xdf) and
 * two-byte JIS X 0208 characters. Any other sequence is treated as unconvertible.
 * @param s    SHIFT_JIS string.
 * @param next address that receives the next scan position; nothing is stored when NULL.
 *             The position is only advanced when a character was converted.
 * @return the corresponding Unicode code, or 0 when there is none.
 */
static int SJIStoUTF16(const char* s, const char** next) {
	int wc = 0;
	int c1 = (unsigned char) *s;
	if ((c1 & 0x80) == 0) {
		wc = c1;
		s++;
	}else if (0xa1 <= c1 && c1 <= 0xdf) {
		if ((wc = JIS201KANAtoUTF16(c1)) != 0)
			s++;
	}else if ((0x81 <= c1 && c1 <= 0x9f) || (0xe0 <= c1 && c1 <= 0xef)) {
		/*
		 * The trail byte is only read for a real lead byte, so a single-byte
		 * character never causes a look at the byte that follows it.
		 */
		int c2 = (unsigned char) *(s + 1);
		/*
		 * Valid trail bytes are 0x40..0x7e and 0x80..0xfc. Without this check
		 * 0x7f would be folded onto the same cell as 0x7e by the arithmetic below.
		 */
		if (0x40 <= c2 && c2 <= 0xfc && c2 != 0x7f) {
			/* Fold the SHIFT_JIS pair into an EUC-style (bit 7 set) JIS X 0208 code. */
			if (0x9f <= c2) {
				/* Upper half of the trail range: even JIS row. */
				c1 = c1 * 2 - (c1 >= 0xe0 ? 0xe0 : 0x60);
				c2 += 2;
			}else{
				/* Lower half of the trail range: odd JIS row. */
				c1 = c1 * 2 - (c1 >= 0xe0 ? 0xe1 : 0x61);
				c2 += c2 < 0x7f ? 0x61 : 0x60;
			}
			if ((wc = JIS208toUTF16((c1 << 8) | c2)) != 0)
				s += 2;
		}
	}
	/* Any other lead byte (0x80, 0xa0, 0xf0..0xff) is not convertible: wc stays 0. */
	if (next != NULL)
		*next = s;
	return wc;
}

/**
 * Extracts one character from a UTF-8 string and converts it to a UTF-16 code unit.
 * Only one-, two- and three-byte sequences (U+0000..U+FFFF) are decoded.
 * Overlong encodings, stray continuation bytes, truncated sequences and
 * four-byte sequences are treated as malformed.
 * @param p    UTF-8 string.
 * @param next address that receives the next scan position; nothing is stored when NULL.
 *             The position always advances: by the length of the decoded sequence,
 *             or by exactly one byte when the input is malformed, so that a caller
 *             looping over a buffer is guaranteed to make progress.
 * @return the decoded code, or 0 when the input is malformed.
 */
static wchar_t UTF8toUTF16(const char* p, const char** next) {
	int wc = 0;
	int used = 0;
	const int c1 = (unsigned char) *p;
	if (c1 <= 0x7f) {
		wc = c1;
		used = 1;
	}else if (0xc2 <= c1 && c1 <= 0xdf) {
		/* 0xc0 and 0xc1 could only start an overlong form of an ASCII character. */
		const int c2 = (unsigned char) *(p + 1);
		if (0x80 <= c2 && c2 <= 0xbf) {
			wc = ((c1 & 0x1f) << 6) | (c2 & 0x3f);
			used = 2;
		}
	}else if (0xe0 <= c1 && c1 <= 0xef) {
		/*
		 * Continuation bytes are fetched one at a time and only after the
		 * previous byte was accepted, so a terminating NUL is never stepped over.
		 */
		const int c2 = (unsigned char) *(p + 1);
		if (0x80 <= c2 && c2 <= 0xbf) {
			const int c3 = (unsigned char) *(p + 2);
			if (0x80 <= c3 && c3 <= 0xbf) {
				const int value = ((c1 & 0x0f) << 12) | ((c2 & 0x3f) << 6) | (c3 & 0x3f);
				/* Values below U+0800 must use a shorter form. */
				if (value >= 0x800) {
					wc = value;
					used = 3;
				}
			}
		}
	}
	if (used == 0) {
		/* Malformed input: report 0 and skip the offending byte only. */
		wc = 0;
		used = 1;
	}
	if (next != NULL)
		*next = p + used;
	return wc;
}

/**
 * Extracts one character from a UTF-8 string and converts it to EUC-JP.
 * @param p    UTF-8 string.
 * @param next address that receives the next scan position; nothing is stored when NULL.
 * @return the EUC-JP code: a value below 0x80 for a 7-bit character,
 *         0x8e00 | kana byte for JIS X 0201 katakana, or the JIS X 0208 code
 *         with 0x8080 OR-ed in. 0 when there is no corresponding character.
 */
static int UTF8toEUCJPchar(const char* p, const char** next) {
	const wchar_t wc = UTF8toUTF16(p, next);
	int code;

	/* 7-bit characters are passed through unchanged. */
	if ((wc & 0xff80) == 0)
		return wc;

	/* Half-width katakana is emitted behind the 0x8e prefix byte. */
	code = UTF16toJIS201KANA(wc);
	if (code != 0)
		return code | 0x8e00;

	/* Everything else must come from the JIS X 0208 table. */
	code = UTF16toJIS208(wc);
	return code != 0 ? (code | 0x8080) : 0;
}
/**
 * Extracts one character from a UTF-8 string and converts it to SHIFT_JIS.
 * @param p    UTF-8 string.
 * @param next address that receives the next scan position; nothing is stored when NULL.
 * @return the SHIFT_JIS code: a value below 0x80 for a 7-bit character,
 *         0xa1..0xdf for JIS X 0201 katakana, or lead byte << 8 | trail byte for
 *         a JIS X 0208 character. 0 when there is no corresponding character.
 */
static int UTF8toSJISchar(const char* p, const char** next) {
	wchar_t wc = UTF8toUTF16(p, next);
	int ec = 0;
	/*
	 * Only 7-bit characters may be passed through unchanged. U+0080..U+00FF
	 * must go through the tables like any other character: emitted as raw
	 * bytes they would be read back as SHIFT_JIS lead bytes or half-width kana.
	 */
	if ((wc & 0xff80) == 0)
		return wc;
	if ((ec = UTF16toJIS201KANA(wc)) != 0)
		return ec;
	if ((ec = UTF16toJIS208(wc)) != 0) {
		/* Work on the EUC-style form (bit 7 set on both bytes). */
		int c1 = ((ec >> 8) & 0x7f) | 0x80;
		int c2 = (ec & 0x7f) | 0x80;
		if ((c1 & 1) != 0) {
			/* Odd row: trail byte lands in 0x40..0x9e, skipping 0x7f. */
			c1 = (c1 >> 1) + (c1 < 0xdf ? 0x31 : 0x71);
			c2 -= c2 < 0xe0 ? 0x61 : 0x60;
		}else{
			/* Even row: trail byte lands in 0x9f..0xfc. */
			c1 = (c1 >> 1) + (c1 < 0xdf ? 0x30 : 0x70);
			c2 -= 2;
		}
		return (c1 << 8) | c2;
	}
	return 0;
}

/**
 *  Converts a UTF-8 string to EUC-JP.
 *  Characters that have no EUC-JP equivalent (and malformed UTF-8 bytes) are
 *  emitted as a single 0 byte.
 *  @param utf8     UTF-8 string.
 *  @param utf8len  length of the string in bytes. When negative, the string is
 *                  converted up to and including its terminating NUL.
 *                  An explicit length should end on a character boundary.
 *  @param eucjp    buffer that receives the EUC-JP string.
 *  @param eucjplen size of the buffer. When 0, nothing is written and only the
 *                  length of the converted string is computed.
 *  @return the length of the converted string in bytes. When a buffer is given,
 *          this is the number of bytes actually written: conversion stops before
 *          the first character that does not fit completely, so the result never
 *          exceeds eucjplen and never ends in half a character.
 */
int UTF8toEUCJP(const char* utf8, int utf8len, char* eucjp, int eucjplen) {
	int len = 0;
	const char* lim = utf8;
	if (utf8len < 0) {
		/* lim ends up one past the NUL, so the terminator itself is converted too. */
		while (*lim++ != '\0')
			;
	}else{
		lim += utf8len;
	}
	while (utf8 < lim) {
		const char* cur = utf8;
		int ec = UTF8toEUCJPchar(cur, &utf8);
		int size = (ec & 0xff00) != 0 ? 2 : 1;
		/* Guarantee progress even if the decoder left the scan position untouched. */
		if (utf8 == cur)
			utf8 = cur + 1;
		if (eucjplen != 0) {
			/* Never write a lead byte without its trail byte. */
			if (eucjplen - len < size)
				break;
			if (size == 2)
				*eucjp++ = (ec >> 8) & 0xff;
			*eucjp++ = ec & 0xff;
		}
		len += size;
	}
	return len;
}

/**
 * Converts a UTF-8 string to SHIFT_JIS.
 * Characters that have no SHIFT_JIS equivalent (and malformed UTF-8 bytes) are
 * emitted as a single 0 byte.
 * @param utf8    UTF-8 string.
 * @param utf8len length of the string in bytes. When negative, the string is
 *                converted up to and including its terminating NUL.
 *                An explicit length should end on a character boundary.
 * @param sjis    buffer that receives the SHIFT_JIS string.
 * @param sjislen size of the buffer. When 0, nothing is written and only the
 *                length of the converted string is computed.
 * @return the length of the converted string in bytes. When a buffer is given,
 *         this is the number of bytes actually written: conversion stops before
 *         the first character that does not fit completely, so the result never
 *         exceeds sjislen and never ends in half a character.
 */
int UTF8toSJIS(const char* utf8, int utf8len, char* sjis, int sjislen) {
	int len = 0;
	const char* lim = utf8;
	if (utf8len < 0) {
		/* lim ends up one past the NUL, so the terminator itself is converted too. */
		while (*lim++ != '\0')
			;
	}else{
		lim += utf8len;
	}
	while (utf8 < lim) {
		const char* cur = utf8;
		int ec = UTF8toSJISchar(cur, &utf8);
		int size = (ec & 0xff00) != 0 ? 2 : 1;
		/* Guarantee progress even if the decoder left the scan position untouched. */
		if (utf8 == cur)
			utf8 = cur + 1;
		if (sjislen != 0) {
			/* Never write a lead byte without its trail byte. */
			if (sjislen - len < size)
				break;
			if (size == 2)
				*sjis++ = (ec >> 8) & 0xff;
			*sjis++ = ec & 0xff;
		}
		len += size;
	}
	return len;
}

/**
 * expat conversion callback: decodes one multi-byte EUC-JP character to Unicode.
 * @param data user-defined data (unused).
 * @param s    EUC-JP string, positioned on the first byte of the sequence.
 * @return the corresponding Unicode code, or -1 when the sequence cannot be
 *         converted. expat's callback contract uses -1 to flag a malformed
 *         sequence; returning 0 would be taken as the character U+0000.
 */
static int convertEUCJPtoUTF16(void *data, const char *s) {
	int wc = EUCJPtoUTF16(s, NULL);
	(void) data;
	return wc != 0 ? wc : -1;
}

/**
 * expat conversion callback: decodes one multi-byte SHIFT_JIS character to Unicode.
 * @param data user-defined data (unused).
 * @param s    SHIFT_JIS string, positioned on the first byte of the sequence.
 * @return the corresponding Unicode code, or -1 when the sequence cannot be
 *         converted. expat's callback contract uses -1 to flag a malformed
 *         sequence; returning 0 would be taken as the character U+0000.
 */
static int convertSHIFTJIStoUTF16(void *data, const char *s) {
	int wc = SJIStoUTF16(s, NULL);
	(void) data;
	return wc != 0 ? wc : -1;
}

/**
 * Fills in the ASCII part of a byte-to-Unicode conversion table.
 * Tab, carriage return, line feed and the printable range ' '..'~' are mapped
 * to themselves; every other entry is left untouched.
 * @param map conversion table, indexed by byte value.
 */
static void setAsciiMap(int map[]) {
	static const char controls[] = { '\t', '\r', '\n' };
	unsigned k;
	int code = ' ';

	for (k = 0; k < sizeof(controls) / sizeof(controls[0]); k++)
		map[(int) controls[k]] = controls[k];

	while (code <= '~') {
		map[code] = code;
		code++;
	}
}

/**
 * Compares an encoding name with a lower-case ASCII literal, ignoring ASCII case.
 * Written out locally because stricmp() is not part of standard C or POSIX.
 * @param name  name to test; may be NULL.
 * @param lower literal to compare against, given in lower case.
 * @return non-zero when both strings are equal apart from ASCII letter case.
 */
static int equalsIgnoreAsciiCase(const char *name, const char *lower) {
	if (name == NULL)
		return 0;
	for (;; name++, lower++) {
		int c = (unsigned char) *name;
		if ('A' <= c && c <= 'Z')
			c += 'a' - 'A';
		if (c != (unsigned char) *lower)
			return 0;
		if (c == '\0')
			return 1;
	}
}

/**
 * Unknown-encoding handler that adds Japanese character encodings to expat.
 * Map entries that are not assigned here keep the value supplied by the caller
 * (expat is expected to pre-fill the map with -1 - confirm for the expat version in use).
 * @param encodingHandlerData user-defined data for this handler (unused).
 * @param name encoding name; only "SHIFT_JIS" and "EUC-JP" are supported,
 *             compared without regard to ASCII case.
 * @param info receives the byte map and the conversion callback.
 * @return non-zero when the encoding was handled here, 0 otherwise.
 */
int XML_JapaneseEncodingHandler(void *encodingHandlerData, const XML_Char *name, XML_Encoding *info) {
	(void) encodingHandlerData;
	if (equalsIgnoreAsciiCase(name, "euc-jp")) {
		int i;
		setAsciiMap(info->map);
		/* -2 marks a lead byte of a two-byte sequence handed to the convert callback. */
		info->map[0x8e] = -2;
		for (i = 0xa1; i <= 0xfe; i++)
			info->map[i] = -2;
		info->convert = convertEUCJPtoUTF16;
		return 1;
	}else if (equalsIgnoreAsciiCase(name, "shift_jis")) {
		int i;
		setAsciiMap(info->map);
		for (i = 0x81; i <= 0x9f; i++)
			info->map[i] = -2;
		/* Half-width katakana are single bytes and can be mapped directly. */
		for (i = 0xa1; i <= 0xdf; i++)
			info->map[i] = JIS201KANAtoUTF16(i);
		for (i = 0xe0; i <= 0xfc; i++)
			info->map[i] = -2;
		info->convert = convertSHIFTJIStoUTF16;
		return 1;
	}
	return 0;
}

/*
 * Changes:
 * $Log: expatJP.c,v $
 * Revision 1.3  2003/11/27 05:45:17  sugoroku
 * Removed the file name from the comments, since the CVS keyword already contains it.
 *
 * Revision 1.2  2003/11/27 05:40:39  sugoroku
 * Inserted CVS keywords into the comments.
 *
 */
