/******************************************************************************/
/*! @file charset.cc
    @brief charset functions
    @author Masashi Astro Tachibana, Apolloron Project.
 ******************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if __ICONV == 1
#include <errno.h>
#include <sys/errno.h>
#include <iconv.h>
#endif

#include "apolloron.h"
#include "charset/table_unicode_eucjp.h"
#include "charset/table_sjis_unicode.h"
#include "charset/table_unicode_euckr.h"
#include "charset/table_euckr_unicode.h"
#include "charset/table_unicode_gbk.h"
#include "charset/table_gbk_unicode.h"
#include "charset/table_unicode_big5.h"
#include "charset/table_big5_unicode.h"
#include "charset/table_iso8859_unicode.h"
#include "charset/table_europe_unicode.h"
#include "charset/table_charwidth.h"
#include "charset.h"


namespace apolloron {

// Canonical character set names used throughout this file.
// charset_convert() maps the aliases it recognises onto these strings and,
// when built with __ICONV == 1, hands them to iconv_open().

// ISO-8859 family (parts 11 and 12 are not provided)
const char *STR_ISO8859_1  = "ISO-8859-1";
const char *STR_ISO8859_2  = "ISO-8859-2";
const char *STR_ISO8859_3  = "ISO-8859-3";
const char *STR_ISO8859_4  = "ISO-8859-4";
const char *STR_ISO8859_5  = "ISO-8859-5";
const char *STR_ISO8859_6  = "ISO-8859-6";
const char *STR_ISO8859_7  = "ISO-8859-7";
const char *STR_ISO8859_8  = "ISO-8859-8";
const char *STR_ISO8859_9  = "ISO-8859-9";
const char *STR_ISO8859_10 = "ISO-8859-10";
const char *STR_ISO8859_13 = "ISO-8859-13";
const char *STR_ISO8859_14 = "ISO-8859-14";
const char *STR_ISO8859_15 = "ISO-8859-15";
const char *STR_ISO8859_16 = "ISO-8859-16";
// KOI8 and Windows code pages handled by europe_to_utf8()/utf8_to_europe()
const char *STR_KOI8_R     = "KOI8-R";
const char *STR_KOI8_U     = "KOI8-U";
const char *STR_CP1251     = "CP1251";
const char *STR_CP1252     = "CP1252";
// Unicode transformation formats
const char *STR_UTF8       = "UTF-8";
const char *STR_UTF7       = "UTF-7";
const char *STR_UTF7_IMAP  = "UTF-7-IMAP";
// Japanese encodings (Shift_JIS is represented by its CP932 name)
const char *STR_SJIS       = "CP932";
const char *STR_JIS        = "ISO-2022-JP";
const char *STR_EUCJP      = "EUC-JP";
const char *STR_ASCII      = "US-ASCII";
// Pseudo charset: source encoding is to be detected among Japanese encodings
const char *STR_AUTOJP     = "AUTODETECT_JP";
// Korean / Chinese encodings (EUC-KR, Big5 and GB* aliases map onto these)
const char *STR_EUCKR      = "CP949";
const char *STR_BIG5       = "CP950";
const char *STR_GBK        = "GB18030";
// Pseudo charset: source encoding is to be detected by auto_detect()
const char *STR_AUTO       = "AUTODETECT";


/*! Multi-character set converter
    @param str          Source text.
    @param src_charset  Character set of input. (ex. "UTF-8", "ISO-2022-JP", etc..)
    @param dest_charset Character set of output. (ex. "UTF-8", "ISO-2022-JP", etc..)
    @return Converted text
 */
char* charset_convert(const char* str, const char *src_charset, const char *dest_charset) {
    char *buf;
    int converted;
    const char *src_ch, *dest_ch;
    long length;
#if __ICONV == 0
    char *buf1;
#else
    iconv_t cd;
#endif

    if (str == (const char *)NULL) {
        return NULL;
    }

    length = strlen(str);

    buf = NULL;
    converted = 0;

    if (src_charset == NULL || src_charset[0] == '\0') {
        src_ch = STR_AUTO;
    } else if (!strncasecmp(src_charset, "ASCII", 5) || !strncasecmp(src_charset, "US-ASCII", 8)) {
        src_ch = STR_ASCII;
    } else if (!strncasecmp(src_charset, "EUC-JP", 6) || !strncasecmp(src_charset, "X-EUC-JP", 8) ||
               !strncasecmp(src_charset, "EUCJP", 5)) {
        src_ch = STR_EUCJP;
    } else if (!strncasecmp(src_charset, "SHIFT_JIS", 9) || !strncasecmp(src_charset, "SHIFT-JIS", 9) ||
               !strcasecmp(src_charset, "CP932") || !strcasecmp(src_charset, "WINDOWS-932") ||
               !strcasecmp(src_charset, "MS932") || !strncasecmp(src_charset, "X-SJIS", 6)) {
        src_ch = STR_SJIS;
    } else if (!strncasecmp(src_charset, "ISO-2022-JP", 11) || !strncasecmp(src_charset, "X-WINDOWS-ISO2022JP", 19)) {
        src_ch = STR_JIS;
    } else if (!strncasecmp(src_charset, "UTF-8", 5) || !strncasecmp(src_charset, "UTF8", 4)) {
        src_ch = STR_UTF8;
    } else if (!strncasecmp(src_charset, "UTF-7-IMAP", 10) || !strncasecmp(src_charset, "UTF7-IMAP", 9)) {
        src_ch = STR_UTF7_IMAP;
    } else if (!strncasecmp(src_charset, "UTF-7", 5) || !strncasecmp(src_charset, "UTF7", 4)) {
        src_ch = STR_UTF7;
    } else if (!strncasecmp(src_charset, "AUTODETECT_JP", 13)) {
        src_ch = jis_auto_detect(str);
        if (src_ch == (const char *)NULL) {
            src_ch = src_charset;
        }
    } else if (!strncasecmp(src_charset, "AUTODETECT", 10)) {
        src_ch = auto_detect(str);
        if (src_ch == (const char *)NULL) {
            src_ch = src_charset;
        }
    } else if (!strncasecmp(src_charset, "EUC-KR", 6) || !strncasecmp(src_charset, "EUCKR", 5) ||
               !strcasecmp(src_charset, "CP949") || !strcasecmp(src_charset, "WINDOWS-949") ||
               !strcasecmp(src_charset, "MS949") || !strncasecmp(src_charset, "X-EUC-KR", 8)) {
        src_ch = STR_EUCKR;
    } else if (!strncasecmp(src_charset, "GB", 2) ||
               !strncasecmp(src_charset, "EUC-CN", 6) || !strncasecmp(src_charset, "EUCCN", 5) ||
               !strcasecmp(src_charset, "CP936") || !strcasecmp(src_charset, "WINDOWS-936") ||
               !strcasecmp(src_charset, "MS936") || !strncasecmp(src_charset, "X-EUC-CN", 8)) {
        src_ch = STR_GBK;
    } else if (!strcasecmp(src_charset, "BIG5") || !strncasecmp(src_charset, "EUC-TW", 6) ||
               !strncasecmp(src_charset, "EUCTW", 5) || !strcasecmp(src_charset, "CP950") ||
               !strcasecmp(src_charset, "WINDOWS-950") || !strcasecmp(src_charset, "MS950")) {
        src_ch = STR_BIG5;
    } else if (!strcasecmp(src_charset, "CP1251") || !strcasecmp(src_charset, "WINDOWS-1251") ||
               !strcasecmp(src_charset, "MS1251")) {
        src_ch = STR_CP1251;
    } else if (!strcasecmp(src_charset, "CP1252") || !strcasecmp(src_charset, "WINDOWS-1252") ||
               !strcasecmp(src_charset, "MS1252")) {
        src_ch = STR_CP1252;
    } else if (!strncasecmp(src_charset, "KOI8-U", 6) ||
               !strcasecmp(src_charset, "CP21866") || !strcasecmp(src_charset, "WINDOWS-21866") ||
               !strcasecmp(src_charset, "MS21866")) {
        src_ch = STR_KOI8_U;
    } else if (!strncasecmp(src_charset, "KOI8", 4) ||
               !strcasecmp(src_charset, "CP20866") || !strcasecmp(src_charset, "WINDOWS-20866") ||
               !strcasecmp(src_charset, "MS20866")) {
        src_ch = STR_KOI8_R;
    } else {
        src_ch = src_charset;
    }

    if (dest_charset == NULL) {
        dest_ch = "";
    } else if (!strncasecmp(dest_charset, "ASCII", 5) || !strncasecmp(dest_charset, "US-ASCII", 8)) {
        dest_ch = STR_ASCII;
    } else if (!strncasecmp(dest_charset, "EUC-JP", 6) || !strncasecmp(dest_charset, "X-EUC-JP", 8) ||
               !strncasecmp(dest_charset, "EUCJP", 5)) {
        dest_ch = STR_EUCJP;
    } else if (!strncasecmp(dest_charset, "SHIFT_JIS", 9) || !strncasecmp(dest_charset, "SHIFT-JIS", 9) ||
               !strcasecmp(dest_charset, "CP932") || !strcasecmp(dest_charset, "WINDOWS-932") ||
               !strcasecmp(dest_charset, "MS932") || !strncasecmp(dest_charset, "X-SJIS", 6)) {
        dest_ch = STR_SJIS;
    } else if (!strncasecmp(dest_charset, "ISO-2022-JP", 11) || !strncasecmp(dest_charset, "X-WINDOWS-ISO2022JP", 19)) {
        dest_ch = STR_JIS;
    } else if (!strncasecmp(dest_charset, "UTF-8", 5) || !strncasecmp(dest_charset, "UTF8", 4)) {
        dest_ch = STR_UTF8;
    } else if (!strncasecmp(dest_charset, "UTF-7-IMAP", 10) || !strncasecmp(dest_charset, "UTF7-IMAP", 9)) {
        dest_ch = STR_UTF7_IMAP;
    } else if (!strncasecmp(dest_charset, "UTF-7", 5) || !strncasecmp(dest_charset, "UTF7", 4)) {
        dest_ch = STR_UTF7;
    } else if (!strncasecmp(dest_charset, "EUC-KR", 6) || !strncasecmp(dest_charset, "EUCKR", 5) ||
               !strcasecmp(dest_charset, "CP949") || !strcasecmp(dest_charset, "WINDOWS-949") ||
               !strcasecmp(dest_charset, "MS949") || !strncasecmp(dest_charset, "X-EUC-KR", 8)) {
        dest_ch = STR_EUCKR;
    } else if (!strncasecmp(dest_charset, "GB", 2) ||
               !strncasecmp(dest_charset, "EUC-CN", 6) || !strncasecmp(src_charset, "EUCCN", 5) ||
               !strcasecmp(dest_charset, "CP936") || !strcasecmp(dest_charset, "WINDOWS-936") ||
               !strcasecmp(dest_charset, "MS936") || !strncasecmp(dest_charset, "X-EUC-CN", 8)) {
        dest_ch = STR_GBK;
    } else if (!strcasecmp(dest_charset, "BIG5") || !strncasecmp(dest_charset, "EUC-TW", 6) ||
               !strncasecmp(dest_charset, "EUCTW", 5) || !strcasecmp(dest_charset, "CP950") ||
               !strcasecmp(dest_charset, "WINDOWS-950") || !strcasecmp(dest_charset, "MS950")) {
        dest_ch = STR_BIG5;
    } else if (!strcasecmp(dest_charset, "CP1251") || !strcasecmp(dest_charset, "WINDOWS-1251") ||
               !strcasecmp(dest_charset, "MS1251")) {
        dest_ch = STR_CP1251;
    } else if (!strcasecmp(dest_charset, "CP1252") || !strcasecmp(dest_charset, "WINDOWS-1252") ||
               !strcasecmp(dest_charset, "MS1252")) {
        dest_ch = STR_CP1252;
    } else if (!strncasecmp(dest_charset, "KOI8-U", 6) ||
               !strcasecmp(dest_charset, "CP21866") || !strcasecmp(dest_charset, "WINDOWS-21866") ||
               !strcasecmp(dest_charset, "MS21866")) {
        dest_ch = STR_KOI8_U;
    } else if (!strncasecmp(dest_charset, "KOI8", 4) ||
               !strcasecmp(dest_charset, "CP20866") || !strcasecmp(dest_charset, "WINDOWS-20866") ||
               !strcasecmp(dest_charset, "MS20866")) {
        dest_ch = STR_KOI8_R;
    } else {
        dest_ch = "";
    }

#if __ICONV == 1
    // get characterset converter from src_ch to dest_ch
    cd = iconv_open(dest_ch, src_ch);

    if (cd != (iconv_t)-1) {
#if __ICONV_CONST == 1
        const char *src;
        const char *ibuf, *ip;
#else
        char *src;
        char *ibuf, *ip;
#endif
        char *obuf, *op;
        size_t ileft, oleft;
        size_t olen, osize;
        int r;

#if __ICONV_CONST == 1
        src = (const char *)str;
#else
        src = (char *)str;
#endif

        osize = 1024 + 1;
        obuf = new char [osize];
        olen = 0;
        obuf[olen] = '\0';

        ibuf = src;
        ileft = (size_t)length;
        ip = ibuf;
        do {
            char *obuf_orig;

            oleft = 1024;
            osize = olen + oleft + 1;

            obuf_orig = obuf;
            obuf = new char [osize];
            memcpy(obuf, obuf_orig, olen + 1);
            delete [] obuf_orig;

            op = obuf + olen;

            r = iconv(cd, &ip, &ileft, &op, &oleft);

            if (r < 0 && errno == EILSEQ) {
                /* error */
                if (0 < oleft) {
                    *op = '?';
                    op++;
                    oleft--;
                    ip++;
                    ileft--;
                } else {
                    break;
                }
            }
            olen += (1024 - oleft);
        } while (r < 0 && errno == E2BIG);
        obuf[olen] = '\0';

        iconv_close(cd);

        if (!(r < 0 && errno == EILSEQ)) {
            buf = obuf;
            converted = 1;
        } else {
            delete [] obuf;
        }
    }

#else
    if (dest_ch[0] == '\0' || !strcasecmp(src_ch, dest_ch)) {
        buf = new char [length + 1];
        memcpy(buf, str, length);
        buf[length] = '\0';
        converted = 1;
    } else if (!strcasecmp(src_ch, STR_EUCJP)) {
        if (!strcasecmp(dest_ch, STR_SJIS)) {
            buf = eucjp_to_sjis(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_JIS)) {
            buf = eucjp_to_jis(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_UTF8)) {
            buf = eucjp_to_utf8(str);
            converted = 1;
        }
    } else if (!strcasecmp(src_ch, STR_SJIS)) {
        if (!strcasecmp(dest_ch, STR_EUCJP)) {
            buf = sjis_to_eucjp(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_JIS)) {
            buf = sjis_to_jis(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_UTF8)) {
            buf = sjis_to_utf8(str);
            converted = 1;
        }
    } else if (!strcasecmp(src_ch, STR_JIS)) {
        if (!strcasecmp(dest_ch, STR_EUCJP)) {
            buf = jis_to_eucjp(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_SJIS)) {
            buf = jis_to_sjis(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_UTF8)) {
            buf = jis_to_utf8(str);
            converted = 1;
        }
    } else if (!strcasecmp(src_ch, STR_UTF8)) {
        if (!strcasecmp(dest_ch, STR_EUCJP)) {
            buf = utf8_to_eucjp(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_SJIS)) {
            buf = utf8_to_sjis(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_JIS)) {
            buf = utf8_to_jis(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_EUCKR)) {
            buf = utf8_to_euckr(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_UTF7)) {
            buf = utf8_to_utf7(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_UTF7_IMAP)) {
            buf = utf8_to_modutf7(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_GBK)) {
            buf = utf8_to_gbk(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_BIG5)) {
            buf = utf8_to_big5(str);
            converted = 1;
        } else if (!strncasecmp(dest_ch, "ISO-8859-", 9)) {
            int num;
            num = atoi(dest_ch + 9);
            buf = utf8_to_iso8859(str, num);
            converted = 1;
        } else if (!strcasecmp(dest_ch, "KOI8-U")) {
            buf = utf8_to_europe(str, 'U');
            converted = 1;
        } else if (!strcasecmp(dest_ch, "KOI8-R")) {
            buf = utf8_to_europe(str, 'R');
            converted = 1;
        } else if (!strcasecmp(dest_ch, "CP1251")) {
            buf = utf8_to_europe(str, '1');
            converted = 1;
        } else if (!strcasecmp(dest_ch, "CP1252")) {
            buf = utf8_to_europe(str, '2');
            converted = 1;
        }
    } else if (!strcasecmp(src_ch, STR_AUTOJP) || !strcasecmp(src_ch, STR_AUTO)) {
        if (!strcasecmp(dest_ch, STR_EUCJP)) {
            buf = autojp_to_eucjp(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_SJIS)) {
            buf = autojp_to_sjis(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_JIS)) {
            buf = autojp_to_jis(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_UTF8)) {
            buf = autojp_to_utf8(str);
            converted = 1;
        } else if (!strcasecmp(src_ch, STR_AUTOJP)) {
            src_ch = jis_auto_detect(str);
        } else {
            src_ch = auto_detect(str);
        }
    } else if (!strcasecmp(src_ch, STR_EUCKR)) {
        if (!strcasecmp(dest_ch, STR_UTF8)) {
            buf = euckr_to_utf8(str);
            converted = 1;
        }
    } else if (!strcasecmp(src_ch, STR_GBK)) {
        if (!strcasecmp(dest_ch, STR_UTF8)) {
            buf = gbk_to_utf8(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_BIG5)) {
            buf1 = gbk_to_utf8(str);
            buf = utf8_to_big5(buf1);
            if (buf1 != NULL) {
                delete [] buf1;
            }
            converted = 1;
        }
    } else if (!strcasecmp(src_ch, STR_BIG5)) {
        if (!strcasecmp(dest_ch, STR_UTF8)) {
            buf = big5_to_utf8(str);
            converted = 1;
        } else if (!strcasecmp(dest_ch, STR_GBK)) {
            buf1 = big5_to_utf8(str);
            buf = utf8_to_gbk(buf1);
            if (buf1 != NULL) {
                delete [] buf1;
            }
            converted = 1;
        }
    } else if (!strncasecmp(src_ch, "ISO-8859-", 9)) {
        if (!strcasecmp(dest_ch, STR_UTF8)) {
            int num;
            num = atoi(dest_ch + 9);
            buf = iso8859_to_utf8(str, num);
            converted = 1;
        }
    } else if (!strcasecmp(src_ch, "KOI8-U")) {
        if (!strcasecmp(dest_ch, STR_UTF8)) {
            buf = europe_to_utf8(str, 'U');
            converted = 1;
        }
    } else if (!strcasecmp(src_ch, "KOI8-R")) {
        if (!strcasecmp(dest_ch, STR_UTF8)) {
            buf = europe_to_utf8(str, 'R');
            converted = 1;
        }
    } else if (!strcasecmp(src_ch, "CP1251")) {
        if (!strcasecmp(dest_ch, STR_UTF8)) {
            buf = europe_to_utf8(str, '1');
            converted = 1;
        }
    } else if (!strcasecmp(src_ch, "CP1252")) {
        if (!strcasecmp(dest_ch, STR_UTF8)) {
            buf = europe_to_utf8(str, '2');
            converted = 1;
        }
    }
#endif

#if __ICONV == 0
    if (converted == 0 && strcasecmp(src_ch, STR_UTF8) != 0) {
        buf1 = NULL;
        if (!strcasecmp(src_ch, STR_EUCJP)) {
            buf1 = eucjp_to_utf8(str);
        } else if (!strcasecmp(src_ch, STR_SJIS)) {
            buf1 = sjis_to_utf8(str);
        } else if (!strcasecmp(src_ch, STR_JIS)) {
            buf1 = jis_to_utf8(str);
        } else if (!strcasecmp(src_ch, STR_UTF7)) {
            buf1 = utf7_to_utf8(str);
        } else if (!strcasecmp(src_ch, STR_UTF7_IMAP)) {
            buf1 = modutf7_to_utf8(str);
        } else if (!strcasecmp(src_ch, STR_EUCKR)) {
            buf1 = euckr_to_utf8(str);
        } else if (!strcasecmp(src_ch, STR_GBK)) {
            buf1 = gbk_to_utf8(str);
        } else if (!strcasecmp(src_ch, STR_BIG5)) {
            buf1 = big5_to_utf8(str);
        }
        if (buf1 != NULL) {
            buf = charset_convert(buf1, STR_UTF8, dest_ch);
            if (buf1 != NULL) {
                delete [] buf1;
            }
            converted = 1;
        }
    }
#endif

    if (converted == 0) {
        long length;
        length = strlen(str);
        buf = new char [length + 1];
        memcpy(buf, str, length);
        buf[length] = '\0';
        converted = 1;
    }

    if (!strcasecmp(dest_ch, STR_UTF8) ||
            (!strcasecmp(src_ch, STR_UTF8) && dest_ch[0] == '\0')) {
        utf8_clean(buf);
    }

    return buf;
}


/*! Guess the character set of a text (jis_auto_detect() plus an EUC-KR check)
    @param str  text for character set auto detection
    @return The name reported by jis_auto_detect(), except that an "EUC-JP"
            verdict is replaced by "CP949" when every byte with the high bit
            set belongs to a two-byte pair in the range tested below
 */
const char* auto_detect(const char *str) {
    const char *detected = jis_auto_detect(str);

    // Only an EUC-JP verdict is examined further
    if (strcmp(detected, STR_EUCJP) != 0) {
        return detected;
    }

    // Judgment of EUC-KR: walk the text, consuming two bytes per valid pair
    for (const unsigned char *p = (const unsigned char *)str; *p != '\0'; p++) {
        const unsigned char lead = p[0];
        if (lead < (unsigned char)0x80) {
            continue; // 7-bit bytes never disqualify
        }

        const unsigned char trail = p[1];
        const bool valid_pair =
            (unsigned char)0xB0 <= lead && lead <= (unsigned char)0xC8 &&
            (unsigned char)0xA1 <= trail && trail <= (unsigned char)0xFE;
        if (!valid_pair) {
            // Stray high byte (or text ending on one): keep the first verdict
            return detected;
        }
        p++; // skip the trail byte
    }

    return STR_EUCKR;
}


/*! Auto detect japanese character set
    @param str  text for character set auto detection (NULL and "" are accepted)
    @return Character set ("CP932", "ISO-2022-JP", "EUC-JP", "UTF-8" or "US-ASCII").

    The text is scanned once per candidate encoding. Each scan accumulates a
    penalty score (bad*) for byte patterns that do not fit that encoding, and
    the candidate with the lowest penalty wins. "US-ASCII" is returned for
    NULL or empty input and when no scan produced any penalty.
 */
const char* jis_auto_detect(const char *str) {
    long length;
    const char *charcode;

    if (str != NULL && str[0] != '\0') {
        length = strlen(str);
    } else {
        length = 0;
    }

    if (0 < length) {
        long i, badEUCJP, badSJIS, badJIS, badUTF8, max;

        badEUCJP = 0;
        badSJIS = 0;
        badJIS = 0;
        badUTF8 = 0;

        max = length; // Processing will be omitted if comparison is fully possible

        // Judgment of EUC-JP
        for (i = 0; i < length; i++) {
            if (iseuc((unsigned char)str[i]) && (++i < length)) {
                if (!iseuc((unsigned char)str[i])) {
                    badEUCJP += 10;
                    i--;
                    // NOTE(review): str[i-1] is a plain char below, so this test
                    // depends on the platform's char signedness - confirm intent.
                } else if (0x50 <= str[i-1]) {
                    badEUCJP++;
                }
            } else if (((unsigned char)str[i] == 0x8E) && (++i < length)) {
                if (ishankana((unsigned char)str[i])) {
                    badEUCJP++;
                } else {
                    badEUCJP += 10;
                    i--;
                }
            } else if ((str[i] == 0x1B) && (++i < length)) {
                // ESC followed by '$' or '(' looks like an ISO-2022-JP escape
                if ((str[i] == '$') || (str[i] == '(')) {
                    badEUCJP += 20;
                } else {
                    i--;
                }
            } else if ((str[i] == '$') || (str[i] == '%')) {
                badEUCJP++;
            } else if (0x80 <= (unsigned char)str[i]) {
                badEUCJP++;
            }
            if (1000 < badEUCJP) {
                // Clearly not EUC-JP: limit the later scans to this prefix
                max = i;
                break;
            }
        }

        // Judgment of CP932 and ISO-2022-JP
        for (i = 0; i < length && i < max; i++) {
            unsigned char c, d;
            c = (unsigned char)str[i];
            if (issjis1(c) && (i + 1 < length)) {
                d = (unsigned char)str[i+1];
                if (issjis2(d)) {
                    badJIS += 10;
                    i++;
                    // 0 < i keeps str[i-1] inside the buffer when the very
                    // first byte of the text is a Shift_JIS lead byte.
                } else if (0 < i && 0x989F <= (((unsigned int)str[i-1] << 8) | (unsigned int)str[i])) {
                    badSJIS++;
                    badJIS++;
                }
            } else if (0x80 <= (unsigned char)str[i]) {
                // Any 8-bit byte argues strongly against 7-bit ISO-2022-JP
                badJIS += 20;
                if (ishankana((unsigned char)str[i])) {
                    badSJIS++;
                } else {
                    badSJIS += 10;
                }
            }
            if (1000 < badSJIS) {
                max = i;
                break;
            }
        }

        // Judgment of UTF-8
        // A leading EF BB BF (UTF-8 byte order mark) penalizes the other candidates
        if (3 <= length && (unsigned char)str[0] == 0xEF &&
                (unsigned char)str[1] == 0xBB && (unsigned char)str[2] == 0xBF) {
            badEUCJP += 10;
            badSJIS += 10;
            badJIS += 10;
        }
        for (i = 0; i < length && i < max; i++) {
            if ((str[i] == 0x1B) && (i+1 < length)) {
                if ((str[i+1] == '$') || (str[i+1] == '(')) {
                    badUTF8 += 20;
                }
            } else if ((str[i] == '$') || (str[i] == '%')) {
                badUTF8++;
            }
            if (isutf8_1((unsigned char)str[i]) && (++i < length)) {
                if (isutf8_2((unsigned char)str[i]) && (++i < length)) {
                    if (3 <= utf8_len((unsigned char)str[i-2])) {
                        if (!isutf8_3((unsigned char)str[i])) {
                            badUTF8 += 10;
                            i -= 2;
                        }
                    } else {
                        // Two-byte sequence: the byte just read starts the next character
                        i--;
                    }
                } else {
                    badUTF8 += 5;
                    i--;
                }
            } else if (iseuc((unsigned char)str[i])) {
                badUTF8 += 10;
            }
            if (1000 < badUTF8) {
                max = i;
                break;
            }
        }

        // Pick the candidate with the lowest penalty
        if (badSJIS < badEUCJP && badSJIS <= badUTF8) {
            // ISO-2022-JP or CP932
            if (badSJIS < badJIS) {
                charcode = STR_SJIS;
            } else {
                charcode = STR_JIS;
            }
        } else if (2 <= length && badUTF8 < badEUCJP && badUTF8 < badSJIS) {
            // UTF-8
            charcode = STR_UTF8;
        } else if (!(badEUCJP == 0 && badSJIS == 0 && badUTF8 == 0)) {
            // EUC-JP
            charcode = STR_EUCJP;
        } else {
            // ASCII
            charcode = STR_ASCII;
        }
    } else {
        // NULL
        charcode = STR_ASCII;
    }

    return charcode;
}


#if __ICONV == 0
/*! Convert character set from ISO-8859-x to UTF-8
    @param str          ISO-8859-x text
    @param iso8859_num  x of ISO-8859-x.
    @return Converted text  (UTF-8 text)
 */
char* iso8859_to_utf8(const char* str, int iso8859_num) {
    char *buf;
    long i, j, length, length2;
    unsigned char a0, a1;
    const unsigned char (*iso8859_map)[ISO8859_UNICODE_TABLE_MAX][2];

    if (iso8859_num <= 0 || (11 <= iso8859_num && iso8859_num <= 12) || 17 <= iso8859_num) {
        return NULL;
    }

    switch (iso8859_num) {
        case 1:
            iso8859_map = &iso8859_1_unicode;
            break;
        case 2:
            iso8859_map = &iso8859_2_unicode;
            break;
        case 3:
            iso8859_map = &iso8859_3_unicode;
            break;
        case 4:
            iso8859_map = &iso8859_4_unicode;
            break;
        case 5:
            iso8859_map = &iso8859_5_unicode;
            break;
        case 6:
            iso8859_map = &iso8859_6_unicode;
            break;
        case 7:
            iso8859_map = &iso8859_7_unicode;
            break;
        case 8:
            iso8859_map = &iso8859_8_unicode;
            break;
        case 9:
            iso8859_map = &iso8859_9_unicode;
            break;
        case 10:
            iso8859_map = &iso8859_10_unicode;
            break;
        case 13:
            iso8859_map = &iso8859_13_unicode;
            break;
        case 14:
            iso8859_map = &iso8859_14_unicode;
            break;
        case 15:
            iso8859_map = &iso8859_15_unicode;
            break;
        case 16:
            iso8859_map = &iso8859_16_unicode;
            break;
        default:
            return NULL;
            break;
    }

    buf = NULL;

    if (str != NULL) {
        // Memory allocation
        length = strlen(str);
        length2 = length*3;
        buf = new char[length2 + 1];

        // ISO-8859-x to UTF-8
        i = 0;
        j = 0;
        while (i < length) {
            if ((unsigned char)0x80 <= (unsigned char)str[i]) {
                a0 = (*iso8859_map)[(unsigned char)str[i] - (unsigned char)0x80][0];
                a1 = (*iso8859_map)[(unsigned char)str[i] - (unsigned char)0x80][1];
                if (a0 == (unsigned char)0x00 && (a1 & (unsigned char)0x80) == (unsigned char)0x00) {
                    buf[j++] = a1;
                } else if (a0 <= (unsigned char)0x07) {
                    buf[j++] = 0xC0 | ((a0 & 7) << 2) | (a1 >> 6);
                    buf[j++] = 0x80 | (0x3F & a1);
                } else {
                    buf[j++] = 0xE0 | (a0 >> 4);
                    buf[j++] = 0x80 | ((0x3F & (a0 << 2)) | (a1 >> 6));
                    buf[j++] = 0x80 | (0x3F & a1);
                }
            } else {
                buf[j++] = str[i];
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from UTF-8 to ISO-8859-x
    @param str          UTF-8 text (may be NULL)
    @param iso8859_num  x of ISO-8859-x (1 to 10, or 13 to 16)
    @return Converted text (ISO-8859-x text) in a new buffer (release with
            delete []), or NULL when str is NULL or the part number is not
            supported. Characters that have no mapping are replaced by a
            single '?'.
 */
char* utf8_to_iso8859(const char* str, int iso8859_num) {
    char *buf;
    long i, j, length, length2;
    unsigned char a0, a1, a2;
    const unsigned char (*iso8859_map)[ISO8859_UNICODE_TABLE_MAX][2];

    if (iso8859_num <= 0 || (11 <= iso8859_num && iso8859_num <= 12) || 17 <= iso8859_num) {
        return NULL;
    }

    switch (iso8859_num) {
        case 1:
            iso8859_map = &iso8859_1_unicode;
            break;
        case 2:
            iso8859_map = &iso8859_2_unicode;
            break;
        case 3:
            iso8859_map = &iso8859_3_unicode;
            break;
        case 4:
            iso8859_map = &iso8859_4_unicode;
            break;
        case 5:
            iso8859_map = &iso8859_5_unicode;
            break;
        case 6:
            iso8859_map = &iso8859_6_unicode;
            break;
        case 7:
            iso8859_map = &iso8859_7_unicode;
            break;
        case 8:
            iso8859_map = &iso8859_8_unicode;
            break;
        case 9:
            iso8859_map = &iso8859_9_unicode;
            break;
        case 10:
            iso8859_map = &iso8859_10_unicode;
            break;
        case 13:
            iso8859_map = &iso8859_13_unicode;
            break;
        case 14:
            iso8859_map = &iso8859_14_unicode;
            break;
        case 15:
            iso8859_map = &iso8859_15_unicode;
            break;
        case 16:
            iso8859_map = &iso8859_16_unicode;
            break;
        default:
            return NULL;
    }

    buf = NULL;

    if (str != NULL) {
        // Memory allocation
        length = strlen(str);
        length2 = length*3;
        buf = new char[length2 + 1];

        // UTF-8 to ISO-8859-x
        i = 0;
        j = 0;
        while (i < length && j <= length2) {
            a0 = str[i];
            if (isutf8_1(a0) && i+utf8_len(a0)-1 < length) {
                unsigned short len1, n;

                // Look ahead without ever reading past the terminator
                a1 = (i + 1 < length) ? str[i+1] : '\0';
                a2 = (i + 2 < length) ? str[i+2] : '\0';

                // EF BB BF (byte order mark) is dropped
                if (a0 == 0xEF && a1 == 0xBB && a2 == 0xBF) {
                    i += 3;
                    continue;
                }

                // E2 80 BE (U+203E) is written as 0x7E
                if (a0 == 0xE2 && a1 == 0x80 && a2 == 0xBE) {
                    buf[j++] = 0x7E;
                    i += 3;
                    continue;
                }

                // Rebuild the code point as high byte (a0) / low byte (a1)
                len1 = utf8_len(str[i]);
                if (len1 == 2) {
                    a0 = 0x07 & (str[i] >> 2);
                    a1 = (str[i] << 6) | (0x3F & str[i+1]);
                } else if (len1 == 3) {
                    a0 = (str[i] << 4) | (0x0F & (str[i+1] >> 2));
                    a1 = (str[i+1] << 6) | (0x3F & str[i+2]);
                } else if (len1 == 1) {
                    buf[j++] = str[i];
                    i++;
                    continue;
                } else {
                    // Longer sequences cannot be represented by a table entry
                    buf[j++] = '?';
                    i += len1;
                    continue;
                }

                // Reverse lookup: table index n corresponds to byte 0x80 + n
                n = 0;
                while (n < ISO8859_UNICODE_TABLE_MAX) {
                    if ((*iso8859_map)[n][0] == a0 && (*iso8859_map)[n][1] == a1) {
                        break;
                    }
                    n++;
                }
                if (n < ISO8859_UNICODE_TABLE_MAX) {
                    buf[j++] = (unsigned char)(0x80 + n);
                } else {
                    buf[j++] = '?';
                }
                // Skip the continuation bytes whether or not a mapping was
                // found, so that they are not copied raw into the output
                i += (len1 - 1);
            } else {
                buf[j++] = str[i];
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from KOI8-x/CP1251/CP1252 to UTF-8
    @param str          KOI-8-x text
    @param ch           x of KOI8-x. '1' for CP1251, '2' for CP1252
    @return Converted text  (UTF-8 text)
 */
char* europe_to_utf8(const char* str, char ch) {
    char *buf;
    long i, j, length, length2;
    unsigned char a0, a1;
    const unsigned char (*eur_map)[EUROPE_UNICODE_TABLE_MAX][2];

    if (ch == '1') {
        eur_map = &cp1251_unicode;
    } else if (ch == '2') {
        eur_map = &cp1252_unicode;
    } else if (ch == 'U' || ch == 'u') {
        eur_map = &koi8_u_unicode;
    } else {
        eur_map = &koi8_r_unicode;
    }

    buf = NULL;

    if (str != NULL) {
        // Memory allocation
        length = strlen(str);
        length2 = length*3;
        buf = new char[length2 + 1];

        // KOI8-x/CP1251/CP1252 to UTF-8
        i = 0;
        j = 0;
        while (i < length) {
            if ((unsigned char)0x80 <= (unsigned char)str[i]) {
                a0 = (*eur_map)[(unsigned char)str[i] - (unsigned char)0x80][0];
                a1 = (*eur_map)[(unsigned char)str[i] - (unsigned char)0x80][1];
                if (a0 == (unsigned char)0x00 && (a1 & (unsigned char)0x80) == (unsigned char)0x00) {
                    buf[j++] = a1;
                } else if (a0 <= (unsigned char)0x07) {
                    buf[j++] = 0xC0 | ((a0 & 7) << 2) | (a1 >> 6);
                    buf[j++] = 0x80 | (0x3F & a1);
                } else {
                    buf[j++] = 0xE0 | (a0 >> 4);
                    buf[j++] = 0x80 | ((0x3F & (a0 << 2)) | (a1 >> 6));
                    buf[j++] = 0x80 | (0x3F & a1);
                }
            } else {
                buf[j++] = str[i];
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from UTF-8 to KOI8-x/CP1251/CP1252
    @param str          UTF-8 text
    @param ch           Table selector: '1' for CP1251, '2' for CP1252,
                        'U' or 'u' for KOI8-U, any other value for KOI8-R
    @return Newly allocated (new[]) text in the selected charset, or NULL
            when str is NULL. Characters that have no entry in the selected
            table are replaced by '?'.
 */
char* utf8_to_europe(const char* str, char ch) {
    char *buf;
    long i, j, length, length2;
    unsigned char a0, a1, a2;
    const unsigned char (*eur_map)[EUROPE_UNICODE_TABLE_MAX][2];

    if (ch == '1') {
        eur_map = &cp1251_unicode;
    } else if (ch == '2') {
        eur_map = &cp1252_unicode;
    } else if (ch == 'U' || ch == 'u') {
        eur_map = &koi8_u_unicode;
    } else {
        eur_map = &koi8_r_unicode;
    }

    buf = NULL;

    if (str != NULL) {
        // Memory allocation
        length = strlen(str);
        length2 = length*3;
        buf = new char[length2 + 1];

        // UTF-8 to KOI8-x/CP1251/CP1252
        i = 0;
        j = 0;
        while (i < length && j <= length2) {
            a0 = str[i];
            if (isutf8_1(a0) && i+utf8_len(a0)-1 < length) {
                unsigned short len1, n;
                unsigned char hi, lo;

                // Look-ahead bytes; never read beyond the terminator even
                // when the sequence starts on the last byte of the input
                a1 = (i+1 < length) ? (unsigned char)str[i+1] : 0;
                a2 = (i+2 < length) ? (unsigned char)str[i+2] : 0;

                // Skip the byte order mark (U+FEFF)
                if (a0 == 0xEF && a1 == 0xBB && a2 == 0xBF) {
                    i += 3;
                    continue;
                }

                // U+203E (overline) is written as 0x7E
                if (a0 == 0xE2 && a1 == 0x80 && a2 == 0xBE) {
                    buf[j++] = 0x7E;
                    i += 3;
                    continue;
                }

                // Unpack the code point into its high and low byte.
                // The arithmetic is done on unsigned bytes so that no
                // negative value is ever shifted.
                len1 = utf8_len(str[i]);
                if (len1 == 2) {
                    hi = 0x07 & (a0 >> 2);
                    lo = (unsigned char)((a0 << 6) | (0x3F & a1));
                } else if (len1 == 3) {
                    hi = (unsigned char)((a0 << 4) | (0x0F & (a1 >> 2)));
                    lo = (unsigned char)((a1 << 6) | (0x3F & a2));
                } else if (len1 == 1) {
                    buf[j++] = str[i];
                    i++;
                    continue;
                } else {
                    // Longer sequences cannot be represented
                    buf[j++] = '?';
                    i += len1;
                    continue;
                }

                // Linear search; row n of the table corresponds to byte 0x80+n
                n = 0;
                while (n < EUROPE_UNICODE_TABLE_MAX) {
                    if ((*eur_map)[n][0] == hi && (*eur_map)[n][1] == lo) {
                        break;
                    }
                    n++;
                }
                if (n < EUROPE_UNICODE_TABLE_MAX) {
                    buf[j++] = (unsigned char)(0x80 + n);
                } else {
                    buf[j++] = '?';
                }
                // Trailing bytes of the sequence; the lead byte is
                // consumed by the i++ below
                i += (len1 - 1);
            } else {
                // Not a usable lead byte: keep 7-bit bytes, mask the rest
                buf[j++] = (a0 & (unsigned char)0x80)?'?':a0;
            }
            i++;
        }
        buf[j] = '\0';
    }
    return buf;
}


/*! Convert character set from UTF-8 to EUC-JP
    @param str  UTF-8 text
    @return Newly allocated (new[]) EUC-JP text, or NULL when str is NULL.
            Characters without a table entry, and sequences longer than
            three bytes, are replaced by '?'.
 */
char* utf8_to_eucjp(const char *str) {
    char *buf;
    long i, j, length, length2;
    unsigned char a0, a1, a2;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation
        length = strlen(str);
        length2 = length*2;
        buf = new char[length2 + 1];

        // UTF-8 to EUC-JP
        i = 0;
        j = 0;
        while (i < length && j <= length2) {
            a0 = str[i];
            if (isutf8_1(a0) && i+utf8_len(a0)-1 < length) {
                int pos, old_pos, min, max, s;
                unsigned short len1;
                unsigned char hi, lo;
                bool found;

                // Look-ahead bytes; never read beyond the terminator even
                // when the sequence starts on the last byte of the input
                a1 = (i+1 < length) ? (unsigned char)str[i+1] : 0;
                a2 = (i+2 < length) ? (unsigned char)str[i+2] : 0;

                // Skip the byte order mark (U+FEFF)
                if (a0 == 0xEF && a1 == 0xBB && a2 == 0xBF) {
                    i += 3;
                    continue;
                }

                // U+00A5 (yen sign) is written as 0x5C
                if (a0 == 0xC2 && a1 == 0xA5) {
                    buf[j++] = 0x5C;
                    i += 2;
                    continue;
                }

                // U+203E (overline) is written as 0x7E
                if (a0 == 0xE2 && a1 == 0x80 && a2 == 0xBE) {
                    buf[j++] = 0x7E;
                    i += 3;
                    continue;
                }

                // Unpack the code point into its high and low byte.
                // The arithmetic is done on unsigned bytes so that no
                // negative value is ever shifted.
                len1 = utf8_len(str[i]);
                if (len1 == 2) {
                    hi = 0x07 & (a0 >> 2);
                    lo = (unsigned char)((a0 << 6) | (0x3F & a1));
                } else if (len1 == 3) {
                    hi = (unsigned char)((a0 << 4) | (0x0F & (a1 >> 2)));
                    lo = (unsigned char)((a1 << 6) | (0x3F & a2));
                } else {
                    buf[j++] = '?';
                    i += len1;
                    continue;
                }

                // index_unicode_eucjp[hi] is the first table row for this
                // high byte (negative when there is none); the rows up to
                // the start of the next populated high byte form the
                // search range.
                min = index_unicode_eucjp[hi];
                if (min < 0) {
                    max = min;
                } else if (hi != 255) {
                    max = min + 255;
                    for (s = hi+1; s <= 255; s++) {
                        if (0 <= index_unicode_eucjp[s]) {
                            max = index_unicode_eucjp[s]-1;
                            break;
                        }
                    }
                    if (UNICODE_EUCJP_TABLE_MAX-1 < max) {
                        max = UNICODE_EUCJP_TABLE_MAX-1;
                    }
                } else {
                    max = UNICODE_EUCJP_TABLE_MAX-1;
                }

                // Binary search on the low byte (column 0) within the range.
                // A hit is recorded explicitly: when the loop ends without
                // one, pos may already have moved to a row outside the
                // range, so the table must not be consulted at pos again.
                found = false;
                old_pos = -1;
                pos = (min+max)/2;
                if (0 <= min) do {
                        if (unicode_eucjp[pos][0] == lo) {
                            found = true;
                            break;
                        }

                        old_pos = pos;
                        if (unicode_eucjp[old_pos][0] < lo) {
                            pos = (old_pos+1+max)/2;
                            min = old_pos+1;
                        } else {
                            pos = (min+old_pos-1)/2;
                            max = old_pos-1;
                        }
                    } while (pos != old_pos && min <= max);

                if (found) {
                    buf[j++] = unicode_eucjp[pos][1];
                    buf[j++] = unicode_eucjp[pos][2];
                } else {
                    buf[j++] = '?';
                }
                // Trailing bytes of the sequence; the lead byte is
                // consumed by the i++ below
                i += (len1 - 1);
            } else {
                // Not a usable lead byte: keep 7-bit bytes, mask the rest
                buf[j++] = (a0 & (unsigned char)0x80)?'?':a0;
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from CP932 to EUC-JP
    @param str  CP932 text
    @return Converted text (EUC-JP text)
 */
char* sjis_to_eucjp(const char *str) {
    char *buf;
    long i, j, hankana, length;
    unsigned char c, d;
    enum {NORMAL, KANJI, HANKANA} mode = NORMAL;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation
        length = strlen(str);
        buf = new char[length*3 + 1];

        // CP932 to EUC-JP
        mode = NORMAL;
        hankana = 0;
        i = 0;
        j = 0;
        while (i < length) {
            c = str[i];
            d = c?(str[i+1]):0;
            if (issjis1(c) && issjis2(d)) {
                if (c <= 0x9F) {
                    if (d < 0x9F) {
                        c = (c << 1) - 0xE1;
                    } else {
                        c = (c << 1) - 0xE0;
                    }
                } else {
                    if (d < 0x9F) {
                        c = (c << 1) - 0x161;
                    } else {
                        c = (c << 1) - 0x160;
                    }
                }
                if (d < 0x7F) {
                    d -= 0x1F;
                } else if (d < 0x9F) {
                    d -= 0x20;
                } else {
                    d -= 0x7E;
                }
                buf[j++] = (unsigned char)(c | 0x80);
                buf[j++] = (unsigned char)(d | 0x80);
                i++;
            } else if (ishankana((unsigned char)str[i])) {
                if (j + (++hankana) < length*3) {
                    buf[j++] = 0x8E;
                    buf[j++] = str[i];
                } else {
                    buf[j++] = '?';
                }
            } else {
                buf[j++] = str[i];
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from ISO-2022-JP to EUC-JP
    @param str  ISO-2022-JP text. Byte pairs accepted by issjis1()/issjis2()
                and bytes accepted by ishankana() are converted as well,
                using the same arithmetic as sjis_to_eucjp().
    @return Newly allocated (new[]) EUC-JP text, or NULL when str is NULL
 */
char* jis_to_eucjp(const char *str) {
    char *buf;
    long i, j, hankana, length;
    unsigned char c, d;
    enum {NORMAL, KANJI, HANKANA} mode = NORMAL;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation
        length = strlen(str);
        buf = new char[length*3 + 1];

        // ISO-2022-JP to EUC-JP
        mode = NORMAL;
        hankana = 0;
        i = 0;
        j = 0;
        while (i < length) {
            c = str[i];
            d = c?(str[i+1]):0;
            if (str[i] == 0x1B) {
                // Escape sequence: a recognized three-byte sequence only
                // switches the mode (i += 2 here plus the i++ at the bottom)
                if (str[i+1] == '$') {
                    // ESC $ @ / ESC $ B: switch to double-byte mode
                    if (i+2 < length && (str[i+2] == '@' || str[i+2] == 'B')) {
                        mode = KANJI;
                        i += 2;
                    } else {
                        // Unknown sequence: ESC is copied, '$' is handled
                        // by the next iteration
                        buf[j++] = 0x1B;
                    }
                } else if (str[i+1] == '(') {
                    // ESC ( B / ESC ( J: back to single-byte mode
                    if (i+2 < length && (str[i+2] == 'B' || str[i+2] == 'J')) {
                        mode = NORMAL;
                        i += 2;
                    } else if (i+2 < length && (str[i+2] == 'I')) {
                        // ESC ( I: half-width kana mode
                        mode = HANKANA;
                        i += 2;
                    } else {
                        // NOTE(review): unlike the "ESC $" case above, this
                        // path also skips the '(' byte -- confirm intended.
                        buf[j++] = 0x1B;
                        i++;
                    }
                } else {
                    buf[j++] = 0x1B;
                }
            } else if (str[i] == 0x0E) {
                // 0x0E switches to half-width kana mode
                mode = HANKANA;
            } else if (str[i] == 0x0F) {
                // 0x0F switches back to single-byte mode
                mode = NORMAL;
            } else if ((mode == KANJI) && (isjis(str[i]) && isjis(str[i+1]))) {
                // Double-byte character: set the high bit on both bytes
                buf[j++] = (str[i] | 0x80);
                buf[j++] = (str[i+1] | 0x80);
                i++;
            } else if ((mode == HANKANA) && (0x20 <= str[i] && str[i] <= 0x5F)) {
                // 7-bit half-width kana: 0x8E prefix, then the byte with
                // its high bit set
                buf[j++] = 0x8E;
                buf[j++] = (str[i] | 0x80);
            } else if (issjis1(c) && issjis2(d)) {
                // Byte pair accepted by issjis1()/issjis2(): same row and
                // column arithmetic as in sjis_to_eucjp()
                if (c <= 0x9F) {
                    if (d < 0x9F) {
                        c = (c << 1) - 0xE1;
                    } else {
                        c = (c << 1) - 0xE0;
                    }
                } else {
                    if (d < 0x9F) {
                        c = (c << 1) - 0x161;
                    } else {
                        c = (c << 1) - 0x160;
                    }
                }
                if (d < 0x7F) {
                    d -= 0x1F;
                } else if (d < 0x9F) {
                    d -= 0x20;
                } else {
                    d -= 0x7E;
                }
                buf[j++] = (unsigned char)(c | 0x80);
                buf[j++] = (unsigned char)(d | 0x80);
                i++;
            } else if (ishankana((unsigned char)str[i])) {
                // 8-bit half-width kana: 0x8E prefix while space remains
                if (j + (++hankana) < length*3) {
                    buf[j++] = 0x8E;
                    buf[j++] = str[i];
                } else {
                    buf[j++] = '?';
                }
            } else {
                // Anything else is copied through unchanged
                buf[j++] = str[i];
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from EUC-JP to CP932
    @param str  EUC-JP text
    @return Newly allocated (new[]) CP932 text, or NULL when str is NULL
 */
char* eucjp_to_sjis(const char *str) {
    if (str == NULL) {
        return NULL;
    }

    // The output is never longer than the input
    const long src_len = strlen(str);
    char *out = new char[src_len + 1];
    long r = 0;
    long w = 0;

    while (r < src_len) {
        unsigned char first = str[r];
        unsigned char second = str[r+1];

        if (first == 0x8E && ishankana(second)) {
            // Half-width kana: drop the 0x8E prefix
            out[w++] = second;
            r += 2;
        } else if (iseuc(first) && iseuc(second)) {
            // Double-byte character: strip the high bits, then map
            // row/column onto the CP932 lead/trail byte layout
            first &= 0x7F;
            second &= 0x7F;

            if ((first & 1) == 0) {
                second += 0x7E;
            } else {
                second += (second < 0x60) ? 0x1F : 0x20;
            }

            if (first < 0x5f) {
                first = (first + 0xE1) >> 1;
            } else {
                first = (unsigned char)((first + 0x161) >> 1);
            }

            out[w++] = first;
            out[w++] = second;
            r += 2;
        } else {
            // Everything else is copied through unchanged
            out[w++] = first;
            r += 1;
        }
    }
    out[w] = '\0';

    return out;
}


/*! Convert character set from EUC-JP to ISO-2022-JP
    @param str  EUC-JP text
    @return Newly allocated (new[]) ISO-2022-JP text, or NULL when str is
            NULL. Double-byte text is introduced by ESC $ B, half-width kana
            by 0x0E (closed by 0x0F); the result always ends in single-byte
            mode.
 */
char* eucjp_to_jis(const char *str) {
    char *buf;
    long i, j, length;
    unsigned char a0, a1;
    enum {NORMAL, KANJI, HANKANA} mode = NORMAL;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation.
        // Output never exceeds 3 bytes per input byte while scanning (the
        // costliest steps are a single byte after kanji, ESC ( B + 1, and a
        // kanji pair after kana, 0x0F + ESC $ B + 2), and the closing
        // ESC ( B brings the total to at most length*3 + 2. The previous
        // size of length*3 was too small, so bounds checks silently dropped
        // escape sequences, e.g. the closing one for a single-kanji string.
        length = strlen(str);
        buf = new char[length*3 + 4];

        // EUC-JP to ISO-2022-JP
        i = 0;
        j = 0;
        while (i < length) {
            a0 = str[i];
            a1 = str[i+1];  // at worst the terminator, never out of bounds
            if (iseuc(a0) && iseuc(a1)) {
                // Double-byte character
                if (mode != KANJI) {
                    if (mode == HANKANA) {
                        buf[j++] = 0x0F;
                    }
                    mode = KANJI;
                    buf[j++] = 0x1B;
                    buf[j++] = '$';
                    buf[j++] = 'B';
                }
                buf[j++] = a0 & 0x7F;
                buf[j++] = a1 & 0x7F;
                i++;
            } else if (a0 == 0x8E) {
                // 0x8E introduces a half-width kana; a 0x8E that is not
                // followed by one produces no output
                if (ishankana(a1)) {
                    if (mode != HANKANA) {
                        if (mode == KANJI) {
                            buf[j++] = 0x1B;
                            buf[j++] = '(';
                            buf[j++] = 'B';
                        }
                        mode = HANKANA;
                    }
                    // 0x0E is written before every kana byte, not only on
                    // entering kana mode (behaviour kept as is)
                    buf[j++] = 0x0e;
                    buf[j++] = a1 & 0x7F;
                    i++;
                }
            } else {
                // Single byte: return to single-byte mode first
                if (mode == KANJI) {
                    buf[j++] = 0x1B;
                    buf[j++] = '(';
                    buf[j++] = 'B';
                } else if (mode == HANKANA) {
                    buf[j++] = 0x0F;
                }
                mode = NORMAL;
                buf[j++] = a0;
            }
            i++;
        }

        // Always finish in single-byte mode
        if (mode == KANJI) {
            buf[j++] = 0x1B;
            buf[j++] = '(';
            buf[j++] = 'B';
        } else if (mode == HANKANA) {
            buf[j++] = 0x0F;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from EUC-JP to UTF-8
    @param str  EUC-JP text.
    @return Converted text (UTF-8 text), as returned by sjis_to_utf8()
 */
char* eucjp_to_utf8(const char *str) {
    // Two-step conversion: EUC-JP -> CP932 -> UTF-8
    char *sjis_text = eucjp_to_sjis(str);
    char *utf8_text = sjis_to_utf8(sjis_text);

    // The intermediate text is no longer needed (delete[] accepts NULL)
    delete [] sjis_text;

    return utf8_text;
}


/*! Convert character set from UTF-8 to CP932
    @param str  UTF-8 text
    @return Converted text (CP932 text), as returned by eucjp_to_sjis()
 */
char* utf8_to_sjis(const char *str) {
    // Two-step conversion: UTF-8 -> EUC-JP -> CP932
    char *eucjp_text = utf8_to_eucjp(str);
    char *sjis_text = eucjp_to_sjis(eucjp_text);

    // The intermediate text is no longer needed (delete[] accepts NULL)
    delete [] eucjp_text;

    return sjis_text;
}


/*! Convert character set from UTF-8 to ISO-2022-JP
    @param str  UTF-8 text
    @return Converted text (ISO-2022-JP text), as returned by eucjp_to_jis()
 */
char* utf8_to_jis(const char *str) {
    // Two-step conversion: UTF-8 -> EUC-JP -> ISO-2022-JP
    char *eucjp_text = utf8_to_eucjp(str);
    char *jis_text = eucjp_to_jis(eucjp_text);

    // The intermediate text is no longer needed (delete[] accepts NULL)
    delete [] eucjp_text;

    return jis_text;
}


/*! Convert character set from CP932 to UTF-8
    @param str  CP932 text
    @return Newly allocated (new[]) UTF-8 text, or NULL when str is NULL.
            Characters for which no table row is found are replaced by '?'.
 */
char* sjis_to_utf8(const char *str) {
    char *buf;
    long i, j, length;
    unsigned char c, d, a0, a1;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation
        length = strlen(str);
        buf = new char[length*3 + 1];

        // CP932 to UTF-8
        i = 0;
        j = 0;
        while (i < length) {
            c = str[i];
            d = c?(str[i+1]):0;
            if ((issjis1(c) && issjis2(d)) || ishankana(c)) {
                int pos, old_pos, min, max, s;

                // index_sjis_unicode[c] is the first table row for byte c
                // (negative when there is none); the rows up to the start
                // of the next populated byte form the search range.
                min = index_sjis_unicode[c];
                if (min < 0) {
                    max = min;
                } else if (c != 255) {
                    max = min + 255;
                    for (s = c+1; s <= 255; s++) {
                        if (0 <= index_sjis_unicode[s]) {
                            max = index_sjis_unicode[s]-1;
                            break;
                        }
                    }
                    if (SJIS_UNICODE_TABLE_MAX-1 < max) {
                        max = SJIS_UNICODE_TABLE_MAX-1;
                    }
                } else {
                    max = SJIS_UNICODE_TABLE_MAX-1;
                }

                // Binary search for d in column 0 of the rows min..max
                // (presumably sorted by that column -- the tables are not
                // visible here)
                old_pos = -1;
                pos = (min+max)/2;
                if (0 <= min) do {
                        if (sjis_unicode[pos][0] == d) {
                            break;
                        }

                        old_pos = pos;
                        if (sjis_unicode[old_pos][0] < d) {
                            pos = (old_pos+1+max)/2;
                            min = old_pos+1;
                        } else {
                            pos = (min+old_pos-1)/2;
                            max = old_pos-1;
                        }
                    } while (pos != old_pos && min <= max);

                // The loop was left through "break" (a match) exactly when
                // its continuation condition still holds at this point.
                if (0 <= min && pos != old_pos && min <= max) {
                    // Columns 1 and 2 are used as the high and low byte of
                    // the code point, which is then encoded as UTF-8
                    a0 = sjis_unicode[pos][1];
                    a1 = sjis_unicode[pos][2];
                    if (a0 == (unsigned char)0x00 && (a1 & (unsigned char)0x80) == (unsigned char)0x00) {
                        buf[j++] = a1;
                    } else if (a0 <= (unsigned char)0x07) {
                        buf[j++] = 0xC0 | ((a0 & 7) << 2) | (a1 >> 6);
                        buf[j++] = 0x80 | (0x3F & a1);
                    } else {
                        buf[j++] = 0xE0 | (a0 >> 4);
                        buf[j++] = 0x80 | ((0x3F & (a0 << 2)) | (a1 >> 6));
                        buf[j++] = 0x80 | (0x3F & a1);
                    }
                    // NOTE(review): a match always consumes two input bytes,
                    // and the lookup key includes the following byte d even
                    // when c was accepted by ishankana() alone -- confirm
                    // how the table represents half-width kana.
                    i++;
                } else {
                    buf[j++] = '?';
                }
            } else {
                // Everything else is copied through unchanged
                buf[j++] = c;
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from ISO-2022-JP to UTF-8
    @param str  ISO-2022-JP text
    @return Converted text (UTF-8 text), as returned by sjis_to_utf8()
 */
char* jis_to_utf8(const char *str) {
    // Two-step conversion: ISO-2022-JP -> CP932 -> UTF-8
    char *sjis_text = jis_to_sjis(str);
    char *utf8_text = sjis_to_utf8(sjis_text);

    // The intermediate text is no longer needed (delete[] accepts NULL)
    delete [] sjis_text;

    return utf8_text;
}


/*! Convert character set from CP932 to ISO-2022-JP
    @param str  CP932 text
    @return Newly allocated (new[]) ISO-2022-JP text, or NULL when str is
            NULL. Double-byte runs are wrapped in ESC $ B ... ESC ( B; bytes
            that are not a double-byte lead byte are copied unchanged. A
            lead byte that is the last byte of the input is replaced by '?'.
 */
char* sjis_to_jis(const char *str) {
    char *buf;
    long i, j, jiskanji, length;
    unsigned char a0, a1;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation.
        // Output never exceeds 3 bytes per input byte while scanning
        // (ESC $ B + 2 for a pair, ESC ( B + 1 for a single byte), and the
        // closing ESC ( B brings the total to at most length*3 + 2 plus the
        // terminator. The previous size of length*3 + 1 overflowed, e.g.
        // for a string made of a single kanji.
        length = strlen(str);
        buf = new char[length*3 + 4];

        // CP932 to ISO-2022-JP
        jiskanji = 0;
        i = 0;
        j = 0;
        while (i < length) {
            // Work on unsigned bytes so that no negative value is shifted
            const unsigned char s1 = (unsigned char)str[i];
            const unsigned char s2 = (unsigned char)str[i+1];  // terminator at worst
            const bool lead = (0x81 <= s1 && s1 <= 0x9F) ||
                              (0xE0 <= s1 && s1 <= 0xFC);

            if (!lead || length <= i+1) {
                // Non-Kanji (or a lead byte without its second byte)
                if (jiskanji) {
                    buf[j++] = 0x1B;
                    buf[j++] = 0x28;
                    buf[j++] = 0x42;
                    jiskanji = 0;
                }
                buf[j++] = lead ? '?' : str[i];
                i++;
                continue;
            }

            if (s1 <= 0x9F) {
                // Kanji
                // From division 1 to division 62
                if (s2 <= 0x9E) {
                    // Odd division
                    a0 = ((s1 - 0x80) << 1) - 1;
                    a1 = (s2 < 0x7F)?(s2-63):(s2-64);
                } else {
                    // Even division
                    a0 = (s1 - 0x80) << 1;
                    a1 = s2 - 0x9E;
                }
                // to iso_2022_jp
                a0 += 0x20;
                a1 += 0x20;
            } else if (s1 <= 0xF9) {
                // From division 63 to division 94
                a0 = (s1 - 0xE0) << 1;
                if (s2 <= 0x9E) {
                    // Odd division
                    a0 += 0x3F;
                    a1 = (s2 < 0x7F)?(s2-63):(s2-64);
                } else {
                    // Even division
                    a0 += 0x40;
                    a1 = s2 - 0x9E;
                }
                a0 += 0x20;
                a1 += 0x20;
            } else {
                // From division 115 to division 119 to CP50220:
                // the pair is turned into a linear index (188 trail
                // positions per lead byte) and moved into another range
                unsigned short linear;

                linear = 188 * s1 + (s2 < 0x7f ? s2 - 0x40 : s2 - 0x41);
                if (0xB7B4 <= linear) {
                    linear -= 0xB7B4 - 0xAE0C;
                } else if (0xB7AD <= linear) {
                    linear -= 0xB7AD - 0xAF81;
                } else {
                    linear -= 0xB798 - 0xAF76;
                }

                a0 = (linear - 0x819E) / 94;
                a1 = (linear % 94) + 0x21;
            }

            // Enter double-byte mode once per run
            if (!jiskanji) {
                buf[j++] = 0x1B;
                buf[j++] = 0x24;
                buf[j++] = 0x42;
                jiskanji = 1;
            }
            buf[j++] = a0;
            buf[j++] = a1;
            i += 2;
        }

        // Always finish in single-byte mode
        if (jiskanji) {
            buf[j++] = 0x1B;
            buf[j++] = 0x28;
            buf[j++] = 0x42;
        }

        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from ISO-2022-JP to CP932
    @param str  ISO-2022-JP text
    @return Newly allocated (new[]) CP932 text, or NULL when str is NULL.
            In double-byte mode a byte that is the last byte of the input
            (no second byte available) is replaced by '?'.
 */
char *jis_to_sjis(const char *str) {
    char *buf;
    long i, j, length;
    const char *from;
    enum {IS_ROMAN, IS_KANJI, IS_KANA} shifted;
    int c, normal_flg;
    int hi, lo;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation
        length = strlen(str);
        buf = new char[length * 3 + 2048];

        // ISO-2022-JP to CP932
        shifted = IS_ROMAN;
        i = 0;
        j = 0;
        from = str;
        // Only the index is tested: it is checked before any byte is read,
        // so the loop can never look beyond the terminator
        while (i < length) {
            normal_flg = 0;
            if ((unsigned char)(from[i]) == 0x1B) {
                // Escape sequences only change the current mode
                if (((unsigned char)(from[i+1]) == '$') &&
                        (((unsigned char)(from[i+2])) == 'B' ||
                         ((unsigned char)(from[i+2])) == '@')) {
                    shifted = IS_KANJI;
                    i += 3;
                } else if ((unsigned char)(from[i+1]) == '(' &&
                           (unsigned char)(from[i+2]) == 'I') {
                    shifted = IS_KANA;
                    i += 3;
                } else if ((unsigned char)(from[i+1]) == '(' &&
                           (((unsigned char)(from[i+2])) == 'J' ||
                            ((unsigned char)(from[i+2])) == 'B' ||
                            ((unsigned char)(from[i+2])) == 'H')) {
                    shifted = IS_ROMAN;
                    i += 3;
                } else {             // sequence error
                    normal_flg = 1;
                }
            } else if ((unsigned char)(from[i]) == 0x0E) {
                shifted = IS_KANA;   // to KANA
                i++;
            } else if ((unsigned char)(from[i]) == 0x0F) {
                shifted = IS_ROMAN;  // to ROMAN
                i++;
            } else {
                normal_flg = 1;
            }
            if (normal_flg) {
                // Data byte(s): interpreted according to the current mode
                switch (shifted) {
                    case IS_KANJI:
                        if (length <= i+1) {
                            // Second byte missing
                            buf[j++] = '?';
                            i++;
                            break;
                        }
                        hi = (int) from[i] & 0xFF;
                        lo = (int) from[i+1] & 0xFF;
                        // Two rows share one lead byte: odd rows use the
                        // lower half of the trail-byte range, even rows
                        // the upper half
                        if (hi & 1) {
                            c = ((hi / 2 + (hi < 0x5F ? 0x71 : 0xB1)) << 8) |
                                (lo + (lo >= 0x60 ? 0x20 : 0x1F));
                        } else {
                            c = ((hi / 2 + (hi < 0x5F ? 0x70 : 0xB0)) << 8) | (lo + 0x7E);
                        }
                        buf[j++] = (c >> 8) & 0xFF;
                        buf[j++] = c;
                        i += 2;
                        break;
                    case IS_KANA:
                        // Half-width kana: set the high bit
                        buf[j++] = ((int) from[i]) + 0x80;
                        i++;
                        break;
                    case IS_ROMAN:
                    default:
                        buf[j++] = from[i];
                        i++;
                        break;
                }
            }
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Auto detect japanese character set, and convert text to EUC-JP
    @param str  text for conversion
    @return Converted text (EUC-JP text). When the detected charset is not
            CP932, ISO-2022-JP or UTF-8, a plain copy of str is returned.
            NULL when str is NULL.
 */
char* autojp_to_eucjp(const char *str) {
    if (str == NULL) {
        return NULL;
    }

    const char *detected = jis_auto_detect(str);
    if (detected == (const char *)NULL) {
        detected = "";
    }

    // Dispatch on the detected source charset
    if (strcasecmp(detected, STR_SJIS) == 0) {
        return sjis_to_eucjp(str);
    }
    if (strcasecmp(detected, STR_JIS) == 0) {
        return jis_to_eucjp(str);
    }
    if (strcasecmp(detected, STR_UTF8) == 0) {
        return utf8_to_eucjp(str);
    }

    // No converter applies: hand back a private copy of the input
    const long n = strlen(str);
    char *copy = new char [n + 1];
    memcpy(copy, str, n);
    copy[n] = '\0';

    return copy;
}


/*! Auto detect japanese character set, and convert text to CP932.
    @param str  text for conversion
    @return Converted text (CP932 text). When the detected charset is not
            EUC-JP, ISO-2022-JP or UTF-8, a plain copy of str is returned.
            NULL when str is NULL.
 */
char* autojp_to_sjis(const char *str) {
    if (str == NULL) {
        return NULL;
    }

    const char *detected = jis_auto_detect(str);
    if (detected == (const char *)NULL) {
        detected = "";
    }

    // Dispatch on the detected source charset
    if (strcasecmp(detected, STR_EUCJP) == 0) {
        return eucjp_to_sjis(str);
    }
    if (strcasecmp(detected, STR_JIS) == 0) {
        return jis_to_sjis(str);
    }
    if (strcasecmp(detected, STR_UTF8) == 0) {
        return utf8_to_sjis(str);
    }

    // No converter applies: hand back a private copy of the input
    const long n = strlen(str);
    char *copy = new char [n + 1];
    memcpy(copy, str, n);
    copy[n] = '\0';

    return copy;
}


/*! Auto detect japanese character set, and convert text to ISO-2022-JP
    @param str  text for conversion
    @return Converted text (ISO-2022-JP text). When the detected charset is
            not EUC-JP, CP932 or UTF-8, a plain copy of str is returned.
            NULL when str is NULL.
 */
char* autojp_to_jis(const char *str) {
    if (str == NULL) {
        return NULL;
    }

    const char *detected = jis_auto_detect(str);
    if (detected == (const char *)NULL) {
        detected = "";
    }

    // Dispatch on the detected source charset
    if (strcasecmp(detected, STR_EUCJP) == 0) {
        return eucjp_to_jis(str);
    }
    if (strcasecmp(detected, STR_SJIS) == 0) {
        return sjis_to_jis(str);
    }
    if (strcasecmp(detected, STR_UTF8) == 0) {
        return utf8_to_jis(str);
    }

    // No converter applies: hand back a private copy of the input
    const long n = strlen(str);
    char *copy = new char [n + 1];
    memcpy(copy, str, n);
    copy[n] = '\0';

    return copy;
}


/*! Auto detect japanese character set, and convert text to UTF-8
    @param str  text for conversion
    @return Converted text (UTF-8 text). When the detected charset is not
            EUC-JP, CP932 or ISO-2022-JP, a plain copy of str is returned.
            NULL when str is NULL.
 */
char* autojp_to_utf8(const char *str) {
    if (str == NULL) {
        return NULL;
    }

    const char *detected = jis_auto_detect(str);
    if (detected == (const char *)NULL) {
        detected = "";
    }

    // Dispatch on the detected source charset
    if (strcasecmp(detected, STR_EUCJP) == 0) {
        return eucjp_to_utf8(str);
    }
    if (strcasecmp(detected, STR_SJIS) == 0) {
        return sjis_to_utf8(str);
    }
    if (strcasecmp(detected, STR_JIS) == 0) {
        return jis_to_utf8(str);
    }

    // No converter applies: hand back a private copy of the input
    const long n = strlen(str);
    char *copy = new char [n + 1];
    memcpy(copy, str, n);
    copy[n] = '\0';

    return copy;
}


/*! Convert character set from CP949 to UTF-8
    @param str  CP949 text.
    @return Newly allocated (new[]) UTF-8 text, or NULL when str is NULL.
            Bytes of 0x80 and above for which no table row is found are
            replaced by '?'.
 */
char* euckr_to_utf8(const char* str) {
    char *buf;
    long i, j, length;
    unsigned char c, d, a0, a1;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation
        length = strlen(str);
        buf = new char[length*3 + 1];

        // CP949 to UTF-8
        i = 0;
        j = 0;
        while (i < length) {
            c = str[i];
            d = str[i+1];   // at worst the terminator, never out of bounds
            if ((unsigned char)0x80 <= c) {
                int pos, old_pos, min, max, s;

                // index_euckr_unicode[c] is the first table row for lead
                // byte c (negative when there is none); the rows up to the
                // start of the next populated lead byte form the search
                // range.
                min = index_euckr_unicode[c];
                if (min < 0) {
                    max = min;
                } else if (c != 255) {
                    max = min + 255;
                    for (s = c+1; s <= 255; s++) {
                        if (0 <= index_euckr_unicode[s]) {
                            max = index_euckr_unicode[s]-1;
                            break;
                        }
                    }
                    // Clamp against the size of the table that is searched
                    // below (euckr_unicode), not the reverse-direction table
                    if (EUCKR_UNICODE_TABLE_MAX-1 < max) {
                        max = EUCKR_UNICODE_TABLE_MAX-1;
                    }
                } else {
                    max = EUCKR_UNICODE_TABLE_MAX-1;
                }

                // Binary search for the second byte in column 0
                old_pos = -1;
                pos = (min+max)/2;
                if (0 <= min) do {
                        if (euckr_unicode[pos][0] == d) {
                            break;
                        }

                        old_pos = pos;
                        if (euckr_unicode[old_pos][0] < d) {
                            pos = (old_pos+1+max)/2;
                            min = old_pos+1;
                        } else {
                            pos = (min+old_pos-1)/2;
                            max = old_pos-1;
                        }
                    } while (pos != old_pos && min <= max);

                // The loop was left through "break" (a match) exactly when
                // its continuation condition still holds at this point.
                if (0 <= min && pos != old_pos && min <= max) {
                    // Columns 1 and 2 are used as the high and low byte of
                    // the code point, which is then encoded as UTF-8
                    a0 = euckr_unicode[pos][1];
                    a1 = euckr_unicode[pos][2];
                    if (a0 == (unsigned char)0x00 && (a1 & (unsigned char)0x80) == (unsigned char)0x00) {
                        buf[j++] = a1;
                    } else if (a0 <= (unsigned char)0x07) {
                        buf[j++] = 0xC0 | ((a0 & 7) << 2) | (a1 >> 6);
                        buf[j++] = 0x80 | (0x3F & a1);
                    } else {
                        buf[j++] = 0xE0 | (a0 >> 4);
                        buf[j++] = 0x80 | ((0x3F & (a0 << 2)) | (a1 >> 6));
                        buf[j++] = 0x80 | (0x3F & a1);
                    }
                    // The second byte has been consumed as well
                    i++;
                } else {
                    buf[j++] = '?';
                }
            } else {
                // 7-bit bytes are copied through unchanged
                buf[j++] = c;
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from UTF-8 to CP949
    @param str  UTF-8 text (NUL-terminated); may be NULL
    @return Converted text (CP949 text) in a buffer allocated with new[],
            or NULL when str is NULL. A UTF-8 BOM is dropped, U+203E is
            written as '~', and every sequence without a table entry
            (including all sequences longer than 3 bytes) becomes '?'.
 */
char* utf8_to_euckr(const char* str) {
    char *buf;
    long i, j, length, length2;
    unsigned char a0, a1, a2;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation (no sequence produces more than 2 output bytes)
        length = strlen(str);
        length2 = length*2;
        buf = new char[length2 + 1];

        // UTF-8 to CP949
        i = 0;
        j = 0;
        while (i < length && j <= length2) {
            a0 = str[i];
            if (isutf8_1(a0) && i+utf8_len(a0)-1 < length) {
                int pos, old_pos, min, max, s;
                int found;
                unsigned short len1;
                unsigned char hi, lo;

                // str[i+2] is at worst the terminating NUL here
                a1 = str[i+1];
                a2 = str[i+2];

                // skip the byte order mark
                if (a0 == 0xEF && a1 == 0xBB && a2 == 0xBF) {
                    i += 3;
                    continue;
                }

                // U+203E (overline) is written as '~'
                if (a0 == 0xE2 && a1 == 0x80 && a2 == 0xBE) {
                    buf[j++] = 0x7E;
                    i += 3;
                    continue;
                }

                // Split the code point into high byte / low byte. The
                // unsigned copies are used so that no negative value is
                // ever left-shifted.
                len1 = utf8_len(str[i]);
                if (len1 == 2) {
                    hi = 0x07 & (a0 >> 2);
                    lo = (unsigned char)((a0 << 6) | (0x3F & a1));
                } else if (len1 == 3) {
                    hi = (unsigned char)((a0 << 4) | (0x0F & (a1 >> 2)));
                    lo = (unsigned char)((a1 << 6) | (0x3F & a2));
                } else {
                    buf[j++] = '?';
                    i += len1;
                    continue;
                }

                // [min, max] is the slice of unicode_euckr[] for high byte hi
                min = index_unicode_euckr[hi];
                if (min < 0) {
                    max = min;
                } else if (hi != 255) {
                    max = min + 255;
                    for (s = hi+1; s <= 255; s++) {
                        if (0 <= index_unicode_euckr[s]) {
                            max = index_unicode_euckr[s]-1;
                            break;
                        }
                    }
                    if (UNICODE_EUCKR_TABLE_MAX-1 < max) {
                        max = UNICODE_EUCKR_TABLE_MAX-1;
                    }
                } else {
                    max = UNICODE_EUCKR_TABLE_MAX-1;
                }

                // Binary search for the low byte inside the slice. A hit is
                // recorded explicitly: once the search gives up, pos may
                // already point just outside the slice, so re-reading the
                // table at pos cannot be used to tell hit from miss.
                found = 0;
                pos = 0;
                if (0 <= min) {
                    old_pos = -1;
                    pos = (min+max)/2;
                    do {
                        if (unicode_euckr[pos][0] == lo) {
                            found = 1;
                            break;
                        }

                        old_pos = pos;
                        if (unicode_euckr[old_pos][0] < lo) {
                            pos = (old_pos+1+max)/2;
                            min = old_pos+1;
                        } else {
                            pos = (min+old_pos-1)/2;
                            max = old_pos-1;
                        }
                    } while (pos != old_pos && min <= max);
                }

                if (found) {
                    buf[j++] = unicode_euckr[pos][1];
                    buf[j++] = unicode_euckr[pos][2];
                } else {
                    buf[j++] = '?';
                }
                // the trailing i++ consumes the last byte of the sequence
                i += (len1 - 1);
            } else {
                // ASCII is copied; any other stray byte becomes '?'
                buf[j++] = (a0 & (unsigned char)0x80)?'?':a0;
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from GBK to UTF-8
    @param str  GBK text (NUL-terminated); may be NULL
    @return Converted text (UTF-8 text) in a buffer allocated with new[],
            or NULL when str is NULL. Bytes >= 0x80 that cannot be mapped
            through gbk_unicode[] are written as '?'.
 */
char* gbk_to_utf8(const char* str) {
    char *buf;
    long i, j, length;
    unsigned char c, d, a0, a1;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation (a 2-byte input yields at most 3 output bytes)
        length = strlen(str);
        buf = new char[length*3 + 1];

        // GBK to UTF-8
        i = 0;
        j = 0;
        while (i < length) {
            c = str[i];
            d = str[i+1]; // at worst the terminating NUL, since i < length
            if ((unsigned char)0x80 <= c) {
                long pos, min, max, s;
                unsigned char start;

                // [min, max] is the slice of gbk_unicode[] for lead byte c
                min = index_gbk_unicode[c];
                if (min < 0) {
                    max = min;
                } else if (c != 255) {
                    max = min + 255;
                    for (s = c+1; s <= 255; s++) {
                        if (0 <= index_gbk_unicode[s]) {
                            max = index_gbk_unicode[s]-1;
                            break;
                        }
                    }
                    // clamp against the table that is indexed below
                    if (GBK_UNICODE_TABLE_MAX-1 < max) {
                        max = GBK_UNICODE_TABLE_MAX-1;
                    }
                } else {
                    max = GBK_UNICODE_TABLE_MAX-1;
                }

                // the slice is addressed directly: entry k belongs to
                // trail byte start + k
                start = start_gbk_unicode[c];
                if (0 <= min && min <= max && start <= d && d <= start + (max - min)) {
                    pos = min + (d - start);
                    // a0/a1 are used as the high/low byte of the code point
                    a0 = gbk_unicode[pos][0];
                    a1 = gbk_unicode[pos][1];
                    if (!(a0 == 0 && a1 == 0)) {
                        if (a0 == (unsigned char)0x00 && (a1 & (unsigned char)0x80) == (unsigned char)0x00) {
                            // U+0000..U+007F: one byte
                            buf[j++] = a1;
                        } else if (a0 <= (unsigned char)0x07) {
                            // U+0080..U+07FF: two bytes
                            buf[j++] = 0xC0 | ((a0 & 7) << 2) | (a1 >> 6);
                            buf[j++] = 0x80 | (0x3F & a1);
                        } else {
                            // U+0800..U+FFFF: three bytes
                            buf[j++] = 0xE0 | (a0 >> 4);
                            buf[j++] = 0x80 | ((0x3F & (a0 << 2)) | (a1 >> 6));
                            buf[j++] = 0x80 | (0x3F & a1);
                        }
                        i++; // the trail byte was consumed as well
                    } else {
                        // all-zero entry: hole in the table
                        buf[j++] = '?';
                    }
                } else {
                    buf[j++] = '?';
                }
            } else {
                buf[j++] = c;
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from UTF-8 to GBK
    @param str  UTF-8 text (NUL-terminated); may be NULL
    @return Converted text (GBK text) in a buffer allocated with new[],
            or NULL when str is NULL. A UTF-8 BOM is dropped, U+203E is
            written as '~', and every sequence without a table entry
            (including all sequences longer than 3 bytes) becomes '?'.
 */
char* utf8_to_gbk(const char* str) {
    char *buf;
    long i, j, length, length2;
    unsigned char a0, a1, a2;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation (no sequence produces more than 2 output bytes)
        length = strlen(str);
        length2 = length*2;
        buf = new char[length2 + 1];

        // UTF-8 to GBK
        i = 0;
        j = 0;
        while (i < length && j <= length2) {
            a0 = str[i];
            if (isutf8_1(a0) && i+utf8_len(a0)-1 < length) {
                long pos, min, max, s;
                unsigned short len1;
                unsigned char start, hi, lo;

                // str[i+2] is at worst the terminating NUL here
                a1 = str[i+1];
                a2 = str[i+2];

                // skip the byte order mark
                if (a0 == 0xEF && a1 == 0xBB && a2 == 0xBF) {
                    i += 3;
                    continue;
                }

                // U+203E (overline) is written as '~'
                if (a0 == 0xE2 && a1 == 0x80 && a2 == 0xBE) {
                    buf[j++] = 0x7E;
                    i += 3;
                    continue;
                }

                // Split the code point into high byte / low byte. The
                // unsigned copies are used so that no negative value is
                // ever left-shifted.
                len1 = utf8_len(str[i]);
                if (len1 == 2) {
                    hi = 0x07 & (a0 >> 2);
                    lo = (unsigned char)((a0 << 6) | (0x3F & a1));
                } else if (len1 == 3) {
                    hi = (unsigned char)((a0 << 4) | (0x0F & (a1 >> 2)));
                    lo = (unsigned char)((a1 << 6) | (0x3F & a2));
                } else {
                    buf[j++] = '?';
                    i += len1;
                    continue;
                }

                // [min, max] is the slice of unicode_gbk[] for high byte hi
                min = index_unicode_gbk[hi];
                if (min < 0) {
                    max = min;
                } else if (hi != 255) {
                    max = min + 255;
                    for (s = hi+1; s <= 255; s++) {
                        if (0 <= index_unicode_gbk[s]) {
                            max = index_unicode_gbk[s]-1;
                            break;
                        }
                    }
                    if (UNICODE_GBK_TABLE_MAX-1 < max) {
                        max = UNICODE_GBK_TABLE_MAX-1;
                    }
                } else {
                    max = UNICODE_GBK_TABLE_MAX-1;
                }

                // the slice is addressed directly: entry k belongs to
                // low byte start + k; an all-zero entry is a hole
                start = start_unicode_gbk[hi];
                if (0 <= min && min <= max && start <= lo && lo <= start + (max - min)) {
                    pos = min + (lo - start);
                    if (!(unicode_gbk[pos][0] == 0 && unicode_gbk[pos][1] == 0)) {
                        buf[j++] = unicode_gbk[pos][0];
                        buf[j++] = unicode_gbk[pos][1];
                    } else {
                        buf[j++] = '?';
                    }
                } else {
                    buf[j++] = '?';
                }
                // the trailing i++ consumes the last byte of the sequence
                i += (len1 - 1);
            } else {
                // ASCII is copied; any other stray byte becomes '?'
                buf[j++] = (a0 & (unsigned char)0x80)?'?':a0;
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from BIG5 to UTF-8
    @param str  BIG5 text (NUL-terminated); may be NULL
    @return Converted text (UTF-8 text) in a buffer allocated with new[],
            or NULL when str is NULL. Bytes >= 0x80 that cannot be mapped
            through big5_unicode[] are written as '?'.
 */
char* big5_to_utf8(const char* str) {
    char *buf;
    long i, j, length;
    unsigned char c, d, a0, a1;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation (a 2-byte input yields at most 3 output bytes)
        length = strlen(str);
        buf = new char[length*3 + 1];

        // BIG5 to UTF-8
        i = 0;
        j = 0;
        while (i < length) {
            c = str[i];
            d = str[i+1]; // at worst the terminating NUL, since i < length
            if ((unsigned char)0x80 <= c) {
                int pos, old_pos, min, max, s;

                // [min, max] is the slice of big5_unicode[] for lead byte c
                min = index_big5_unicode[c];
                if (min < 0) {
                    max = min;
                } else if (c != 255) {
                    max = min + 255;
                    for (s = c+1; s <= 255; s++) {
                        if (0 <= index_big5_unicode[s]) {
                            max = index_big5_unicode[s]-1;
                            break;
                        }
                    }
                    // clamp against the table that is searched below
                    if (BIG5_UNICODE_TABLE_MAX-1 < max) {
                        max = BIG5_UNICODE_TABLE_MAX-1;
                    }
                } else {
                    max = BIG5_UNICODE_TABLE_MAX-1;
                }

                // binary search for trail byte d inside the slice
                old_pos = -1;
                pos = (min+max)/2;
                if (0 <= min) do {
                        if (big5_unicode[pos][0] == d) {
                            break;
                        }

                        old_pos = pos;
                        if (big5_unicode[old_pos][0] < d) {
                            pos = (old_pos+1+max)/2;
                            min = old_pos+1;
                        } else {
                            pos = (min+old_pos-1)/2;
                            max = old_pos-1;
                        }
                    } while (pos != old_pos && min <= max);

                // the loop condition still holds only when the search
                // left through the break above, i.e. on a hit
                if (0 <= min && pos != old_pos && min <= max) {
                    // a0/a1 are used as the high/low byte of the code point
                    a0 = big5_unicode[pos][1];
                    a1 = big5_unicode[pos][2];
                    if (a0 == (unsigned char)0x00 && (a1 & (unsigned char)0x80) == (unsigned char)0x00) {
                        // U+0000..U+007F: one byte
                        buf[j++] = a1;
                    } else if (a0 <= (unsigned char)0x07) {
                        // U+0080..U+07FF: two bytes
                        buf[j++] = 0xC0 | ((a0 & 7) << 2) | (a1 >> 6);
                        buf[j++] = 0x80 | (0x3F & a1);
                    } else {
                        // U+0800..U+FFFF: three bytes
                        buf[j++] = 0xE0 | (a0 >> 4);
                        buf[j++] = 0x80 | ((0x3F & (a0 << 2)) | (a1 >> 6));
                        buf[j++] = 0x80 | (0x3F & a1);
                    }
                    i++; // the trail byte was consumed as well
                } else {
                    buf[j++] = '?';
                }
            } else {
                buf[j++] = c;
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from UTF-8 to BIG5
    @param str  UTF-8 text (NUL-terminated); may be NULL
    @return Converted text (BIG5 text) in a buffer allocated with new[],
            or NULL when str is NULL. A UTF-8 BOM is dropped, U+203E is
            written as '~', and every sequence without a table entry
            (including all sequences longer than 3 bytes) becomes '?'.
 */
char* utf8_to_big5(const char* str) {
    char *buf;
    long i, j, length, length2;
    unsigned char a0, a1, a2;

    buf = NULL;

    if (str != NULL) {
        // Memory allocation (no sequence produces more than 2 output bytes)
        length = strlen(str);
        length2 = length*2;
        buf = new char[length2 + 1];

        // UTF-8 to BIG5
        i = 0;
        j = 0;
        while (i < length && j <= length2) {
            a0 = str[i];
            if (isutf8_1(a0) && i+utf8_len(a0)-1 < length) {
                int pos, old_pos, min, max, s;
                int found;
                unsigned short len1;
                unsigned char hi, lo;

                // str[i+2] is at worst the terminating NUL here
                a1 = str[i+1];
                a2 = str[i+2];

                // skip the byte order mark
                if (a0 == 0xEF && a1 == 0xBB && a2 == 0xBF) {
                    i += 3;
                    continue;
                }

                // U+203E (overline) is written as '~'
                if (a0 == 0xE2 && a1 == 0x80 && a2 == 0xBE) {
                    buf[j++] = 0x7E;
                    i += 3;
                    continue;
                }

                // Split the code point into high byte / low byte. The
                // unsigned copies are used so that no negative value is
                // ever left-shifted.
                len1 = utf8_len(str[i]);
                if (len1 == 2) {
                    hi = 0x07 & (a0 >> 2);
                    lo = (unsigned char)((a0 << 6) | (0x3F & a1));
                } else if (len1 == 3) {
                    hi = (unsigned char)((a0 << 4) | (0x0F & (a1 >> 2)));
                    lo = (unsigned char)((a1 << 6) | (0x3F & a2));
                } else {
                    buf[j++] = '?';
                    i += len1;
                    continue;
                }

                // [min, max] is the slice of unicode_big5[] for high byte hi
                min = index_unicode_big5[hi];
                if (min < 0) {
                    max = min;
                } else if (hi != 255) {
                    max = min + 255;
                    for (s = hi+1; s <= 255; s++) {
                        if (0 <= index_unicode_big5[s]) {
                            max = index_unicode_big5[s]-1;
                            break;
                        }
                    }
                    if (UNICODE_BIG5_TABLE_MAX-1 < max) {
                        max = UNICODE_BIG5_TABLE_MAX-1;
                    }
                } else {
                    max = UNICODE_BIG5_TABLE_MAX-1;
                }

                // Binary search for the low byte inside the slice. A hit is
                // recorded explicitly: once the search gives up, pos may
                // already point just outside the slice, so re-reading the
                // table at pos cannot be used to tell hit from miss.
                found = 0;
                pos = 0;
                if (0 <= min) {
                    old_pos = -1;
                    pos = (min+max)/2;
                    do {
                        if (unicode_big5[pos][0] == lo) {
                            found = 1;
                            break;
                        }

                        old_pos = pos;
                        if (unicode_big5[old_pos][0] < lo) {
                            pos = (old_pos+1+max)/2;
                            min = old_pos+1;
                        } else {
                            pos = (min+old_pos-1)/2;
                            max = old_pos-1;
                        }
                    } while (pos != old_pos && min <= max);
                }

                if (found) {
                    buf[j++] = unicode_big5[pos][1];
                    buf[j++] = unicode_big5[pos][2];
                } else {
                    buf[j++] = '?';
                }
                // the trailing i++ consumes the last byte of the sequence
                i += (len1 - 1);
            } else {
                // ASCII is copied; any other stray byte becomes '?'
                buf[j++] = (a0 & (unsigned char)0x80)?'?':a0;
            }
            i++;
        }
        buf[j] = '\0';
    }

    return buf;
}


/*! Convert character set from UTF-7 to UTF-8
    The conversion is handed over unchanged to modutf7_to_utf8().
    @param str  UTF-7 text
    @return Converted text (UTF-8 text), exactly as returned by
            modutf7_to_utf8() for the same argument
 */
char* utf7_to_utf8(const char* str) {
    char *converted = modutf7_to_utf8(str);
    return converted;
}


/*! Value of one hexadecimal digit, used by utf8_to_utf7() to undo %XX
    escapes.
    @param ch  Character to interpret
    @return 0..15 for '0'-'9', 'A'-'F', 'a'-'f'; 0 for anything else
 */
static unsigned int utf8_to_utf7_hexval(unsigned char ch) {
    if ('0' <= ch && ch <= '9') {
        return (unsigned int)(ch - '0');
    }
    if ('A' <= ch && ch <= 'F') {
        return (unsigned int)(ch - 'A' + 10);
    }
    if ('a' <= ch && ch <= 'f') {
        return (unsigned int)(ch - 'a' + 10);
    }
    return 0;
}


/*! Append one byte to the output buffer of utf8_to_utf7(), enlarging the
    buffer when needed. One spare byte is always kept for the final '\0'.
    @param buf       Buffer (allocated with new[]); replaced when it grows
    @param capacity  Allocated size of *buf; updated when it grows
    @param used      Number of bytes written so far; incremented
    @param ch        Byte to append
 */
static void utf8_to_utf7_append(char **buf, int *capacity, int *used, char ch) {
    if (*capacity - *used - 1 < 1) {
        int bigger_size = *capacity * 2;
        char *bigger = new char [bigger_size];
        memcpy(bigger, *buf, *used);
        delete [] *buf;
        *buf = bigger;
        *capacity = bigger_size;
    }
    (*buf)[*used] = ch;
    (*used)++;
}


/*! Convert character set from UTF-8 to UTF-7
    Printable ASCII (0x20-0x7E) is copied as is, with '+' written as "+-".
    Every other character is written as UTF-16 in base64 between '+' and
    '-'. A '%' followed by two more characters in the input is treated as
    a %XX hex escape and decoded before conversion.
    @param str  UTF-8 text (NUL-terminated); may be NULL
    @return Converted text (UTF-7 text) in a buffer allocated with new[],
            or NULL when str is NULL
 */
char* utf8_to_utf7(const char* str) {
    static const char base64chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; // base64
    const unsigned long UTF16MASK      = 0x03FFUL;
    const unsigned long UTF16SHIFT     = 10;
    const unsigned long UTF16BASE      = 0x10000UL;
    const unsigned long UTF16HIGHSTART = 0xD800UL;
    const unsigned long UTF16LOSTART   = 0xDC00UL;
    const int malloc_base_size = 64;
    unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
    unsigned long ucs4, bitbuf;
    int str_malloced_size;
    char *utf7_str;
    int col;

    if (str == NULL) {
        return NULL;
    }

    utf8pos = 0;
    ucs4 = 0;
    bitbuf = 0;

    str_malloced_size = malloc_base_size;
    utf7_str = new char [str_malloced_size];
    col = 0;
    utf7_str[col] = '\0';

    utf7mode = 0;   // non-zero while inside a "+...-" base64 run
    utf8total = 0;  // length of the UTF-8 sequence being collected, 0 = none
    bitstogo = 0;   // bits in bitbuf not yet written as base64
    while ((c = (unsigned char)*str) != '\0') {
        str++;
        // undo hex-encoding (index with unsigned bytes only)
        if (c == '%' && str[0] != '\0' && str[1] != '\0') {
            c = (utf8_to_utf7_hexval((unsigned char)str[0]) << 4)
                | utf8_to_utf7_hexval((unsigned char)str[1]);
            str += 2;
        }
        // normal character?
        if (c >= ' ' && c <= '~') {
            // switch out of UTF-7 mode
            if (utf7mode) {
                if (bitstogo) {
                    // flush the remaining bits, zero-padded to a sextet
                    utf8_to_utf7_append(&utf7_str, &str_malloced_size, &col,
                                        base64chars[(bitbuf << (6 - bitstogo)) & 0x3F]);
                }
                utf8_to_utf7_append(&utf7_str, &str_malloced_size, &col, '-');
                utf7mode = 0;
            }
            utf8_to_utf7_append(&utf7_str, &str_malloced_size, &col, (char)c);
            // encode '+' as '+-'
            if (c == '+') {
                utf8_to_utf7_append(&utf7_str, &str_malloced_size, &col, '-');
            }
            continue;
        }
        // switch to UTF-7 mode
        if (!utf7mode) {
            utf8_to_utf7_append(&utf7_str, &str_malloced_size, &col, '+');
            utf7mode = 1;
            bitbuf = 0;
            bitstogo = 0;
        }
        if (c < 0x80) {
            // non-printable US-ASCII: a complete character on its own
            ucs4 = c;
            utf8total = 1;
        } else if (utf8total) {
            // save UTF8 bits into UCS4
            ucs4 = (ucs4 << 6) | (c & 0x3FUL);
            if (++utf8pos < utf8total) {
                continue;
            }
        } else {
            // lead byte: remember the sequence length and its payload bits
            utf8pos = 1;
            if (c < 0xE0) {
                utf8total = 2;
                ucs4 = c & 0x1F;
            } else if (c < 0xF0) {
                utf8total = 3;
                ucs4 = c & 0x0F;
            } else {
                // NOTE: can't convert UTF8 sequences longer than 4
                // a 4-byte lead byte (11110xxx) carries three payload bits
                utf8total = 4;
                ucs4 = c & 0x07;
            }
            continue;
        }
        // loop to split ucs4 into two utf16 chars if necessary
        utf8total = 0;
        do {
            if (ucs4 >= UTF16BASE) {
                // first pass: queue the high surrogate, keep the low one
                ucs4 -= UTF16BASE;
                bitbuf = (((bitbuf << 16) | ((ucs4 >> UTF16SHIFT))) + UTF16HIGHSTART);
                ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART;
                utf16flag = 1;
            } else {
                bitbuf = (bitbuf << 16) | ucs4;
                utf16flag = 0;
            }
            bitstogo += 16;
            // spew out base64
            while (bitstogo >= 6) {
                bitstogo -= 6;
                utf8_to_utf7_append(&utf7_str, &str_malloced_size, &col,
                                    base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F]);
            }
        } while (utf16flag);
    }

    // if in UTF-7 mode, finish in ASCII
    if (utf7mode) {
        if (bitstogo) {
            utf8_to_utf7_append(&utf7_str, &str_malloced_size, &col,
                                base64chars[(bitbuf << (6 - bitstogo)) & 0x3F]);
        }
        utf8_to_utf7_append(&utf7_str, &str_malloced_size, &col, '-');
    }

    utf7_str[col] = '\0';
    return utf7_str;
}


/*! Six-bit value of one base64 character as accepted by modutf7_to_utf8().
    @param ch  Character to interpret
    @return 0..63 for 'A'-'Z', 'a'-'z', '0'-'9', '+', and for both ',' and
            '/' (63); 64 for every other character
 */
static unsigned char modutf7_to_utf8_sextet(unsigned char ch) {
    if ('A' <= ch && ch <= 'Z') {
        return (unsigned char)(ch - 'A');
    }
    if ('a' <= ch && ch <= 'z') {
        return (unsigned char)(ch - 'a' + 26);
    }
    if ('0' <= ch && ch <= '9') {
        return (unsigned char)(ch - '0' + 52);
    }
    if (ch == '+') {
        return 62;
    }
    if (ch == ',' || ch == '/') {
        return 63;
    }
    return 64;
}


/*! Make sure the output buffer of modutf7_to_utf8() has room for `need`
    more bytes plus the final '\0'; if not, enlarge it by 64 bytes.
    @param buf       Buffer (allocated with new[]); replaced when it grows
    @param capacity  Allocated size of *buf; updated when it grows
    @param used      Number of bytes already written
    @param need      Number of bytes about to be written
 */
static void modutf7_to_utf8_reserve(char **buf, int *capacity, int used, int need) {
    if (*capacity - used - 1 < need) {
        char *grown;
        *capacity += 64;
        grown = new char [*capacity];
        memcpy(grown, *buf, used);
        delete [] *buf;
        *buf = grown;
    }
}


/*! Convert character set from IMAP4 modified UTF-7 to UTF-8
    Both '&' and '+' start a base64 run; "&-" and "+-" stand for the
    literal '&' and '+'. Literal bytes outside 0x20-0x7E other than CR and
    LF are written as %XX (upper-case hex).
    @param str  IMAP4 modified UTF-7 text (NUL-terminated); may be NULL
    @return Converted text (UTF-8 text) in a buffer allocated with new[],
            or NULL when str is NULL
 */
char* modutf7_to_utf8(const char* str) {
    const char hex_digits[] = "0123456789ABCDEF"; // hexadecimal lookup table
    const unsigned char not_base64 = 64;
    const unsigned long SURROGATE_SHIFT = 10;
    const unsigned long SUPPLEMENTARY   = 0x10000UL;
    const unsigned long HIGH_FIRST      = 0xD800UL;
    const unsigned long HIGH_LAST       = 0xDBFFUL;
    const unsigned long LOW_FIRST       = 0xDC00UL;
    const unsigned long LOW_LAST        = 0xDFFFUL;
    int capacity;
    int used;
    char *out;

    if (str == NULL) {
        return NULL;
    }

    capacity = 64;
    out = new char [capacity];
    used = 0;
    out[used] = '\0';

    // walk the input until its terminating NUL
    while (*str != '\0') {
        unsigned char ch = *str++;
        int is_shift = (ch == '&' || ch == '+');
        unsigned long bitbuf, ucs4;
        unsigned char bitcount, six;

        // literal characters, including the "&-" and "+-" escapes
        if (!is_shift || *str == '-') {
            if (' ' <= ch && ch <= '~') {
                // printable: copy as is
                modutf7_to_utf8_reserve(&out, &capacity, used, 1);
                out[used++] = ch;
            } else {
                // room for a full %XX is reserved even for CR / LF
                modutf7_to_utf8_reserve(&out, &capacity, used, 3);
                if (ch == '\r' || ch == '\n') {
                    out[used++] = (char)ch;
                } else {
                    out[used++] = '%';
                    out[used++] = hex_digits[ch >> 4];
                    out[used++] = hex_digits[ch & 0x0f];
                }
            }
            // step over the '-' of an "&-" / "+-" escape
            if (is_shift) {
                ++str;
            }
            continue;
        }

        // base64 run: sextets -> UTF-16 units -> UCS-4 -> UTF-8
        bitbuf = 0;
        bitcount = 0;
        ucs4 = 0;
        while ((six = modutf7_to_utf8_sextet((unsigned char) *str)) != not_base64) {
            unsigned char utf8[4];
            unsigned char nbytes, k;
            unsigned long utf16;

            ++str;
            bitbuf = (bitbuf << 6) | six;
            bitcount += 6;
            // wait until a whole UTF-16 unit has been collected
            if (bitcount < 16) {
                continue;
            }
            bitcount -= 16;
            utf16 = (bitbuf >> bitcount) & 0xffff;

            if (HIGH_FIRST <= utf16 && utf16 <= HIGH_LAST) {
                // high surrogate: keep its bits and wait for the low one
                ucs4 = (utf16 - HIGH_FIRST) << SURROGATE_SHIFT;
                continue;
            }
            if (LOW_FIRST <= utf16 && utf16 <= LOW_LAST) {
                // low surrogate: add to what the high surrogate left
                ucs4 += utf16 - LOW_FIRST + SUPPLEMENTARY;
            } else {
                ucs4 = utf16;
            }

            // encode the code point as UTF-8
            if (ucs4 <= 0x7fUL) {
                utf8[0] = ucs4;
                nbytes = 1;
            } else if (ucs4 <= 0x7ffUL) {
                utf8[0] = 0xc0 | (ucs4 >> 6);
                utf8[1] = 0x80 | (ucs4 & 0x3f);
                nbytes = 2;
            } else if (ucs4 <= 0xffffUL) {
                utf8[0] = 0xe0 | (ucs4 >> 12);
                utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f);
                utf8[2] = 0x80 | (ucs4 & 0x3f);
                nbytes = 3;
            } else {
                utf8[0] = 0xf0 | (ucs4 >> 18);
                utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f);
                utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f);
                utf8[3] = 0x80 | (ucs4 & 0x3f);
                nbytes = 4;
            }

            for (k = 0; k < nbytes; ++k) {
                modutf7_to_utf8_reserve(&out, &capacity, used, 1);
                out[used++] = utf8[k];
            }
        }
        // step over the '-' that closes the base64 run, if present
        if (*str == '-') {
            ++str;
        }
    }

    out[used] = '\0';
    return out;
}


/*! Decode one hexadecimal digit
    @param ch  character to decode
    @return Value 0-15, or 0 when ch is not a hexadecimal digit
 */
static unsigned int modutf7_hex_value(unsigned char ch) {
    if ('0' <= ch && ch <= '9') {
        return (unsigned int)(ch - '0');
    }
    if ('A' <= ch && ch <= 'F') {
        return (unsigned int)(ch - 'A' + 10);
    }
    if ('a' <= ch && ch <= 'f') {
        return (unsigned int)(ch - 'a' + 10);
    }
    return 0;
}


/*! Append one byte to a growable buffer allocated with new[]
    @param buf       buffer (replaced by a larger one when it is full)
    @param capacity  allocated size of buf in bytes (updated on growth)
    @param length    number of bytes used in buf (incremented)
    @param ch        byte to append
    @return (none)
 */
static void modutf7_put_char(char *&buf, int &capacity, int &length, char ch) {
    const int grow_step = 64;

    // always keep room for this byte plus the terminator the caller adds
    if (capacity - length - 1 < 1) {
        char *grown = new char [capacity + grow_step];
        memcpy(grown, buf, length);
        delete [] buf;
        buf = grown;
        capacity += grow_step;
    }
    buf[length] = ch;
    length++;
}


/*! Convert character set from UTF-8 to IMAP4 modified UTF-7
    Printable US-ASCII (0x20-0x7E) is copied as is ('&' becomes "&-");
    everything else is emitted as UTF-16 in modified base64 between
    '&' and '-'. A '%' followed by two more characters is first
    replaced by the byte those two hexadecimal digits describe
    (non-hex digits count as 0).
    @param str  UTF-8 text
    @return Converted text (IMAP4 modified UTF-7 text) allocated with
            new[], or NULL when str is NULL
 */
char* utf8_to_modutf7(const char* str) {
    const char base64chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; // UTF-7 modified base64
    const unsigned long UTF16MASK      = 0x03FFUL;
    const unsigned long UTF16SHIFT     = 10;
    const unsigned long UTF16BASE      = 0x10000UL;
    const unsigned long UTF16HIGHSTART = 0xD800UL;
    const unsigned long UTF16LOSTART   = 0xDC00UL;
    unsigned int utf8pos = 0, utf8total = 0, c, utf7mode = 0, bitstogo = 0, utf16flag;
    unsigned long ucs4 = 0, bitbuf = 0;
    int str_malloced_size;
    char *modutf7_str;
    int col;

    if (str == NULL) {
        return NULL;
    }

    str_malloced_size = 64;
    modutf7_str = new char [str_malloced_size];
    col = 0;
    modutf7_str[col] = '\0';

    while ((c = (unsigned char)*str) != '\0') {
        str++;
        // undo hex-encoding
        // (index by unsigned value: a signed char >= 0x80 must not become a negative index)
        if (c == '%' && str[0] != '\0' && str[1] != '\0') {
            c = (modutf7_hex_value((unsigned char)str[0]) << 4) |
                modutf7_hex_value((unsigned char)str[1]);
            str += 2;
        }
        // normal character?
        if (c >= ' ' && c <= '~') {
            // switch out of UTF-7 mode
            if (utf7mode) {
                if (bitstogo) {
                    // flush the pending bits, zero-padded to a full sextet
                    modutf7_put_char(modutf7_str, str_malloced_size, col,
                                     base64chars[(bitbuf << (6 - bitstogo)) & 0x3F]);
                }
                modutf7_put_char(modutf7_str, str_malloced_size, col, '-');
                utf7mode = 0;
            }
            modutf7_put_char(modutf7_str, str_malloced_size, col, (char)c);
            // encode '&' as '&-'
            if (c == '&') {
                modutf7_put_char(modutf7_str, str_malloced_size, col, '-');
            }
            continue;
        }
        // switch to UTF-7 mode
        if (!utf7mode) {
            modutf7_put_char(modutf7_str, str_malloced_size, col, '&');
            utf7mode = 1;
            bitbuf = 0;
            bitstogo = 0;
        }
        // Encode US-ASCII characters as themselves
        if (c < 0x80) {
            ucs4 = c;
            utf8total = 1;
        } else if (utf8total) {
            // save UTF8 bits into UCS4
            ucs4 = (ucs4 << 6) | (c & 0x3FUL);
            if (++utf8pos < utf8total) {
                continue;
            }
        } else {
            // lead byte: remember the sequence length and its payload bits
            utf8pos = 1;
            if (c < 0xE0) {
                utf8total = 2;
                ucs4 = c & 0x1F;
            } else if (c < 0xF0) {
                utf8total = 3;
                ucs4 = c & 0x0F;
            } else {
                // NOTE: can't convert UTF8 sequences longer than 4
                // a 4-byte lead byte (11110xxx) carries three payload bits
                utf8total = 4;
                ucs4 = c & 0x07;
            }
            continue;
        }
        // loop to split ucs4 into two utf16 chars if necessary
        utf8total = 0;
        do {
            if (ucs4 >= UTF16BASE) {
                // first pass: queue the high surrogate, keep the low one for the next pass
                ucs4 -= UTF16BASE;
                bitbuf = (bitbuf << 16) | ((ucs4 >> UTF16SHIFT) + UTF16HIGHSTART);
                ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART;
                utf16flag = 1;
            } else {
                bitbuf = (bitbuf << 16) | ucs4;
                utf16flag = 0;
            }
            bitstogo += 16;
            // spew out base64
            while (bitstogo >= 6) {
                bitstogo -= 6;
                modutf7_put_char(modutf7_str, str_malloced_size, col,
                                 base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F]);
            }
        } while (utf16flag);
    }

    // if in UTF-7 mode, finish in ASCII
    if (utf7mode) {
        if (bitstogo) {
            modutf7_put_char(modutf7_str, str_malloced_size, col,
                             base64chars[(bitbuf << (6 - bitstogo)) & 0x3F]);
        }
        modutf7_put_char(modutf7_str, str_malloced_size, col, '-');
    }

    modutf7_str[col] = '\0';
    return modutf7_str;
}


#endif


/*! Get display width of text
    A UTF-8 byte order mark (EF BB BF) counts as 0 columns. Bytes for
    which utf8_len() reports a length of 1 or less count as 1 column.
    2- and 3-byte characters are looked up in the Unicode to EUC-JP
    table: a hit is 2 columns when both EUC-JP bytes satisfy iseuc()
    (1 column for half width katakana); a miss falls back to a fixed
    list of wide Unicode ranges. A sequence cut off by the end of the
    string counts 1 column per remaining byte.
    @param str     UTF-8 text
    @return Width of string (0 when str is NULL)
 */
long utf8_width(const char* str) {
    long i, width;
    unsigned short u8len, avail;
    unsigned char b0, b1, b2;
    unsigned char hi, lo;
    unsigned char e0, e1;
    int pos, old_pos, min, max, s;
    char buf[3];

    if (str == NULL) {
        return 0;
    }

    width = 0;

    i = 0;
    while (str[i] != '\0') {
        u8len = utf8_len(str[i]);

        // byte order mark: no width (the && chain never reads past the terminator)
        if ((unsigned char)(str[i]) == (unsigned char)0xEF &&
                (unsigned char)(str[i+1]) == (unsigned char)0xBB &&
                (unsigned char)(str[i+2]) == (unsigned char)0xBF) {
            i += 3;
            continue;
        }

        if (u8len <= 1) {
            width++;
            i++;
            continue;
        }

        // Make sure the whole sequence is present before touching its
        // trailing bytes; otherwise the reads below and the "i += u8len"
        // step would run past the terminating '\0'.
        avail = 1;
        while (avail < u8len && str[i+avail] != '\0') {
            avail++;
        }
        if (avail < u8len) {
            width += avail;
            i += avail;
            continue;
        }

        if (u8len == 2 || u8len == 3) {
            // split the code point into its high and low byte
            b0 = (unsigned char)str[i];
            b1 = (unsigned char)str[i+1];
            if (u8len == 2) {
                hi = 0x07 & (b0 >> 2);
                lo = (unsigned char)((b0 << 6) | (0x3F & b1));
            } else {
                b2 = (unsigned char)str[i+2];
                hi = (unsigned char)((b0 << 4) | (0x0F & (b1 >> 2)));
                lo = (unsigned char)((b1 << 6) | (0x3F & b2));
            }

            // UTF-8 to EUC-JP
            // index_unicode_eucjp[hi] is the first table row for this high
            // byte (negative: none); the next non-negative entry bounds the
            // range searched.
            buf[0] = '\0';
            min = index_unicode_eucjp[hi];
            if (min < 0) {
                max = min;
            } else if (hi != 255) {
                max = min + 255;
                for (s = hi+1; s <= 255; s++) {
                    if (0 <= index_unicode_eucjp[s]) {
                        max = index_unicode_eucjp[s]-1;
                        break;
                    }
                }
                if (UNICODE_EUCJP_TABLE_MAX-1 < max) {
                    max = UNICODE_EUCJP_TABLE_MAX-1;
                }
            } else {
                max = UNICODE_EUCJP_TABLE_MAX-1;
            }

            // binary search for the low byte inside [min, max]
            old_pos = -1;
            pos = (min+max)/2;
            if (0 <= min) {
                do {
                    if (unicode_eucjp[pos][0] == lo) {
                        buf[0] = unicode_eucjp[pos][1];
                        buf[1] = unicode_eucjp[pos][2];
                        buf[2] = '\0';
                        break;
                    }

                    old_pos = pos;
                    if (unicode_eucjp[old_pos][0] < lo) {
                        pos = (old_pos+1+max)/2;
                        min = old_pos+1;
                    } else {
                        pos = (min+old_pos-1)/2;
                        max = old_pos-1;
                    }
                } while (pos != old_pos && min <= max);
            }

            if (buf[0] != '\0') {
                // check width from the EUC-JP bytes
                e0 = buf[0];
                e1 = buf[1];
                if (e0 == 0x8E && ishankana(e1)) {
                    width++;
                } else if (iseuc(e0) && iseuc(e1)) {
                    width += 2;
                } else {
                    width++;
                }
            } else if (hi == 0) {
                // not in the table and below U+0100
                width++;
            } else {
                // not in the table: fall back to fixed wide ranges
                // (the value never exceeds 0xFFFF here)
                unsigned long ucs4;
                ucs4 = (hi * 256) + lo;
                if (0x1100 <= ucs4 &&
                        (ucs4 <= 0x115F || ucs4 == 0x2329 || ucs4 == 0x232A ||
                         (0x2E80 <= ucs4 && ucs4 <= 0xA4CF && ucs4 != 0x303F) ||
                         (0xAC00 <= ucs4 && ucs4 <= 0xD7A3) ||
                         (0xF900 <= ucs4 && ucs4 <= 0xFAFF) ||
                         (0xFE30 <= ucs4 && ucs4 <= 0xFE6F) ||
                         (0xFF00 <= ucs4 && ucs4 <= 0xFF60) ||
                         (0xFFE0 <= ucs4 && ucs4 <= 0xFFE6))) {
                    width += 2;
                } else {
                    width++;
                }
            }
            i += u8len;

        } else {
            // NOTE(review): sequences of 4 or more bytes are counted as
            // one column per byte, not per character - confirm intended.
            width += u8len;
            i += u8len;
        }
    }

    return width;
}


/*! Replace the table entry matching the input at the current position
    Scans from_tbl (NULL terminated) in order; on the first entry that
    matches src at offset i + lead_in_len, writes lead_out (unless 0)
    followed by the entry of to_tbl with the same index to dst at j,
    then advances i and j past what was consumed and written.
    @param src          input text
    @param lead_in_len  number of input bytes to skip before comparing
    @param from_tbl     strings to look for
    @param to_tbl       replacements, parallel to from_tbl
    @param lead_out     byte written before the replacement (0: none)
    @param dst          output buffer
    @param i            read position in src (updated on match)
    @param j            write position in dst (updated on match)
    @return 1 when a replacement was made, 0 otherwise
 */
static int change_width_by_table(const char *src, int lead_in_len,
                                 const char * const *from_tbl,
                                 const char * const *to_tbl,
                                 int lead_out, char *dst, long &i, long &j) {
    int num;

    for (num = 0; from_tbl[num] != NULL; num++) {
        long from_len = (long)strlen(from_tbl[num]);
        if (!strncmp(&src[i + lead_in_len], from_tbl[num], from_len)) {
            if (lead_out != 0) {
                dst[j++] = (char)lead_out;
            }
            strcpy(&dst[j], to_tbl[num]);
            i += from_len + lead_in_len;
            j += (long)strlen(to_tbl[num]);
            return 1;
        }
    }
    return 0;
}


/*! Change text width
    @param str     UTF-8 text
    @param options converting options (ex. "AHI")
                   'A': ASCII to full width
                   'a': ASCII to half width
                   'H': half width katakana to full width hiragana
                   'K': half width katakana to full width katakana
                   'J': full width hiragana to full width katakana
                   'j': full width hiragana to half width katakana
                   'I': full width katakana to full width hiragana
                   'k': full width katakana to half width katakana
                   Options are tried in the order listed above; the
                   first one that matches a character is applied.
                   NULL returns an unchanged copy of str.
    @return Converted text (UTF-8 text) allocated with new[], or NULL
            when str is NULL
 */
char* utf8_change_width(const char* str, const char *options) {
    char *buf;
    long i, j, length, length2;
    int OPTION_A, OPTION_a, OPTION_H, OPTION_K, OPTION_J, OPTION_j, OPTION_I, OPTION_k;

    if (str == NULL) {
        return NULL;
    }

    length = strlen(str);

    if (options == NULL) {
        // nothing to convert: hand back a plain copy
        buf = new char[length + 1];
        strcpy(buf, str);
        return buf;
    }

    // set options
    OPTION_A = strchr(options, 'A')?1:0;
    OPTION_a = strchr(options, 'a')?1:0;
    OPTION_H = strchr(options, 'H')?1:0;
    OPTION_K = strchr(options, 'K')?1:0;
    OPTION_J = strchr(options, 'J')?1:0;
    OPTION_j = strchr(options, 'j')?1:0;
    OPTION_I = strchr(options, 'I')?1:0;
    OPTION_k = strchr(options, 'k')?1:0;

    // Memory allocation (output is bounded by 6 bytes per input byte)
    length2 = length*6;
    buf = new char[length2 + 1];

    // change width
    i = 0;
    j = 0;
    while (i < length && j <= length2) {
        int mblen, converted;
        unsigned char lead;

        mblen = utf8_len(str[i]);
        if (mblen < 1) {
            // never stall on a byte reported with a non-positive length
            mblen = 1;
        }
        if (length < i + mblen) {
            // sequence cut off by the end of the text: drop it
            break;
        }
        lead = (unsigned char)str[i];
        converted = 0;

        // 'A': ASCII to full width
        if (OPTION_A && mblen == 1 && (lead & 0x80) == 0) {
            converted = change_width_by_table(str, 0, HALF_ASCII, FULL_ASCII, 0, buf, i, j);
        }

        // 'a': ASCII to half width
        if (OPTION_a && converted == 0 && mblen == 3 && (lead & 0xE0) == 0xE0) {
            converted = change_width_by_table(str, 0, FULL_ASCII, HALF_ASCII, 0, buf, i, j);
        }

        // The kana tables are compared from the second byte on; the lead
        // byte (0xEF for half width, 0xE3 for full width) is checked and
        // written separately.

        // 'H': half width katakana to full width hiragana
        if (OPTION_H && converted == 0 && 3 <= mblen && lead == 0xEF) {
            converted = change_width_by_table(str, 1, HALF_KANA, FULL_HIRA, 0xE3, buf, i, j);
        }

        // 'K': half width katakana to full width katakana
        if (OPTION_K && converted == 0 && 3 <= mblen && lead == 0xEF) {
            converted = change_width_by_table(str, 1, HALF_KANA, FULL_KANA, 0xE3, buf, i, j);
        }

        // 'J': full width hiragana to full width katakana
        if (OPTION_J && converted == 0 && 3 <= mblen && lead == 0xE3) {
            converted = change_width_by_table(str, 1, FULL_HIRA, FULL_KANA, 0xE3, buf, i, j);
        }

        // 'j': full width hiragana to half width katakana
        if (OPTION_j && converted == 0 && 3 <= mblen && lead == 0xE3) {
            converted = change_width_by_table(str, 1, FULL_HIRA, HALF_KANA, 0xEF, buf, i, j);
        }

        // 'I': full width katakana to full width hiragana
        if (OPTION_I && converted == 0 && 3 <= mblen && lead == 0xE3) {
            converted = change_width_by_table(str, 1, FULL_KANA, FULL_HIRA, 0xE3, buf, i, j);
        }

        // 'k': full width katakana to half width katakana
        if (OPTION_k && converted == 0 && 3 <= mblen && lead == 0xE3) {
            converted = change_width_by_table(str, 1, FULL_KANA, HALF_KANA, 0xEF, buf, i, j);
        }

        if (converted == 0) {
            // no rule applied: copy the character unchanged
            memcpy(&buf[j], &str[i], mblen);
            i += mblen;
            j += mblen;
        }
    }
    buf[j] = '\0';

    return buf;
}


/*! Clean up UTF-8 string (replace invalid characters)
    The string is edited in place and never grows. Every sequence that
    has a bad continuation byte, is encoded longer than necessary, is a
    surrogate (U+D800-U+DFFF), lies above U+10FFFF, or has a length
    other than 1-4 is overwritten with one '?' per byte of the length
    reported by utf8_len(). A sequence cut off by the end of the string
    is removed by terminating the string there.
    @param str     string for cleaning up
    @return Pointer of cleaned up string (Same address of str), or NULL
            when str is NULL
 */
char* utf8_clean(char* str) {
    int ucs4_code;
    int utf8_size;
    int i;
    unsigned char* pstr;
    unsigned char* peob;

    if (str == NULL) {
        return NULL;
    }

    pstr = (unsigned char *)str;
    peob = pstr + strlen(str);

    while (pstr < peob) {
        utf8_size = utf8_len(pstr[0]);
        if (utf8_size < 1) {
            // never stall on a byte reported with a non-positive length
            utf8_size = 1;
        }

        // incomplete sequence at the end: cut the string here
        if (peob < pstr + utf8_size) {
            pstr[0] = '\0';
            break;
        }

        // payload bits of the first byte
        switch (utf8_size) {
            case 1:
                ucs4_code = pstr[0];
                break;
            case 2:
                ucs4_code = pstr[0] & 0x1F;
                break;
            case 3:
                ucs4_code = pstr[0] & 0x0F;
                break;
            case 4:
                ucs4_code = pstr[0] & 0x07;
                break;
            default:
                ucs4_code = -1;
                break;
        }

        // fold in the continuation bytes, each of which must be 10xxxxxx
        if (0 <= ucs4_code) {
            for (i = 1; i < utf8_size; i++) {
                if ((pstr[i] & 0xC0) != 0x80) {
                    ucs4_code = -1;
                    break;
                }
                ucs4_code = (ucs4_code << 6) | (pstr[i] & 0x3F);
            }
        }

        // each length may only carry the code points that need that length
        switch (utf8_size) {
            case 1:
                if (!((0x00 <= ucs4_code) && (ucs4_code <= 0x7F))) {
                    ucs4_code = -1;
                }
                break;
            case 2:
                if (!((0x80 <= ucs4_code) && (ucs4_code <= 0x7FF))) {
                    ucs4_code = -1;
                }
                break;
            case 3:
                // surrogates U+D800-U+DFFF are excluded
                if (!(((0x800 <= ucs4_code) && (ucs4_code <= 0xD7FF)) ||
                        ((0xE000 <= ucs4_code) && (ucs4_code <= 0xFFFF)))) {
                    ucs4_code = -1;
                }
                break;
            case 4:
                // 4-byte sequences start at U+10000; anything lower is overlong
                if (!((0x10000 <= ucs4_code) && (ucs4_code <= 0x10FFFF))) {
                    ucs4_code = -1;
                }
                break;
            default:
                ucs4_code = -1;
                break;
        }

        if (ucs4_code < 0) {
            for (i = 0; i < utf8_size; i++) {
                pstr[i] = '?';
            }
        }
        pstr += utf8_size;
    }

    return str;
}

} // namespace apolloron
