/* Output a conversion table from multibyte characters to UCS characters
 * Copyright (C) 2006  MIRACLE LINUX CORPORATION.
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA  02110-1301, USA.
 */
#include <errno.h>
#include <iconv.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * One UCS-4 code unit as written by iconv() into the "UCS-4LE"/"UCS-4BE"
 * output buffers.  The type must be exactly 32 bits wide: the dump passes
 * derive the number of converted characters by dividing the number of bytes
 * written by sizeof(ucs4_t), and read the buffer back as an array of ucs4_t.
 * A wider type (e.g. unsigned long on LP64) makes that count truncate to zero.
 */
typedef uint32_t ucs4_t;

/*
 * Per-position byte tables, indexed by byte value (0..255).  An entry is set
 * to 1 by the dump pass of the matching length when a sequence carrying that
 * byte in that position was converted successfully.
 */
char singlebyte[256];
char doublebyte_1st[256];
char doublebyte_2nd[256];
char triplebyte_1st[256];
char triplebyte_2nd[256];
char triplebyte_3rd[256];

/*
 * Each flag is raised by the next shorter pass when iconv() fails with
 * EINVAL, and tells dump_mb2ucs() to run the pass of that length.
 */
int search_doublebyte = 0;
int search_triplebyte = 0;
int search_quadbyte   = 0;

/*
 * Predicates over the tables above.  They test each position independently,
 * so they answer "has every byte been seen in its position", not "has this
 * exact sequence been seen".
 */
#define is_singlebyte(c) (singlebyte[(c)])
#define is_doublebyte(c1, c2) (doublebyte_1st[(c1)] && doublebyte_2nd[(c2)])
/* Must consult the triple-byte tables, not the double-byte ones. */
#define is_triplebyte(c1, c2, c3) (triplebyte_1st[(c1)] \
                                   && triplebyte_2nd[(c2)] \
                                   && triplebyte_3rd[(c3)])

/* Capacity, in bytes, of the local `mb` input buffers passed to iconv(). */
#define MAXMB   10
/* Capacity, in ucs4_t elements, of the local `ucs4` output buffers. */
#define MAXUCS4 10

/*
 * Print one table line on stdout: the input bytes as "\xHH" escapes, a
 * space, then each converted code point as "<UXXXX>", then a newline.
 *
 *   mb     - input byte sequence
 *   inlen  - number of bytes in mb
 *   ucs4   - converted code points
 *   outlen - number of elements in ucs4
 */
void
print_mb2ucs_one_char(unsigned char *mb, size_t inlen, ucs4_t *ucs4, size_t outlen)
{
    size_t i;

    for (i = 0; i < inlen; i++) {
        printf("\\x%02X", (unsigned int)mb[i]);
    }
    putchar(' ');
    for (i = 0; i < outlen; i++) {
        /* Cast so the argument always matches %lX, whatever ucs4_t is. */
        printf("<U%04lX>", (unsigned long)ucs4[i]);
    }
    putchar('\n');
}

/*
 * Report a failed conversion on stderr: the input bytes as "\xHH" escapes,
 * a space, then a description of the errno value left by iconv().
 *
 *   mb    - input byte sequence that failed to convert
 *   inlen - number of bytes in mb
 *   pos   - offset reported alongside EILSEQ
 *
 * Must be called with errno still holding the value set by the failing
 * iconv() call.
 */
void
error_mb2ucs_one_char(unsigned char *mb, size_t inlen, int pos)
{
    /* Snapshot first: the stdio calls below are allowed to modify errno. */
    const int saved_errno = errno;
    size_t i;

    for (i = 0; i < inlen; i++) {
        fprintf(stderr, "\\x%02X", (unsigned int)mb[i]);
    }
    fputc(' ', stderr);
    switch (saved_errno) {
    case E2BIG:
        fprintf(stderr, "iconv(): E2BIG\n");
        break;
    case EILSEQ:
        fprintf(stderr, "iconv(): EILSEQ(%d)\n", pos);
        break;
    case EINVAL:
        fprintf(stderr, "iconv(): EINVAL\n");
        break;
    default:
        /* perror() reads errno, so put the original value back. */
        errno = saved_errno;
        perror("iconv()");
        break;
    }
}

void
dump_mb2ucs_1(iconv_t cd)
{
    int c;
    unsigned char mb[MAXMB];
    ucs4_t ucs4[MAXUCS4];
    char *inbuf, *outbuf;
    size_t inbytesleft, outbytesleft;

    memset(singlebyte, 0, 256);

    for (c = 0; c < 0x100; c++) {
        mb[0] = (unsigned char)c;
        inbuf = (char *)mb;
        outbuf = (char *)ucs4;
        inbytesleft = 1;
        outbytesleft = sizeof(ucs4);

        if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft)
            != (size_t)(-1))
        {
            print_mb2ucs_one_char(mb, 1,
                ucs4, (sizeof(ucs4) - outbytesleft) / sizeof(ucs4_t));
            singlebyte[c] = 1;
        } else {
            if (errno == EINVAL) {
                search_doublebyte = 1;
            }
            error_mb2ucs_one_char(mb, 1, inbuf - (char *)mb);
        }
    }
}

/*
 * Two-byte pass: try every (c1, c2) pair whose lead byte is not already a
 * known single-byte character.  Pairs that convert are printed and recorded
 * in doublebyte_1st[]/doublebyte_2nd[]; failures are reported on stderr, and
 * an EINVAL failure raises search_triplebyte.
 */
void
dump_mb2ucs_2(iconv_t cd)
{
    int c1, c2;
    unsigned char mb[MAXMB];
    ucs4_t ucs4[MAXUCS4];
    char *inbuf, *outbuf;
    size_t inbytesleft, outbytesleft;

    memset(doublebyte_1st, 0, sizeof(doublebyte_1st));
    memset(doublebyte_2nd, 0, sizeof(doublebyte_2nd));

    for (c1 = 0x00; c1 < 0x100; c1++) {
        if (is_singlebyte(c1))
            continue;
        for (c2 = 0x00; c2 < 0x100; c2++) {
            mb[0] = (unsigned char)c1;
            mb[1] = (unsigned char)c2;
            /* iconv() advances these, so reset them for every attempt. */
            inbuf = (char *)mb;
            outbuf = (char *)ucs4;
            inbytesleft = 2;
            outbytesleft = sizeof(ucs4);

            if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft)
                != (size_t)(-1))
            {
                print_mb2ucs_one_char(mb, 2,
                    ucs4, (sizeof(ucs4) - outbytesleft) / sizeof(ucs4_t));
                doublebyte_1st[c1] = 1;
                doublebyte_2nd[c2] = 1;
            } else {
                if (errno == EINVAL) {
                    search_triplebyte = 1;
                }
                error_mb2ucs_one_char(mb, 2, inbuf - (char *)mb);
            }
        }
    }
}

/*
 * Three-byte pass: try every (c1, c2, c3) triple, skipping lead bytes that
 * are known single-byte characters and (c1, c2) prefixes that is_doublebyte()
 * accepts.  Triples that convert are printed and recorded in the
 * triplebyte_*[] tables; failures are reported on stderr, and an EINVAL
 * failure raises search_quadbyte.
 */
void
dump_mb2ucs_3(iconv_t cd)
{
    int c1, c2, c3;
    unsigned char mb[MAXMB];
    ucs4_t ucs4[MAXUCS4];
    char *inbuf, *outbuf;
    size_t inbytesleft, outbytesleft;

    memset(triplebyte_1st, 0, sizeof(triplebyte_1st));
    memset(triplebyte_2nd, 0, sizeof(triplebyte_2nd));
    memset(triplebyte_3rd, 0, sizeof(triplebyte_3rd));

    for (c1 = 0x00; c1 < 0x100; c1++) {
        if (is_singlebyte(c1))
            continue;
        for (c2 = 0x00; c2 < 0x100; c2++) {
            if (is_doublebyte(c1, c2))
                continue;
            for (c3 = 0x00; c3 < 0x100; c3++) {
                mb[0] = (unsigned char)c1;
                mb[1] = (unsigned char)c2;
                mb[2] = (unsigned char)c3;
                /* iconv() advances these, so reset them for every attempt. */
                inbuf = (char *)mb;
                outbuf = (char *)ucs4;
                inbytesleft = 3;
                outbytesleft = sizeof(ucs4);

                if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft)
                    != (size_t)(-1))
                {
                    print_mb2ucs_one_char(mb, 3,
                        ucs4, (sizeof(ucs4) - outbytesleft) / sizeof(ucs4_t));
                    triplebyte_1st[c1] = 1;
                    triplebyte_2nd[c2] = 1;
                    triplebyte_3rd[c3] = 1;
                } else {
                    if (errno == EINVAL) {
                        search_quadbyte = 1;
                    }
                    error_mb2ucs_one_char(mb, 3, inbuf - (char *)mb);
                }
            }
        }
    }
}
 
/*
 * Four-byte pass: try every (c1, c2, c3, c4) sequence, skipping prefixes
 * that the single-, double- or triple-byte predicates already accept.
 * Sequences that convert are printed; failures are reported on stderr.
 * This is the longest pass, so it neither records tables nor raises a flag.
 */
void
dump_mb2ucs_4(iconv_t cd)
{
    int c1, c2, c3, c4;
    unsigned char mb[MAXMB];
    ucs4_t ucs4[MAXUCS4];
    char *inbuf, *outbuf;
    size_t inbytesleft, outbytesleft;

    for (c1 = 0x00; c1 < 0x100; c1++) {
        if (is_singlebyte(c1))
            continue;
        for (c2 = 0x00; c2 < 0x100; c2++) {
            if (is_doublebyte(c1, c2))
                continue;
            for (c3 = 0x00; c3 < 0x100; c3++) {
                if (is_triplebyte(c1, c2, c3))
                    continue;
                for (c4 = 0x00; c4 < 0x100; c4++) {
                    mb[0] = (unsigned char)c1;
                    mb[1] = (unsigned char)c2;
                    mb[2] = (unsigned char)c3;
                    mb[3] = (unsigned char)c4;
                    /* iconv() advances these, so reset them every attempt. */
                    inbuf = (char *)mb;
                    outbuf = (char *)ucs4;
                    inbytesleft = 4;
                    outbytesleft = sizeof(ucs4);

                    if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft)
                        != (size_t)(-1))
                    {
                        print_mb2ucs_one_char(mb, 4,
                            ucs4, (sizeof(ucs4) - outbytesleft) / sizeof(ucs4_t));
                    } else {
                        error_mb2ucs_one_char(mb, 4, inbuf - (char *)mb);
                    }
                }
            }
        }
    }
}
 
/*
 * Dump the conversion table from `codeset` to UCS-4.
 *
 * Opens an iconv descriptor whose target is the UCS-4 variant matching the
 * host byte order, so the output buffer can be read back directly as ucs4_t
 * values.  It then runs the single-byte pass, followed by each longer pass
 * whose search_* flag has been raised.  Exits with status 1 if iconv_open()
 * rejects the codeset.
 */
void
dump_mb2ucs(const char *codeset)
{
    iconv_t cd;
    ucs4_t endian = 0x1234;
    const char *ucs4_codeset;

    /* The first byte in memory is zero only when the high-order byte comes first. */
    if (*((unsigned char *)&endian) == 0)
        ucs4_codeset = "UCS-4BE";
    else
        ucs4_codeset = "UCS-4LE";

    if ((cd = iconv_open(ucs4_codeset, codeset)) == (iconv_t)(-1)) {
        fprintf(stderr, "%s is not supported.\n", codeset);
        exit(1);
    }
    dump_mb2ucs_1(cd);
    if (search_doublebyte)
        dump_mb2ucs_2(cd);
    if (search_triplebyte)
        dump_mb2ucs_3(cd);
    if (search_quadbyte)
        dump_mb2ucs_4(cd);

    iconv_close(cd);
}

/*
 * Clear every entry of the single-, double- and triple-byte tables.
 */
void
init_table(void)
{
    int slot = 256;

    while (slot-- > 0) {
        singlebyte[slot] = 0;
        doublebyte_1st[slot] = 0;
        doublebyte_2nd[slot] = 0;
        triplebyte_1st[slot] = 0;
        triplebyte_2nd[slot] = 0;
        triplebyte_3rd[slot] = 0;
    }
}

/*
 * Entry point.  Expects exactly one argument, the source codeset name;
 * otherwise prints a usage line on stderr and returns 1.
 */
int
main(int argc, char *argv[])
{
    if (argc == 2) {
        init_table();
        dump_mb2ucs(argv[1]);
        return 0;
    }

    fprintf(stderr, "Usage: mb2ucs codeset\n");
    return 1;
}
