/* Output a conversion table from ISO-2022-JP characters to UCS characters
 * Copyright (C) 2006  MIRACLE LINUX CORPORATION.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA  02110-1301, USA.
 */
#include <errno.h>
#include <iconv.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/*
 * Per-byte flags, indexed by byte value.  dump_mb2ucs_iso2022_1() clears the
 * table and then sets singlebyte[c] to 1 for every byte c that iconv()
 * converted successfully as a single byte following the escape sequence.
 * dump_mb2ucs_iso2022_2() skips flagged bytes when choosing a first byte.
 */
char singlebyte[256];

/* Non-zero when byte value c is flagged in singlebyte[]; c indexes the table directly. */
#define is_singlebyte(c) (singlebyte[(c)])

/*
 * One UCS-4 code unit.
 *
 * iconv() stores its UCS-4 output as 4-byte units directly into arrays of
 * this type, and the number of converted characters is computed by dividing
 * the produced byte count by sizeof(ucs4_t).  The type therefore has to be
 * exactly 32 bits wide; "unsigned long" is 64 bits on LP64 platforms, which
 * makes the computed count (and the printed values) wrong there.
 */
typedef uint32_t ucs4_t;

/* Buffer capacities used throughout this file. */
#define MAXMB   32      /* bytes in the multibyte input buffer */
#define MAXUCS4 32      /* ucs4_t elements in the conversion output buffer */
#define MAXESCSEQ 10    /* bytes in the escape sequence buffer, NUL included */

/*
 * Print one table line to stdout: the input bytes as "\xHH" escapes, a
 * single space, each converted code point as "<UXXXX>", then a newline.
 *
 *   mb     - input byte sequence that was handed to iconv()
 *   inlen  - number of bytes in mb
 *   ucs4   - converted code points
 *   outlen - number of elements in ucs4 to print
 */
void
print_mb2ucs_one_char(unsigned char *mb, size_t inlen, ucs4_t *ucs4, size_t outlen)
{
    size_t i;

    for (i = 0; i < inlen; i++) {
        printf("\\x%02X", (unsigned int)mb[i]);
    }
    putchar(' ');
    for (i = 0; i < outlen; i++) {
        /* Cast so the argument matches %lX whatever integer type ucs4_t is. */
        printf("<U%04lX>", (unsigned long)ucs4[i]);
    }
    putchar('\n');
}

/*
 * Report a failed iconv() call on stderr: the input bytes as "\xHH"
 * escapes, a space, then a description of the error found in errno.
 *
 * Must be called right after the failing iconv() so that errno still
 * describes that failure.
 *
 *   mb    - input byte sequence that was handed to iconv()
 *   inlen - number of bytes in mb
 *   pos   - offset reported for EILSEQ (callers pass how far iconv()
 *           advanced its input pointer)
 */
void
error_mb2ucs_one_char(unsigned char *mb, size_t inlen, int pos)
{
    /* Capture errno first: the stdio calls below are allowed to change it. */
    const int saved_errno = errno;
    size_t i;

    for (i = 0; i < inlen; i++) {
        fprintf(stderr, "\\x%02X", (unsigned int)mb[i]);
    }
    fputc(' ', stderr);
    switch (saved_errno) {
    case E2BIG:
        fprintf(stderr, "iconv(): E2BIG\n");
        break;
    case EILSEQ:
        fprintf(stderr, "iconv(): EILSEQ(%d)\n", pos);
        break;
    case EINVAL:
        fprintf(stderr, "iconv(): EINVAL\n");
        break;
    default:
        /* perror() reads the current errno, so restore the saved value. */
        errno = saved_errno;
        perror("iconv()");
        break;
    }
}

/*
 * Single-byte pass: for every byte value 0x00..0xFF, convert the sequence
 * "escseq + byte" with cd.  Successful conversions are printed with
 * print_mb2ucs_one_char() and the byte is flagged in singlebyte[]; failures
 * are reported with error_mb2ucs_one_char().
 *
 * singlebyte[] is cleared on entry.  The conversion descriptor is reset to
 * its initial state after every attempt so each byte is converted
 * independently of the previous one.
 *
 *   cd     - conversion descriptor from iconv_open()
 *   escseq - NUL-terminated escape sequence placed before each byte; it
 *            must leave room in a MAXMB-byte buffer for one more byte and
 *            a NUL, otherwise an error is printed and nothing is converted
 */
void
dump_mb2ucs_iso2022_1(iconv_t cd, char *escseq)
{
    unsigned char mb[MAXMB];
    ucs4_t ucs4[MAXUCS4];
    size_t len = strlen(escseq);
    int c;

    memset(singlebyte, 0, sizeof(singlebyte));

    /* mb has to hold the escape sequence, one character byte and a NUL. */
    if (len + 2 > sizeof(mb)) {
        fprintf(stderr, "Escape sequence is too long.\n");
        return;
    }

    for (c = 0; c < 0x100; c++) {
        char *inbuf = (char *)mb;
        char *outbuf = (char *)ucs4;
        size_t inlen = len + 1;
        size_t inbytesleft = inlen;
        size_t outbytesleft = sizeof(ucs4);

        memcpy(mb, escseq, len);
        mb[len] = (unsigned char)c;
        mb[len + 1] = '\0';
        memset(ucs4, 0, sizeof(ucs4));

        if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft)
            != (size_t)(-1))
        {
            /* Output count assumes one ucs4_t element per unit iconv() wrote. */
            print_mb2ucs_one_char(mb, inlen,
                ucs4, (sizeof(ucs4) - outbytesleft) / sizeof(ucs4[0]));
            singlebyte[c] = 1;
        } else {
            /* Nothing may touch errno between iconv() and this call. */
            error_mb2ucs_one_char(mb, inlen, (int)(inbuf - (char *)mb));
        }
        /* Put cd back into its initial conversion state. */
        iconv(cd, NULL, NULL, NULL, NULL);
    }
}

/*
 * Two-byte pass: for every pair (c1, c2) of byte values 0x00..0xFF, convert
 * the sequence "escseq + c1 + c2" with cd.  First bytes flagged by
 * is_singlebyte() are skipped.  Successful conversions are printed with
 * print_mb2ucs_one_char(); failures are reported with
 * error_mb2ucs_one_char().
 *
 * The conversion descriptor is reset to its initial state after every
 * attempt so each pair is converted independently of the previous one.
 *
 *   cd     - conversion descriptor from iconv_open()
 *   escseq - NUL-terminated escape sequence placed before each pair; it
 *            must leave room in a MAXMB-byte buffer for two more bytes and
 *            a NUL, otherwise an error is printed and nothing is converted
 */
void
dump_mb2ucs_iso2022_2(iconv_t cd, char *escseq)
{
    unsigned char mb[MAXMB];
    ucs4_t ucs4[MAXUCS4];
    size_t len = strlen(escseq);
    int c1, c2;

    /* mb has to hold the escape sequence, two character bytes and a NUL. */
    if (len + 3 > sizeof(mb)) {
        fprintf(stderr, "Escape sequence is too long.\n");
        return;
    }

    for (c1 = 0x00; c1 < 0x100; c1++) {
        if (is_singlebyte(c1)) {
            continue;
        }
        for (c2 = 0x00; c2 < 0x100; c2++) {
            char *inbuf = (char *)mb;
            char *outbuf = (char *)ucs4;
            size_t inlen = len + 2;
            size_t inbytesleft = inlen;
            size_t outbytesleft = sizeof(ucs4);

            memcpy(mb, escseq, len);
            mb[len] = (unsigned char)c1;
            mb[len + 1] = (unsigned char)c2;
            mb[len + 2] = '\0';
            memset(ucs4, 0, sizeof(ucs4));

            if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft)
                != (size_t)(-1))
            {
                /* Output count assumes one ucs4_t element per unit iconv() wrote. */
                print_mb2ucs_one_char(mb, inlen,
                    ucs4, (sizeof(ucs4) - outbytesleft) / sizeof(ucs4[0]));
            } else {
                /* Nothing may touch errno between iconv() and this call. */
                error_mb2ucs_one_char(mb, inlen, (int)(inbuf - (char *)mb));
            }
            /* Put cd back into its initial conversion state. */
            iconv(cd, NULL, NULL, NULL, NULL);
        }
    }
}

/*
 * Open a converter from codeset to host-endian UCS-4 and dump the table for
 * the character set selected by escseq.
 *
 * The character following the first byte of escseq chooses the passes:
 *   '('  - single-byte pass only
 *   '$'  - single-byte pass, then the two-byte pass
 * Anything else is reported as an unknown escape sequence.
 *
 * Terminates the process with exit status 1 if the codeset cannot be opened
 * or the escape sequence is not recognised.
 *
 *   codeset - source codeset name passed to iconv_open()
 *   escseq  - NUL-terminated escape sequence
 */
void
dump_mb2ucs_iso2022(const char *codeset, char *escseq)
{
    iconv_t cd;
    ucs4_t endian = 0x1234;
    const char *ucs4_codeset;
    char designator;

    /* A zero first byte means the most significant byte is stored first. */
    if (*((unsigned char *)&endian) == 0)
        ucs4_codeset = "UCS-4BE";
    else
        ucs4_codeset = "UCS-4LE";

    if ((cd = iconv_open(ucs4_codeset, codeset)) == (iconv_t)(-1)) {
        fprintf(stderr, "%s is not suppoted.\n", codeset);
        exit(1);
    }

    /* Do not look past the terminator of an empty escape sequence. */
    designator = (escseq[0] != '\0') ? escseq[1] : '\0';

    switch (designator) {
    case '(':
        dump_mb2ucs_iso2022_1(cd, escseq);
        break;
    case '$':
        /* The single-byte pass fills singlebyte[], which the second pass reads. */
        dump_mb2ucs_iso2022_1(cd, escseq);
        dump_mb2ucs_iso2022_2(cd, escseq);
        break;
    default:
        fprintf(stderr, "Unknown escape sequence.\n");
        iconv_close(cd);    /* release the descriptor before bailing out */
        exit(1);
    }

    iconv_close(cd);
}

/* Reset every entry of singlebyte[] to zero. */
void
init_table(void)
{
    memset(singlebyte, 0, sizeof(singlebyte));
}

/*
 * Usage: mb2ucs_iso2022 <codeset> <escape sequence>
 *
 * The escape sequence argument is written as the literal text "ESC"
 * (case-insensitive) followed by the remaining characters of the sequence.
 * The "ESC" prefix is replaced by the byte 0x1B before the table is dumped.
 *
 * Returns 0 on success, or 1 for a wrong argument count, an argument that
 * does not start with "ESC", or a sequence that does not fit in the
 * MAXESCSEQ-byte buffer.
 */
int
main(int argc, char *argv[])
{
    char escseq[MAXESCSEQ];
    const char *tail;
    size_t tail_len;

    if (argc != 3) {
        fprintf(stderr, "Usage: mb2ucs_iso2022 <codeset> <escape sequence>\n");
        return 1;
    }
    if (strncasecmp(argv[2], "ESC", 3) != 0) {
        fprintf(stderr, "%s: Illegal format.\n", argv[2]);
        return 1;
    }

    tail = argv[2] + 3;
    tail_len = strlen(tail);
    /* escseq must hold the ESC byte, the tail and the terminating NUL. */
    if (tail_len + 2 > sizeof(escseq)) {
        fprintf(stderr, "%s: Escape sequence is too long.\n", argv[2]);
        return 1;
    }
    escseq[0] = '\x1B';
    memcpy(escseq + 1, tail, tail_len + 1);     /* copies the NUL as well */

    init_table();
    dump_mb2ucs_iso2022(argv[1], escseq);

    return 0;
}
