#!/usr/bin/env perl
#
# Output a conversion table from iso-2022-jp characters to UCS characters
# Copyright (C) 2006  MIRACLE LINUX CORPORATION.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

use Encode;
use Encode::ISO2022JPMS;

# Print one line of the conversion table to the currently selected handle.
#
#   $mb  - the multibyte sequence; every character of it is written as
#          "\xHH" using its ordinal value
#   $ucs - the decoded string; every character of it is written as "<UXXXX>"
#
# The two halves are separated by a single space and the line ends with a
# newline.  Nothing at all is printed when $ucs is empty.
sub print_mb2ucs_one_char {
    my ($mb, $ucs) = @_;

    my $ucs_count = length($ucs);
    return if ($ucs_count == 0);

    # With an empty $mb, exactly one unit is printed on each side:
    # ord() of the empty substring yields 0, so the left side is "\x00".
    my $mb_count = length($mb);
    if ($mb_count == 0) {
        ($mb_count, $ucs_count) = (1, 1);
    }

    printf "\\x%02X", ord(substr($mb, $_, 1)) foreach (0 .. $mb_count - 1);
    print " ";
    printf "<U%04X>", ord(substr($ucs, $_, 1)) foreach (0 .. $ucs_count - 1);
    print "\n";
}

# Dump the table for a character set that uses one byte per character.
#
# For every byte value 0x00..0xFF the escape sequence followed by that byte
# is decoded with $codeset, and the pair is handed to
# print_mb2ucs_one_char().
#
#   $codeset - encoding name passed to Encode's decode()
#   $escseq  - escape sequence prepended to each byte before decoding
sub dump_mb2ucs_iso2022_1 {
    my ($codeset, $escseq) = @_;

    # The counter is lexical so that this sub does not overwrite a package
    # global named $c.
    foreach my $c (0x00 .. 0xFF) {
        my $mb = $escseq . pack('C', $c);
        my $ucs = decode($codeset, $mb);

        # Skip results starting with the two characters "\x"
        # (\x{005C}\x{0078}) -- presumably the decoder's escaped rendering
        # of a byte it could not map; confirm against the decoder in use.
        next if ($ucs =~ /^\x{005C}\x{0078}/);

        print_mb2ucs_one_char($mb, $ucs);
    }
}

# Dump the table for a character set that uses two bytes per character.
#
# Both bytes range over 0x21..0x7E.  For every pair the escape sequence
# followed by the two bytes is decoded with $codeset, and the pair is handed
# to print_mb2ucs_one_char().
#
#   $codeset - encoding name passed to Encode's decode()
#   $escseq  - escape sequence prepended to each byte pair before decoding
sub dump_mb2ucs_iso2022_2 {
    my ($codeset, $escseq) = @_;

    # Both counters are lexical so that this sub does not overwrite a
    # package global named $c2.
    foreach my $c1 (0x21 .. 0x7E) {
        foreach my $c2 (0x21 .. 0x7E) {
            my $mb = $escseq . pack('CC', $c1, $c2);
            my $ucs = decode($codeset, $mb);

            # Skip results starting with the two characters "\x"
            # (\x{005C}\x{0078}) -- presumably the decoder's escaped
            # rendering of bytes it could not map; confirm against the
            # decoder in use.
            next if ($ucs =~ /^\x{005C}\x{0078}/);

            print_mb2ucs_one_char($mb, $ucs);
        }
    }
}

# Pick the dump routine that matches the escape sequence.
#
# The second character of $escseq selects the routine:
#   "("  -> dump_mb2ucs_iso2022_1()
#   "$"  -> dump_mb2ucs_iso2022_2()
# Any other value is reported on STDERR and the program exits with status 1.
#
#   $codeset - encoding name, passed through unchanged
#   $escseq  - escape sequence, passed through unchanged
sub dump_mb2ucs_iso2022 {
    my ($codeset, $escseq) = @_;

    my $selector = substr($escseq, 1, 1);

    return dump_mb2ucs_iso2022_1($codeset, $escseq) if ($selector eq '(');
    return dump_mb2ucs_iso2022_2($codeset, $escseq) if ($selector eq '$');

    printf STDERR "Unknown escape sequence.\n";
    exit 1;
}

# Command-line driver: validate the two arguments, replace the textual
# "ESC" prefix of the second one with the real escape byte (0x1B) and dump
# the requested table.  Every validation failure exits with status 1.
my $codeset;
my $escseq;

# Exactly two arguments are required: <codeset> and <escape sequence>.
if (@ARGV != 2) {
    # print rather than printf: the usage text is plain text, not a format
    # string, so a literal "%" added later cannot be misinterpreted.
    print STDERR <<END;
Usage: mb2ucs_iso2022_perl <codeset> <escape sequence>
   codeset
       iso-2022-jp
       iso-2022-jp-ms
   escape sequence
       'ESC(B'
       'ESC(J'
       'ESC(I'
       'ESC\$@'
       'ESC\$B'
       'ESC\$(?'
END
    exit 1;
}

# Reject codeset names that Encode does not know.
$codeset = $ARGV[0];
if (!defined(find_encoding($codeset))) {
    print STDERR "Unknown encoding $codeset\n";
    exit 1;
}

# The escape sequence must start with "ESC" (case-insensitive).  The
# substitution both checks for the prefix and replaces it with 0x1B; it
# yields false when the prefix is missing.
$escseq = $ARGV[1];
unless ($escseq =~ s/^ESC/\x1B/i) {
    printf STDERR "%s: Illegal format.\n", $ARGV[1];
    exit 1;
}

dump_mb2ucs_iso2022($codeset, $escseq);

0;
