#!/usr/bin/env perl

use Encode;
use FileHandle;
use IPC::Open2;

# Convert one multibyte sequence to a Perl character string using nkf.
#
# Parameters:
#   $codeset - input encoding name, passed to nkf as --ic=<codeset>
#   $mb      - raw bytes to convert (escape sequence followed by code bytes)
#
# Returns whatever Encode::decode("utf-8", ...) yields for nkf's output.
#
# Two transports are used:
#   * If the last byte of $mb is a control or 8-bit byte, a dedicated nkf
#     process is started for this one sequence.  (Presumably because such a
#     byte, e.g. a newline, cannot travel over the line-oriented persistent
#     pipe below -- confirm against nkf behaviour.)
#   * Otherwise the sequence is sent over the NKFIN/NKFOUT pipe pair, which
#     the caller must have opened beforehand.
sub nkf_decode {
    my ($codeset, $mb) = @_;
    my $utf8;
    if ($mb =~ /[\x00-\x1F\x80-\xFF]$/s) {
        # Run nkf directly with list-form arguments instead of going through
        # `printf "..." | nkf ...` in a shell: $codeset can no longer be
        # interpreted by the shell, and the bytes are written verbatim rather
        # than relying on printf(1) understanding "\xNN" escapes, which POSIX
        # does not define.
        my ($from_nkf, $to_nkf);
        my $pid = open2($from_nkf, $to_nkf,
                        "nkf", "-ux", "--ic=$codeset", "--oc=utf-8");
        binmode($to_nkf);
        binmode($from_nkf);
        print {$to_nkf} $mb;
        # Closing the write side signals end of input so nkf emits its
        # result and exits.
        close($to_nkf);
        $utf8 = do { local $/; <$from_nkf> };
        close($from_nkf);
        # Reap the child so repeated calls do not accumulate zombies.
        waitpid($pid, 0);
    } else {
        # Terminate the record with ESC ( B and a newline, then read exactly
        # one line back from the persistent nkf process.
        print NKFIN $mb . "\x1B\(B\n";
        $utf8 = <NKFOUT>;
        # Drop a trailing ESC ( B if it was echoed, then the newline.
        $utf8 =~ s/\x1B\(B$//;
        chomp($utf8);
    }
    return Encode::decode("utf-8", $utf8);
}

# Print one mapping line to the currently selected output handle, in the form
#   \xHH[\xHH...] <UXXXX>[<UXXXX>...]
#
# Parameters:
#   $mb  - the multibyte code; each character is printed as \xHH
#   $ucs - the decoded string; each character is printed as <UXXXX>
#
# Nothing is printed when $ucs is empty.  When $mb is empty, a single \x00
# is printed and only the first character of $ucs is shown.
sub print_mb2ucs_one_char {
    my ($mb, $ucs) = @_;

    return if (length($ucs) == 0);

    my @mb_codes  = map { ord } split(//, $mb);
    my @ucs_codes = map { ord } split(//, $ucs);

    # Empty multibyte code: fall back to one zero byte and one code point.
    if (!@mb_codes) {
        @mb_codes  = (0);
        @ucs_codes = ($ucs_codes[0]);
    }

    # Build a format with one conversion per value, then emit the whole line.
    my $format = ("\\x%02X" x @mb_codes) . " " . ("<U%04X>" x @ucs_codes) . "\n";
    printf $format, @mb_codes, @ucs_codes;
}

# Dump the mapping of a single-byte character set.
#
# Parameters:
#   $codeset - input encoding name forwarded to nkf_decode
#   $escseq  - escape sequence that is prepended to every code byte
#
# Every byte value 0x00..0xFF is appended to $escseq and decoded; each
# non-empty result is printed with the escape sequence stripped off again.
sub dump_mb2ucs_iso2022_1 {
    my ($codeset, $escseq) = @_;
    my $len = length($escseq);

    # The counter is lexical so the loop does not touch the package
    # variable $main::c.
    for (my $c = 0; $c < 0x100; $c++) {
        my $mb = $escseq . pack('C', $c);
        my $ucs = nkf_decode($codeset, $mb);
        if (length($ucs) > 0) {
            print_mb2ucs_one_char(substr($mb, $len), $ucs);
        }
    }
}

# Dump the mapping of a two-byte character set.
#
# Parameters:
#   $codeset - input encoding name forwarded to nkf_decode
#   $escseq  - escape sequence that is prepended to every code pair
#
# Both bytes range over 0x21..0x7E.  Each pair is appended to $escseq and
# decoded; each non-empty result is printed with the escape sequence
# stripped off again.
sub dump_mb2ucs_iso2022_2 {
    my ($codeset, $escseq) = @_;
    my $len = length($escseq);

    for (my $c1 = 0x21; $c1 < 0x7F; $c1++) {
        # Lexical like $c1, so the loop does not touch the package
        # variable $main::c2.
        for (my $c2 = 0x21; $c2 < 0x7F; $c2++) {
            my $mb = $escseq . pack('CC', $c1, $c2);
            my $ucs = nkf_decode($codeset, $mb);
            if (length($ucs) > 0) {
                print_mb2ucs_one_char(substr($mb, $len), $ucs);
            }
        }
    }
}

# Select the dumper that matches the escape sequence and run it.
#
# Parameters:
#   $codeset - input encoding name forwarded to the dumper
#   $escseq  - escape sequence; its second character picks the dumper:
#                "("  -> dump_mb2ucs_iso2022_1 (one code byte per character)
#                "$"  -> dump_mb2ucs_iso2022_2 (two code bytes per character)
#
# Any other second character prints an error to STDERR and exits with 1.
sub dump_mb2ucs_iso2022 {
    my ($codeset, $escseq) = @_;

    my %dumper_for = (
        "("  => \&dump_mb2ucs_iso2022_1,
        "\$" => \&dump_mb2ucs_iso2022_2,
    );

    my $selector = substr($escseq, 1, 1);
    my $dumper = defined($selector) ? $dumper_for{$selector} : undef;

    if (!$dumper) {
        printf STDERR "Unknown escape sequence.\n";
        exit 1;
    }

    $dumper->($codeset, $escseq);
}

# Command line: <codeset> <escape sequence>.  The escape sequence must be
# written with a leading literal "ESC" (case-insensitive), which is replaced
# by the real ESC byte (0x1B).
if (scalar(@ARGV) != 2) {
    printf STDERR "Usage: mb2ucs_iso2022 <codeset> <escape sequence>\n";
    exit 1;
}

my ($codeset, $escseq) = @ARGV;

if ($escseq !~ /^ESC/i) {
   printf STDERR "%s: Illegal format.\n", $ARGV[1];
   exit 1;
}
$escseq =~ s/^ESC/\x1B/i;

# Start the long-running nkf process used by nkf_decode through the
# NKFIN (write) and NKFOUT (read) handles.
my @nkf_command = ("nkf", "-ux", "--ic=${codeset}", "--oc=utf-8");
my $pid = open2(\*NKFOUT, \*NKFIN, @nkf_command);

dump_mb2ucs_iso2022($codeset, $escseq);

# Shut the pipes down and reap the child.
close NKFIN;
close NKFOUT;
waitpid($pid, 0);

0;
