#!/usr/bin/env perl

use Encode;
use FileHandle;
use IPC::Open2;

# Unicode planes to scan, keyed by the codeset name given on the command
# line.  Each value is a reference to an array of plane numbers; the plane
# number is shifted left by 16 bits and combined with a 16-bit offset to form
# the code points that get converted.  A codeset with no entry here has no
# planes to iterate over, so nothing is dumped for it.
my %search_plane = (
    'iso-2022-jp-ms' => [0],
    'iso-2022-jp'    => [0],
);

# nkf_encode($codeset, $utf8)
#
# Convert the character string $utf8 to $codeset by running it through nkf
# and return the converted byte string.
#
#   $codeset - nkf output codeset name (passed as --oc=$codeset)
#   $utf8    - Perl character string to convert
#
# Two paths are used:
#   * Text ending in a C0 control character (U+0000..U+001F, newline
#     included) is converted by a dedicated nkf process that receives exactly
#     the encoded bytes and whose complete output is returned unmodified.
#   * Any other text is sent, followed by a newline, to the long-running nkf
#     process behind the NKFIN/NKFOUT handles (opened by the main script);
#     one output line is read back and its line terminator removed.
#
# Dies if nkf cannot be started or if the long-running process stops
# producing output.
sub nkf_encode {
    my ($codeset, $utf8) = @_;

    # The decision is made on the character string, before it is encoded.
    my $ends_with_control = ($utf8 =~ /[\x{0000}-\x{001F}]$/s);
    my $bytes = Encode::encode("utf-8", $utf8);

    if ($ends_with_control) {
        # List-form open2 runs nkf directly: no shell is involved, so
        # $codeset cannot be interpreted as shell syntax and the conversion
        # does not depend on the shell's printf understanding "\xHH" escapes
        # (POSIX printf only defines octal escapes).
        my $pid = open2(my $from_nkf, my $to_nkf,
                        "nkf", "-ux", "--ic=utf-8", "--oc=$codeset");
        print {$to_nkf} $bytes;
        close $to_nkf;                  # EOF lets nkf finish and exit

        # Slurp everything nkf wrote, embedded newlines included.
        my $converted = do { local $/; <$from_nkf> };
        close $from_nkf;
        waitpid($pid, 0);
        return $converted;
    }

    print NKFIN $bytes . "\n";
    my $mb = <NKFOUT>;
    # EOF here means the nkf process is gone; every later conversion would
    # fail silently, so stop with a clear message instead of returning undef.
    defined $mb
        or die "nkf_encode: no output from nkf (process exited?)\n";
    chomp($mb);
    return $mb;
}

# print_ucs2mb_one_char($ucs, $mb)
#
# Print one mapping line to the currently selected output handle:
# "<UXXXX> " (code point $ucs as at least four upper-case hex digits)
# followed by every character of $mb written as "\xHH", then a newline.
sub print_ucs2mb_one_char {
    my ($ucs, $mb) = @_;

    # Render each character of the multibyte string as a \xHH escape.
    my $hex_bytes = join '', map { sprintf "\\x%02X", ord($_) } split //, $mb;

    printf "<U%04X> %s", $ucs, $hex_bytes;
    print "\n";
}

# dump_ucs2mb_iso2022_plane($codeset, $plane, $escseq)
#
# For every 16-bit offset in Unicode plane $plane, convert the code point to
# $codeset with nkf_encode() and, when the converted bytes start with the
# escape sequence $escseq, print the mapping with print_ucs2mb_one_char().
#
#   $codeset - codeset name handed to nkf_encode()
#   $plane   - plane number; code points are ($plane << 16) | offset
#   $escseq  - escape sequence (with a real ESC byte) selecting the character
#              set whose mappings are wanted
#
# The printed code point is the 16-bit offset only, not the full value
# including the plane.
sub dump_ucs2mb_iso2022_plane {
    my ($codeset, $plane, $escseq) = @_;

    # When the wanted set is designated by ESC ( B or ESC ( J, every character
    # is converted with U+3000 in front of it and the bytes produced for that
    # U+3000 are cut off again afterwards -- presumably so that nkf has to
    # emit the designation explicitly before the character under test.
    my $state_ascii = ($escseq =~ /^\x1B\([BJ]/) ? 1 : 0;
    my $lead        = $state_ascii ? chr(0x3000) : '';

    # Length of what U+3000 alone converts to, without a trailing ESC ( B.
    my $dummy = nkf_encode($codeset, "\x{3000}");
    $dummy =~ s/\x1B\(B$//;
    my $skiplen = length($dummy);

    my $esclen = length($escseq);

    for (my $u = 0; $u < 0x10000; $u++) {
        # Surrogate code points are skipped in plane 0.
        next if ($plane == 0 && $u >= 0xD800 && $u <= 0xDFFF);

        # $mb is lexical to the loop body; it was previously an undeclared
        # package global.
        my $mb = nkf_encode($codeset, $lead . chr($plane << 16 | $u));
        $mb = substr($mb, $skiplen) if $state_ascii;

        # Keep only output that begins with the requested escape sequence,
        # and drop output where ESC ( B is directly followed by a literal
        # backslash-x.
        next unless substr($mb, 0, $esclen) eq $escseq;
        next if $mb =~ /^\x1B\(B\\x/;

        $mb = substr($mb, $esclen);
        # Outside the ESC ( B / ESC ( J case, remove the first ESC ( B from
        # what remains.
        $mb =~ s/\x1B\(B// unless $state_ascii;

        print_ucs2mb_one_char($u, $mb) if length($mb) > 0;
    }
}

# dump_ucs2mb_iso2022($codeset, $escseq)
#
# Run dump_ucs2mb_iso2022_plane() once for each plane number registered for
# $codeset in %search_plane, passing $codeset and $escseq through unchanged.
sub dump_ucs2mb_iso2022 {
    my ($codeset, $escseq) = @_;

    my $plane_list = $search_plane{$codeset};
    for my $plane_no (@{$plane_list}) {
        dump_ucs2mb_iso2022_plane($codeset, $plane_no, $escseq);
    }
}

# --- Main script -----------------------------------------------------------
# Arguments: <codeset> <escape sequence>.  The escape sequence is written
# with a leading "ESC" (any letter case), which is replaced by a real ESC
# byte (0x1B) before use.
unless (@ARGV == 2) {
    printf STDERR "Usage: mb2ucs_iso2022 <codeset> <escape sequence>\n";
    exit 1;
}

my ($codeset, $escseq) = @ARGV;

unless ($escseq =~ /^ESC/i) {
   printf STDERR "%s: Illegal format.\n", $escseq;
   exit 1;
}
$escseq =~ s/^ESC/\x1B/i;

# Start the nkf process that nkf_encode() talks to through the NKFIN (write)
# and NKFOUT (read) handles.
my @nkf_command = ("nkf", "-ux", "--ic=utf-8", "--oc=${codeset}");
my $pid = open2(\*NKFOUT, \*NKFIN, @nkf_command);

dump_ucs2mb_iso2022($codeset, $escseq);

# Closing nkf's input lets it terminate; then reap the child process.
close NKFIN;
waitpid($pid, 0);

0;
