#!/usr/bin/env perl

use strict;
use Encode;
use FileHandle;
use IPC::Open2;

# Plane numbers to scan for each supported codeset.  dump_ucs2mb() walks
# the list stored under the codeset name and hands every plane number to
# dump_ucs2mb_plane().  Every codeset currently lists only plane 0.
my %search_plane = map { ($_ => [0]) } qw(
    cp932
    cp51932
    eucjp-ms
    sjis
    euc-jp
);

# Convert a character string to the multibyte encoding $codeset with nkf.
#
# Parameters:
#   $codeset - output codeset name, handed to nkf as --oc=<codeset>
#   $utf8    - Perl character string to convert (the callers in this file
#              pass exactly one character)
#
# Returns the bytes produced by nkf.  For input sent through the persistent
# NKFIN/NKFOUT pipe the trailing newline is removed, and undef is returned
# if that pipe has reached end-of-file.
#
# The NKFIN/NKFOUT handles must already be connected to a running nkf
# process before the first call with non-control input.
sub nkf_encode {
    my ($codeset, $utf8) = @_;

    my $bytes = Encode::encode("utf-8", $utf8);

    # A lone control character (optionally followed by a newline) cannot go
    # through the line-oriented pipe below: a newline would be taken as the
    # record terminator and leave the request/response stream out of step.
    # Such input is converted by a dedicated nkf process instead.
    if ($bytes =~ /^[\x00-\x1F]$/s) {
        return _nkf_encode_oneshot($codeset, $bytes);
    }

    # One line in, one line out on the persistent nkf process.
    print NKFIN $bytes . "\n";
    my $mb = <NKFOUT>;
    chomp($mb) if defined $mb;    # undef means nkf closed its output
    return $mb;
}

# Run a dedicated nkf process for a single input and return everything it
# writes to its standard output.
#
# The process is started with the list form of open2(), so no shell is
# involved: $codeset cannot be interpreted as shell syntax, and the input
# bytes are written verbatim rather than relying on the shell's printf
# understanding "\xHH" escapes (which POSIX does not require).
sub _nkf_encode_oneshot {
    my ($codeset, $bytes) = @_;

    my ($from_nkf, $to_nkf);
    my $pid = open2($from_nkf, $to_nkf,
                    "nkf", "-ux", "--ic=utf-8", "--oc=${codeset}");
    binmode($to_nkf);
    binmode($from_nkf);

    print {$to_nkf} $bytes;
    close($to_nkf);               # EOF tells nkf the input is complete

    my $mb = do { local $/; <$from_nkf> };    # slurp the whole reply
    close($from_nkf);
    waitpid($pid, 0);             # reap the child so it does not linger

    return $mb;
}

# Print one mapping line to the currently selected output handle:
#
#   <UXXXX> \xHH\xHH...
#
# Parameters:
#   $ucs - code point number, printed as at least four uppercase hex digits
#   $mb  - multibyte string; each of its characters is printed as an
#          uppercase "\xHH" escape
sub print_ucs2mb_one_char {
    my ($ucs, $mb) = @_;

    my @escapes = map { sprintf("\\x%02X", ord($_)) } split(//, $mb);

    printf "<U%04X> %s\n", $ucs, join('', @escapes);
}

# Dump every mapping of one 0x10000-code-point plane.
#
# Parameters:
#   $codeset - codeset name forwarded to nkf_encode()
#   $plane   - plane number; the code points scanned are
#              ($plane << 16) | 0x0000 .. ($plane << 16) | 0xFFFF
#
# Each code point is converted with nkf_encode(); those that yield a
# non-empty result are printed with print_ucs2mb_one_char().
sub dump_ucs2mb_plane {
    my ($codeset, $plane) = @_;
    my $base = $plane << 16;

    CODEPOINT:
    for my $low (0x0000 .. 0xFFFF) {
        # The surrogate range of plane 0 is never converted.
        next CODEPOINT
            if $plane == 0 && $low >= 0xD800 && $low <= 0xDFFF;

        my $char = chr($base | $low);
        my $mb   = nkf_encode($codeset, $char);

        # Nothing came back from the conversion: no line for this one.
        next CODEPOINT unless length($mb) > 0;

        print_ucs2mb_one_char(ord($char), $mb);
    }
}

# Dump the mappings of every plane registered for $codeset in
# %search_plane.  A codeset without an entry in the table produces no
# output.
sub dump_ucs2mb {
    my ($codeset) = @_;

    # Copy the table entry into a scalar first so that an unknown codeset
    # does not create a new key in %search_plane.
    my $planes = $search_plane{$codeset};

    for my $plane_no (@$planes) {
        dump_ucs2mb_plane($codeset, $plane_no);
    }
}

# Entry point: ucs2mb.pl codeset
#
# Starts one long-lived nkf process connected to NKFIN/NKFOUT (used by
# nkf_encode()), dumps the table for the requested codeset to standard
# output, then shuts the nkf process down.
my $codeset;

if ($#ARGV != 0) {
    print STDERR "Usage: ucs2mb.pl codeset\n";
    exit 1;
}
$codeset = $ARGV[0];

# Reject codesets that are not in %search_plane up front; otherwise the
# run would start nkf and then silently print an empty table.
if (!exists $search_plane{$codeset}) {
    print STDERR "ucs2mb.pl: unsupported codeset '$codeset' (expected one of: ",
                 join(', ', sort keys %search_plane), ")\n";
    exit 1;
}

# List form of open2(): nkf is executed directly, without a shell.
my $pid = open2(\*NKFOUT, \*NKFIN, "nkf",
                                   "-ux",
                                   "--ic=utf-8",
                                   "--oc=${codeset}");

dump_ucs2mb($codeset);

# Closing our end of the pipes lets nkf see EOF and exit; waitpid() then
# reaps it so no child process is left behind.
close(NKFIN);
close(NKFOUT);
waitpid($pid, 0);

exit 0;
