#!/usr/bin/env ruby

require 'nkf'

# Reopen String so that +ord+ returns an Array holding every Unicode code
# point decoded from the string's UTF-8 bytes (the 'U*' unpack directive).
# NOTE: on Ruby versions that ship their own String#ord, this definition
# replaces it for the whole process.
class String
    def ord
        unpack('U*')
    end
end

# Lookup tables indexed by byte value (0x00-0xFF). Every slot starts at 0;
# a slot is set to 1 once the corresponding byte has been observed:
#   $singlebyte     - set by dump_mb2ucs_1 for bytes it printed on their own
#   $doublebyte_1st - set by dump_mb2ucs_2 for first bytes of printed pairs
#   $doublebyte_2nd - set by dump_mb2ucs_2 for second bytes of printed pairs
$singlebyte     = [0] * 256
$doublebyte_1st = [0] * 256
$doublebyte_2nd = [0] * 256

# Tell whether byte value +c+ is flagged in the $singlebyte table.
#
# c - Integer index into $singlebyte.
#
# Returns true when the table entry equals 1, false otherwise (including
# when the index lies outside the table).
def is_singlebyte(c)
    flag = $singlebyte[c]
    flag == 1
end

# Tell whether +c1+ is flagged as a first byte and +c2+ as a second byte of
# a double-byte character.
#
# The two tables are consulted independently, so the answer is true for any
# flagged first byte combined with any flagged second byte, whether or not
# that exact pair was ever recorded together.
#
# c1 - Integer index into $doublebyte_1st.
# c2 - Integer index into $doublebyte_2nd.
#
# Returns true or false.
def is_doublebyte(c1, c2)
    return false unless $doublebyte_1st[c1] == 1

    $doublebyte_2nd[c2] == 1
end

# Print one mapping line of the form "\xHH[\xHH...] <UXXXX>[<UXXXX>...]".
#
# mb  - String holding the multibyte sequence; every byte is printed as
#       \xHH (two upper-case hex digits).
# ucs - Array of Integer code points; each is printed as <UXXXX> (at least
#       four upper-case hex digits).
#
# The two parts are separated by one space and the line ends with "\n".
# Returns nil.
def print_mb2ucs_one_char(mb, ucs)
    # Read byte values through unpack rather than mb[i]: indexing a String
    # with an Integer yields a byte value only on Ruby 1.8; on 1.9 and later
    # it yields a one-character String, which %02X cannot format.
    bytes = mb.unpack('C*')
    codepoints = ucs

    # The original forced both lengths to 1 for an empty +mb+ and then
    # indexed past the end of the data (nil), which raised. An empty
    # sequence is now printed as a single NUL byte with one code point.
    # NOTE(review): presumably the empty case stands for the NUL character;
    # confirm that "\x00 <U0000>" is the wanted output.
    if bytes.empty?
        bytes = [0]
        codepoints = [ucs[0] || 0]
    end

    hex = bytes.map { |b| format('\\x%02X', b) }.join
    uni = codepoints.map { |cp| format('<U%04X>', cp) }.join
    print hex, ' ', uni, "\n"
end

# Convert every single byte 0x00..0xFF with NKF and print a mapping line
# for each byte that yields usable output, flagging it in $singlebyte.
#
# nkf_opt - option String handed unchanged to NKF.nkf.
#
# A byte is accepted when it is 0x00 itself, or when the converted output
# is non-empty and does not start with a NUL byte.
def dump_mb2ucs_1(nkf_opt)
    (0x00..0xFF).each do |c1|
        mb = c1.chr
        utf8 = NKF.nkf(nkf_opt, mb)

        # Compare byte values, not utf8[0]: on Ruby 1.9+ utf8[0] is a String,
        # so `utf8[0] != 0` would always be true and NUL output would slip
        # through the filter.
        first_byte = utf8.unpack('C*').first
        converted = !first_byte.nil? && first_byte != 0
        next unless c1 == 0 || converted

        # unpack('U*') decodes the UTF-8 output into an Array of code points.
        print_mb2ucs_one_char(mb, utf8.unpack('U*'))
        $singlebyte[c1] = 1
    end
end

# Convert every two-byte sequence whose first byte is in 0x80..0xFF (and is
# not already flagged as a single-byte character) and whose second byte is
# in 0x01..0xFF, printing a mapping line for each sequence that yields
# usable output.
#
# nkf_opt - option String handed unchanged to NKF.nkf.
#
# For every printed pair the first byte is flagged in $doublebyte_1st and
# the second byte in $doublebyte_2nd.
def dump_mb2ucs_2(nkf_opt)
    (0x80..0xFF).each do |c1|
        next if is_singlebyte(c1)

        (0x01..0xFF).each do |c2|
            mb = c1.chr + c2.chr
            utf8 = NKF.nkf(nkf_opt, mb)

            # Work on byte values: on Ruby 1.9+ String#length counts
            # characters and utf8[0] is a String, so the original
            # `utf8[0] > 0x7F` comparison raised ArgumentError.
            out_bytes = utf8.unpack('C*')

            # A lone byte above 0x7F cannot be a complete UTF-8 sequence.
            # NOTE(review): presumably NKF emitted an unconverted byte here.
            next if out_bytes.length == 1 && out_bytes[0] > 0x7F
            # Skip empty output and output that starts with NUL.
            next if out_bytes.empty? || out_bytes[0] == 0

            # unpack('U*') decodes the UTF-8 output into code points.
            print_mb2ucs_one_char(mb, utf8.unpack('U*'))
            $doublebyte_1st[c1] = 1
            $doublebyte_2nd[c2] = 1
        end
    end
end

# Convert three-byte sequences and print a mapping line for each one that
# yields usable output.
#
# nkf_opt - option String handed unchanged to NKF.nkf.
#
# Every byte ranges over 0x01..0xFF. A sequence is skipped when its first
# byte is flagged as single-byte, when its first two bytes are flagged by
# is_doublebyte, or when its third byte is flagged as single-byte.
def dump_mb2ucs_3(nkf_opt)
    (0x01..0xFF).each do |c1|
        next if is_singlebyte(c1)

        (0x01..0xFF).each do |c2|
            next if is_doublebyte(c1, c2)

            (0x01..0xFF).each do |c3|
                next if is_singlebyte(c3)

                mb = c1.chr + c2.chr + c3.chr
                utf8 = NKF.nkf(nkf_opt, mb)

                # Work on byte values: on Ruby 1.9+ String#length counts
                # characters and utf8[0] is a String, so the original
                # `utf8[0] > 0x7F` comparison raised ArgumentError.
                out_bytes = utf8.unpack('C*')

                # A lone byte above 0x7F cannot be a complete UTF-8 sequence.
                # NOTE(review): presumably NKF emitted an unconverted byte.
                next if out_bytes.length == 1 && out_bytes[0] > 0x7F
                # Skip empty output and output that starts with NUL.
                next if out_bytes.empty? || out_bytes[0] == 0

                # unpack('U*') decodes the UTF-8 output into code points.
                print_mb2ucs_one_char(mb, utf8.unpack('U*'))
            end
        end
    end
end

# Dump the mapping tables for +codeset+ by running the single-, double- and
# triple-byte passes in that order.
#
# codeset - String naming the input codeset; it is placed after "--ic=" in
#           the NKF option string, with UTF-8 requested as the output
#           codeset ("--oc=UTF-8") and the "-x" flag appended.
def dump_mb2ucs(codeset)
    option_parts = ["--ic=" + codeset, "--oc=UTF-8", "-x"]
    nkf_opt = option_parts.join(" ")

    # Order matters: the later passes consult the flag tables filled in by
    # the earlier ones.
    dump_mb2ucs_1(nkf_opt)
    dump_mb2ucs_2(nkf_opt)
    dump_mb2ucs_3(nkf_opt)
end

# Script entry point: exactly one command-line argument (the codeset name)
# is required. Anything else prints the usage line to standard error and
# exits with status 1.
unless ARGV.size == 1
    STDERR.print "Usage: mb2ucs_ruby codeset\n"
    exit 1
end

dump_mb2ucs(ARGV[0])

exit 0
