#!perl

# UnicodeWidth.pl
# Last update: 2017.01.13
# (c) 2016 JOJO

#Usage: perl scintilla/scripts/UnicodeWidth.pl > scintilla/src/UnicodeWidthData

use utf8;
use warnings;
use strict;
use File::Basename;

my $DEBUG = 0;

my $UnicodeWidth = File::Basename::dirname($0)."/UnicodeWidth.txt";	# 'scintilla/scripts/UnicodeWidth.txt'

my %EAWhash = ('A'=>0, 'F'=>2, 'H'=>1, 'N'=>1, 'Na'=>1, 'W'=>2);

my $BITSHIFT = 8;
my $BITMASK = (1 << $BITSHIFT) - 1;

my $icp = 0;
my $ieaw = "";

open FILE, '<:utf8', $UnicodeWidth or die $!;

print "/*
 * http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
 */
static const int SHIFT = $BITSHIFT;
static const int MASK = $BITMASK;
static const int data[] = {\n";

while (not eof(FILE)) {
	# 0000..007F; Basic Latin
	my $line = <FILE>;
	
	my ($begin, $end, $property);
	if ($line =~ m/^([0-9A-F]+)\.\.([0-9A-F]+);([A-Za-z]+)/) {
		$begin = $1;
		$end   = $2;
		$property = $3;
	}
	elsif ($line =~ m/^([0-9A-F]+);([A-Za-z]+)/) {
		$begin = $1;
		$end   = $1;
		$property = $2;
	}
	else {
		next
	}
	
	my $ibegin = hex($begin);
	my $iend   = hex($end) + 1;

	if ($icp < $ibegin) {
		#  - All code points, assigned or unassigned, that are not listed
		#      explicitly are given the value "N".
		#  - The unassigned code points in the following blocks default to "W":
		#         CJK Unified Ideographs Extension A: U+3400..U+4DBF
		#         CJK Unified Ideographs:             U+4E00..U+9FFF
		#         CJK Compatibility Ideographs:       U+F900..U+FAFF
		#  - All undesignated code points in Planes 2 and 3, whether inside or
		#      outside of allocated blocks, default to "W":
		#         Plane 2:                            U+20000..U+2FFFD
		#         Plane 3:                            U+30000..U+3FFFD
		my $p;
		if (0x3400  <= $icp && $icp <= 0x4DBF
		 || 0x4E00  <= $icp && $icp <= 0x9FFF
		 || 0xF900  <= $icp && $icp <= 0xFAFF
		 || 0x20000 <= $icp && $icp <= 0x2FFFD
		 || 0x30000 <= $icp && $icp <= 0x3FFFD)
			{ $p = "W" }
		else
			{ $p = "N"; }
		my $i = $EAWhash{$p};

		if ($ieaw ne $i) {
			$ieaw = $i;
			my $v = $icp << $BITSHIFT | $ieaw;
			print "$v,";
			print sprintf "\t// %04X %s", $icp, $ieaw if $DEBUG;
			print "\n";
		}
		$icp = $ibegin;
	}
	if ($icp < $iend) {
		my $p = $property;
		my $i = $EAWhash{$p};

		if ($ieaw ne $i) {
			$ieaw = $i;
			my $v = $icp << $BITSHIFT | $ieaw;
			print "$v,";
			print sprintf "\t// %04X %s", $icp, $ieaw if $DEBUG;
			print "\n";
		}
		$icp = $iend;
	}
}
print "};\n";
close FILE;
