﻿#!/usr/local/bin/perl

# gen-uax29-test.pl (c) 2007 exeal
#
# This script generates C++ test code for the concrete break iterators that
# implement ascension.text.BreakIterator. See ascension/test/break-iterator-test.cpp.
#
# This takes three input files obtained from Unicode.org:
# - GraphemeBreakTest.txt
# - WordBreakTest.txt
# - SentenceBreakTest.txt
# in UNIDATA/auxiliary/.

use strict;
use warnings;
use integer;
use IO::File;

# Open (creating or truncating) the generated C++ source file in the current
# working directory. The mode is passed as a separate argument so the file name
# is never parsed for mode characters, and the OS error is included on failure.
# $out is a file-level lexical because handleFile() and the print statements
# below all write to it.
my $out = IO::File->new('break-iterator-test.cpp', '>');
die "can't open output file: break-iterator-test.cpp: $!\n" unless(defined($out));

# Translates one UAX #29 break test data file into C++ statements written to
# the file-level output handle $out.
#
# Parameters:
#   $fileName - path of the test data file to read
#   $postfix  - suffix of the C++ checker to invoke; the emitted call is
#               check<postfix>(L"...", indices);
#
# Only lines that begin with the break mark are processed; everything from the
# first '#' on is ignored. For each such line the code points are emitted as a
# wide string literal made of \x escapes, and the expected boundary offsets are
# assigned to the C++ variable `indices` whenever they differ from the offsets
# emitted for the previous line.
#
# Dies if the input file cannot be opened.
#
# NOTE(review): offsets are counted one per listed code point, and each code
# point becomes a single \x escape. This presumably assumes every code point
# fits into one element of the C++ string type -- confirm for code points
# above U+FFFF.
sub handleFile {
	my ($fileName, $postfix) = @_;
	# Explicit mode argument: the file name is taken literally, so whitespace
	# or special characters in it cannot be interpreted by open().
	my $in = IO::File->new($fileName, '<');
	die("can't open: $fileName: $!\n") unless(defined($in));
	my $lastIndices = '';
	while(defined(my $line = <$in>)) {
		# Keep only the rule part of a data line (up to the trailing comment).
		next unless($line =~ m/^(÷[^\#]+)/);
		my $rule = $1;
		my ($charIndex, $text) = (0, '');
		my @indices;
		# Each item is a break/no-break mark followed by a hexadecimal code point.
		while($rule =~ /(÷|×)\s([\dA-F]+)/g) {
			$text .= '\x' . $2;
			push(@indices, $charIndex) if($1 eq '÷');
			++$charIndex;
		}
		# The end of the text is always recorded as a boundary.
		push(@indices, $charIndex);
		my $indexList = join(', ', @indices);
		# Re-initialize the C++ container only when the expected offsets change.
		if($indexList ne $lastIndices) {
			print {$out} "\t" . 'initializeContainer(indices) = ' . $indexList . ";\n";
			$lastIndices = $indexList;
		}
		print {$out} "\tcheck$postfix(L\"$text\", indices);\n";
	}
	$in->close();
	return;
}

# Command line: exactly one argument, the directory that holds the three
# Unicode test data files. '-h' (or a wrong argument count) prints the usage.
die "usage: gen-uax29-test.pl [input-file-directory]\n" if(@ARGV != 1 or $ARGV[0] eq '-h');
my $directory = shift @ARGV;
# Ensure a non-empty directory ends with a path separator so that a file name
# can be appended directly. The path itself is left untouched: rewriting '/'
# to '\' (previously done for the first slash only) produced mixed separators
# and made every path unusable on systems where '\' is not a separator. A
# forward slash is appended because it is accepted on Windows as well.
if($directory ne '' and $directory !~ m{[/\\]\z}) {
	$directory .= '/';
}

# Banner comment of the generated file, stamped with the local generation time.
print $out '// automatically generated by `perl gen-uax29-test.pl` at $ ' . scalar(localtime) . " \$\n\n";
# Fixed C++ preamble of the generated file. The heredoc is single-quoted, so it
# is written verbatim without any interpolation. It provides:
# - ContainerInitializer / ContainerInitializeContext / initializeContainer():
#   support for the `initializeContainer(indices) = a, b, c;` statements emitted
#   by handleFile(); operator= clears the container and pushes the first value,
#   operator, pushes each following value.
# - check<>(): walks the iterator forward with i++, backward with i--, and then
#   calls isBoundary() for every expected offset, comparing with BOOST_WARN*.
# - checkGBI() / checkWBI(): build the grapheme / word break iterator over the
#   given string and delegate to check<>().
# NOTE(review): checkSBI() has an empty body, so the calls generated from
# SentenceBreakTest.txt currently verify nothing.
print $out <<'HEAD';
// break-iterator-test.cpp
#include "../unicode.hpp"
#include <boost/test/included/test_exec_monitor.hpp>
namespace a = ascension;
namespace t = ascension::text;

namespace {
	template<class Container>
	class ContainerInitializeContext {
	public:
		explicit ContainerInitializeContext(Container& c) throw() : c_(c) {}
		ContainerInitializeContext& operator,(typename Container::value_type e) {c_.push_back(e); return *this;}
	private:
		Container& c_;
	};
	template<class Container>
	class ContainerInitializer {
	public:
		explicit ContainerInitializer(Container& c) throw() : c_(c) {}
		ContainerInitializeContext<Container> operator=(typename Container::value_type e) {
			c_.clear(); c_.push_back(e); return ContainerInitializeContext<Container>(c_);}
	private:
		Container& c_;
	};
	template<class Container>
	inline ContainerInitializer<Container> initializeContainer(Container& c) {
		return ContainerInitializer<Container>(c);
	}

	template<template<class> class Iterator>
	void check(Iterator<t::StringCharacterIterator>& i, const std::vector<std::size_t>& indices) {
		const a::Char* const p = i.base().tell();
		// forward iteration
		for(std::vector<std::size_t>::const_iterator j = indices.begin(); j != indices.end(); ++j)
			BOOST_WARN_EQUAL((i++).base().tell(), p + *j);
		// backward iteration
		BOOST_WARN(!i.base().hasNext());	// BOOST_REQUIRE is preferred
		for(std::vector<std::size_t>::const_reverse_iterator j = indices.rbegin(); j != indices.rend(); ++j)
			BOOST_WARN_EQUAL((i--).base().tell(), p + *j);
		// random check
		BOOST_WARN(!i.base().hasPrevious());	// BOOST_REQUIRE is preferred
		for(std::vector<std::size_t>::const_iterator j = indices.begin(); j != indices.end(); ++j)
			BOOST_WARN(i.isBoundary(t::StringCharacterIterator(p, i.base().end(), p + *j)));
	}

	inline void checkGBI(const a::String& s, const std::vector<std::size_t>& indices) {
		t::StringCharacterIterator text(s);
		t::GraphemeBreakIterator<t::StringCharacterIterator> i(text);
		return check(i, indices);
	}

	inline void checkWBI(const a::String& s, const std::vector<std::size_t>& indices) {
		t::StringCharacterIterator text(s);
		t::WordBreakIterator<t::StringCharacterIterator> i(text, t::AbstractWordBreakIterator::BOUNDARY_OF_SEGMENT, t::IdentifierSyntax());
		return check<t::WordBreakIterator>(i, indices);
	}

	inline void checkSBI(const a::String& s, const std::vector<std::size_t>& indices) {
	}
}
HEAD

# Emit one C++ test function per break type. Each function declares the
# `indices` vector that the statements produced by handleFile() refer to,
# followed by the checks generated from the matching Unicode data file.
my @suites = (
	# C++ function name,           input data file,           checker suffix
	['testGraphemeBreakIterator', 'GraphemeBreakTest.txt',   'GBI'],
	['testWordBreakIterator',     'WordBreakTest.txt',       'WBI'],
	['testSentenceBreakIterator', 'SentenceBreakTest.txt',   'SBI'],
);
for my $suite (@suites) {
	my ($function, $dataFile, $postfix) = @{$suite};
	print $out "\nvoid $function() {\n\tstd::vector<std::size_t> indices;\n";
	handleFile($directory . $dataFile, $postfix);
	print $out "}\n";
}

# Emit the entry point of the generated test program. The generated file
# includes <boost/test/included/test_exec_monitor.hpp>, whose runner calls
# test_main() itself. A test_suite created inside test_main() is never handed
# to any runner, so the test functions are called directly to make sure the
# generated checks are actually executed.
print $out <<'FOOTER';

int test_main(int, char*[]) {
	testGraphemeBreakIterator();
	testWordBreakIterator();
	testSentenceBreakIterator();
	return 0;
}
FOOTER

# Buffered write errors only surface when the handle is closed, so close
# explicitly and fail loudly instead of leaving a silently truncated file.
$out->close() or die "can't close output file: break-iterator-test.cpp: $!\n";
