/*
 * ztxt.c - An interface for ztxt
 * Copyright (C) mellanie
 *
*/

#include <ruby.h>
#include <rubyio.h>

#define RUBY_SCONV_VERSION "0.1.0"

#define OBJ_IS_FREED(val)	(RBASIC(val)->flags == 0)

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "s2u.h"
#include "u2s.h"

	/* sjis2004 based functions */
/*
 * String#ku2sj
 *
 * Interprets the receiver as a "plane-row-cell" position written as three
 * decimal numbers separated by '-' (for example "1-16-1") and returns the
 * matching two-byte Shift_JIS sequence as a new String.
 *
 * Fields are parsed leniently: runs of '-' are skipped, each field is read
 * with strtol() (text that is not a number counts as 0) and the value is
 * then truncated to 8 bits.  A plane of 1 selects the first plane; every
 * other value is handled as plane 2.
 *
 * Raises ArgumentError when fewer than three fields are present, or when a
 * plane 2 row has no lead byte (only rows 1, 3, 4, 5, 8, 12-15 and 78-94
 * are mapped below).
 */
static VALUE
rb_sconv_ku_to_sj(VALUE obj)
{
	char		*str, *ku, *cursor;
	long		len;
	long		field[3];
	uint8_t		p, r, c;
	unsigned char	sjis[2];
	int		i;

	Check_Type(obj, T_STRING);
	Check_SafeStr(obj);
	str = StringValuePtr(obj);
	len = RSTRING(obj)->len;

		/* private NUL-terminated copy: the tokenizer below must never
		 * run past the end of the Ruby string nor modify it */
	ku = ALLOC_N(char, len + 1);
	memcpy(ku, str, (size_t)len);
	ku[len] = '\0';

	cursor = ku;
	for (i = 0; i < 3; i++)
	{
		char	*sep;

			/* skip delimiter runs, so "1--16-1" still has three fields */
		while (*cursor == '-')
		{
			cursor++;
		}
		if (*cursor == '\0')
		{
			xfree(ku);	/* rb_raise does not return */
			rb_raise(rb_eArgError,
				"expected \"plane-row-cell\" (three '-' separated numbers)");
		}

		sep = strchr(cursor, '-');
		if (sep != NULL)
		{
			*sep = '\0';
		}
		field[i] = strtol(cursor, NULL, 10);
		cursor = (sep != NULL) ? sep + 1 : cursor + strlen(cursor);
	}
	xfree(ku);

	p = (uint8_t)field[0];
	r = (uint8_t)field[1];
	c = (uint8_t)field[2];

		/* lead byte */
	if (p == 1)
	{
		if (r < 63)
		{
			sjis[0] = (unsigned char)((r + 0x101) / 2);
		}
		else
		{
			sjis[0] = (unsigned char)((r + 0x181) / 2);
		}
	}
	else if ((77 < r) && (r < 95))
	{
		sjis[0] = (unsigned char)((r + 0x19b) / 2);
	}
	else
	{
		switch (r)
		{
			case 1:
			case 3:
			case 4:
			case 5:
			case 8:
			case 12:
			case 13:
			case 14:
			case 15:
				sjis[0] = (unsigned char)(((r + 0x1df) / 2) - ((r / 8) * 3));
				break;
			default:
					/* previously left sjis[0] uninitialized */
				rb_raise(rb_eArgError,
					"no Shift_JIS lead byte for plane 2 row %d", (int)r);
				break;
		}
	}

		/* trail byte: odd rows use the low half, even rows the high half */
	if ((r % 2) == 1)
	{
		if (c < 64)
		{
			sjis[1] = (unsigned char)(c + 0x3f);
		}
		else
		{
			sjis[1] = (unsigned char)(c + 0x40);
		}
	}
	else
	{
		sjis[1] = (unsigned char)(c + 0x9e);
	}

	return rb_str_new((char *)sjis, 2);
} /* end rb_sconv_ku_to_sj */

/*
 * String#sj2ku
 *
 * Decodes the first two bytes of the receiver as one double-byte Shift_JIS
 * character and returns a four element Array [p, r, c, l]:
 *
 *   p - plane (1 or 2), 0 when the lead byte is not in a handled range
 *   r - row
 *   c - cell, 0 when the trail byte is not in a handled range
 *   l - level code: 4 for plane 2, -1 for the plane 1 positions the
 *       table below marks as x0208 gaiji, otherwise 0..3 from row/cell
 *
 * When the receiver is shorter than two bytes or its first byte has the
 * high bit clear, [0, 0, 0, 0] is returned.
 */
static VALUE
rb_sconv_sj_to_ku(VALUE obj)
{
	unsigned char	*str;
	long		len;
	uint8_t		p = 0, r = 0, c = 0;
	int8_t		l = 0;
	uint8_t		s1, s2;
	uint8_t		even = 0;	/* stays 0 for an unrecognized trail byte */
	VALUE		result;

	Check_Type(obj, T_STRING);
	Check_SafeStr(obj);
	str = (unsigned char *)StringValuePtr(obj);
	len = RSTRING(obj)->len;

	if ((len >= 2) && ((str[0] & 0x80) != 0))
	{
		s1 = (uint8_t)str[0];
		s2 = (uint8_t)str[1];

			/* make c from s2 byte and set flag of even or odd row */
		if ((0x40 <= s2) && (s2 <= 0x7e))
		{
			c = (uint8_t)(s2 - 0x3f);
			even = 0;
		}
		else if ((0x80 <= s2) && (s2 <= 0x9e))
		{
			c = (uint8_t)(s2 - 0x40);
			even = 0;
		}
		else if ((0x9e < s2) && (s2 <= 0xfc))
		{
			c = (uint8_t)(s2 - 0x9e);
			even = 1;
		}

			/* make p, r from s1 byte and even flag */
		if ((0x81 <= s1) && (s1 <= 0x9f))
		{
			r = (uint8_t)((s1 * 2) - 0x101 + even);
			p = 1;
		}
		else if ((0xe0 <= s1) && (s1 <= 0xef))
		{
			r = (uint8_t)((s1 * 2) - 0x181 + even);
			p = 1;
		}
		else if ((0xf0 <= s1) && (s1 <= 0xf4))
		{		/* 4th level 1st area: rows are not contiguous */
			switch (s1)
			{
				case 0xf0:
					r = (uint8_t)((even == 0) ? 1 : 8);
					break;
				case 0xf1:
					r = (uint8_t)((even == 0) ? 3 : 4);
					break;
				case 0xf2:
					r = (uint8_t)((even == 0) ? 5 : 12);
					break;
				case 0xf3:
					r = (uint8_t)((even == 0) ? 13 : 14);
					break;
				case 0xf4:
					r = (uint8_t)((even == 0) ? 15 : 78);
					break;
				default:
					break;
			}
			p = 2;
			l = 4;
		}
		else if ((0xf5 <= s1) && (s1 <= 0xfc))
		{		/* 4th level 2nd area */
			r = (uint8_t)((s1 * 2) - 0x19b + even);
			p = 2;
			l = 4;
		}

			/* make l from previously calculated p, r, c */
		if (l == 0)
		{
			if ((r >= 1) && (r <= 13))
			{
				l = 0;
			}
			else if ((r >= 14) && (r <= 15))
			{
				l = 3;
			}
			else if ((r >= 16) && (r <= 46))
			{
				l = 1;
			}
			else if (r == 47)
			{
				l = (c <= 51) ? 1 : 3;
			}
			else if ((r >= 48) && (r <= 83))
			{
				l = 2;
			}
			else if (r == 84)
			{
				l = (c <= 6) ? 2 : 3;
			}
			else if ((r >= 85) && (r <= 94))
			{
				l = 3;
			}
		}

			/* make l of x0208 gaiji (l is 0 but, now make -1) */
		if (p == 1)
		{
			switch (r)
			{
				case 2:
					if (((15 <= c) && (c <= 25)) || ((34 <= c) && (c <= 41)) || ((49 <= c) && (c <= 59)) || ((75 <= c) && (c <= 81)) || ((90 <= c) && (c <= 93)))
						l = -1;
					break;
				case 3:
					if (((1 <= c) && (c <= 15)) || ((26 <= c) && (c <= 32)) || ((59 <= c) && (c <= 64)) || ((91 <= c) && (c <= 94)))
						l = -1;
					break;
				case 4:
					if ((84 <= c) && (c <= 94))
						l = -1;
					break;
				case 5:
					if ((87 <= c) && (c <= 94))
						l = -1;
					break;
				case 6:
					if (((25 <= c) && (c <= 32)) || ((57 <= c) && (c <= 94)))
						l = -1;
					break;
				case 7:
					if (((34 <= c) && (c <= 48)) || ((82 <= c) && (c <= 94)))
						l = -1;
					break;
				case 8:
					if ((33 <= c) && (c <= 94))
						l = -1;
					break;
				case 9:
				case 10:
				case 11:
				case 12:
				case 13:
					l = -1;
					break;
				default:
					break;
			}
		}
	} /* end if first unsigned char is kanji */

	result = rb_ary_new();
	rb_ary_push(result, INT2FIX(p));
	rb_ary_push(result, INT2FIX(r));
	rb_ary_push(result, INT2FIX(c));
	rb_ary_push(result, INT2FIX(l));

	return result;
} /* end rb_sconv_sj_to_ku */

/*
 * String#sj2utf8
 *
 * Returns a new String in which every double-byte Shift_JIS character of
 * the receiver has been replaced by the UTF-8 bytes assembled from its
 * s2utable entry.
 *
 * Two code ranges are looked up:
 *   area A  0x8140..0x9ffc  -> table offset  code - 0x8140
 *   area B  0xe040..0xfcf4  -> table offset (code - 0xe040) + 0x1ebd
 *
 * Every other byte is copied to the output unchanged, one byte at a time:
 * 7-bit ASCII, a lead byte outside both areas, and a lead byte that is the
 * last byte of the string.  Such bytes are never used as a table index.
 *
 * The .size of a table entry selects how its .bits[] fields are packed:
 * 4 -> one 2-byte sequence, 5 -> one 3-byte sequence, 7 -> one 4-byte
 * sequence, 8 -> two 2-byte sequences, 10 -> two 3-byte sequences.  Any
 * other size emits nothing for that character.
 */
static VALUE
rb_sconv_sj_to_utf8(VALUE obj)
{
	unsigned char	*str, *buf;
	long		len, pos, upos;
	VALUE		result;

	Check_Type(obj, T_STRING);
	Check_SafeStr(obj);
	str = (unsigned char *)StringValuePtr(obj);
	len = RSTRING(obj)->len;

		/* worst case is 6 output bytes for 2 input bytes (size 10);
		 * pass-through bytes are 1:1, so len * 3 always suffices */
	buf = ALLOC_N(unsigned char, len * 3 + 1);

	upos = 0;
	for (pos = 0; pos < len; pos++)
	{
		unsigned char	u1, u2, u3, u4;
		unsigned int	code;
		long		offset;

			/* 7bit ascii, or a lead byte with no trail byte left */
		if (((str[pos] & 0x80) == 0) || (pos + 1 >= len))
		{
			buf[upos++] = str[pos];
			continue;
		}

		code = ((unsigned int)str[pos] << 8) | (unsigned int)str[pos + 1];

		if ((0x8140 <= code) && (code <= 0x9ffc))
		{		/* data is shift-jis area A */
			offset = (long)code - 0x8140;
		}
		else if ((0xe040 <= code) && (code <= 0xfcf4))
		{		/* data is shift-jis area B; 0x1ebd entries of area A precede it */
			offset = ((long)code - 0xe040) + 0x1ebd;
		}
		else
		{		/* not a mapped double-byte code: copy the byte through */
			buf[upos++] = str[pos];
			continue;
		}

		pos++;	/* the trail byte has been consumed */

		switch (s2utable[offset].size)
		{
			case 4:
				u1 = 0xc0 | (s2utable[offset].bits[0] << 2)  | s2utable[offset].bits[1];
				u2 = 0x80 | (s2utable[offset].bits[2] << 4)  | s2utable[offset].bits[3];

				buf[upos++] = u1;
				buf[upos++] = u2;

				break;
			case 5:
				u1 = 0xe0 |  s2utable[offset].bits[0];
				u2 = 0x80 | (s2utable[offset].bits[1] << 2)  | s2utable[offset].bits[2];
				u3 = 0x80 | (s2utable[offset].bits[3] << 4)  | s2utable[offset].bits[4];

				buf[upos++] = u1;
				buf[upos++] = u2;
				buf[upos++] = u3;

				break;
			case 7:
				u1 = 0xf0 |  s2utable[offset].bits[0];
				u2 = 0x80 | (s2utable[offset].bits[1] << 4)  |  s2utable[offset].bits[2];
				u3 = 0x80 | (s2utable[offset].bits[3] << 4)  | (s2utable[offset].bits[4] << 2) | s2utable[offset].bits[5] ;
				u4 = 0x80 |  s2utable[offset].bits[6];

				buf[upos++] = u1;
				buf[upos++] = u2;
				buf[upos++] = u3;
				buf[upos++] = u4;

				break;
			case 8:
					/* first character */
				u1 = 0xc0 | (s2utable[offset].bits[0] << 2)  | s2utable[offset].bits[1];
				u2 = 0x80 | (s2utable[offset].bits[2] << 4)  | s2utable[offset].bits[3];

				buf[upos++] = u1;
				buf[upos++] = u2;
					/* second character */
				u1 = 0xc0 | (s2utable[offset].bits[4] << 2)  | s2utable[offset].bits[5];
				u2 = 0x80 | (s2utable[offset].bits[6] << 4)  | s2utable[offset].bits[7];

				buf[upos++] = u1;
				buf[upos++] = u2;

				break;
			case 10:
					/* first character */
				u1 = 0xe0 |  s2utable[offset].bits[0];
				u2 = 0x80 | (s2utable[offset].bits[1] << 2)  | s2utable[offset].bits[2];
				u3 = 0x80 | (s2utable[offset].bits[3] << 4)  | s2utable[offset].bits[4];

				buf[upos++] = u1;
				buf[upos++] = u2;
				buf[upos++] = u3;
					/* second character */
				u1 = 0xe0 |  s2utable[offset].bits[5];
				u2 = 0x80 | (s2utable[offset].bits[6] << 2)  | s2utable[offset].bits[7];
				u3 = 0x80 | (s2utable[offset].bits[8] << 4)  | s2utable[offset].bits[9];

				buf[upos++] = u1;
				buf[upos++] = u2;
				buf[upos++] = u3;

				break;
			default:
				break;
		} /* end switch by s2utable[offset].size */
	} /* end for pos */

		/* rb_str_new copies the bytes, so the scratch buffer is released here */
	result = rb_str_new((char *)buf, upos);
	xfree(buf);

	return result;
} /* end rb_sconv_sj_to_utf8 */

	/* utf8 based functions */
/* One "base character + following character" pair that maps to a single
 * double-byte code.  key is (first UTF-16 unit << 16) | second unit. */
struct sconv_pair
{
	uint32_t	key;
	unsigned char	s1, s2;
};

/* pairs whose two members are both 2-byte UTF-8 sequences */
static const struct sconv_pair sconv_pairs_2byte[] = {
	{ 0x00e60300, 0x86, 0x63 },
	{ 0x02540300, 0x86, 0x67 },
	{ 0x02540301, 0x86, 0x68 },
	{ 0x02590300, 0x86, 0x6b },
	{ 0x02590301, 0x86, 0x6c },
	{ 0x025a0300, 0x86, 0x6d },
	{ 0x025a0301, 0x86, 0x6e },
	{ 0x028c0300, 0x86, 0x69 },
	{ 0x028c0301, 0x86, 0x6a },
	{ 0x02e502e9, 0x86, 0x86 },
	{ 0x02e902e5, 0x86, 0x85 },
};

/* pairs whose two members are both 3-byte UTF-8 sequences */
static const struct sconv_pair sconv_pairs_3byte[] = {
	{ 0x304b309a, 0x82, 0xf5 },
	{ 0x304d309a, 0x82, 0xf6 },
	{ 0x304f309a, 0x82, 0xf7 },
	{ 0x3051309a, 0x82, 0xf8 },
	{ 0x3053309a, 0x82, 0xf9 },
	{ 0x30ab309a, 0x83, 0x97 },
	{ 0x30ad309a, 0x83, 0x98 },
	{ 0x30af309a, 0x83, 0x99 },
	{ 0x30b1309a, 0x83, 0x9a },
	{ 0x30b3309a, 0x83, 0x9b },
	{ 0x30bb309a, 0x83, 0x9c },
	{ 0x30c4309a, 0x83, 0x9d },
	{ 0x30c8309a, 0x83, 0x9e },
	{ 0x31f7309a, 0x83, 0xf6 },
};

/* Looks key up in table; returns the matching entry or NULL. */
static const struct sconv_pair *
sconv_find_pair(const struct sconv_pair *table, size_t count, uint32_t key)
{
	size_t	n;

	for (n = 0; n < count; n++)
	{
		if (table[n].key == key)
		{
			return &table[n];
		}
	}
	return NULL;
}

/*
 * String#utf82sj
 *
 * Returns a new String in which every multi-byte UTF-8 sequence of the
 * receiver has been replaced by the two bytes (.s1, .s2) that
 * decode2byte() / decode3byte() / decode4byte() report for it; those
 * helpers and the charcode type are declared outside this block.
 * 7-bit ASCII bytes are copied unchanged.
 *
 * A 2-byte or 3-byte character immediately followed by another character
 * of the same length is additionally checked against the pair tables
 * above; on a match both characters are consumed and the single
 * double-byte code from the table is emitted instead.
 *
 * Bytes that start none of the recognized sequence forms are skipped
 * without output.  A multi-byte sequence cut off by the end of the string
 * is dropped rather than decoded from bytes beyond the string.
 */
static VALUE
rb_sconv_utf8_to_sj(VALUE obj)
{
	unsigned char	*str, *buf;
	charcode	curr;
	long		len, pos, spos;
	VALUE		result;

	Check_Type(obj, T_STRING);
	Check_SafeStr(obj);
	str = (unsigned char *)StringValuePtr(obj);
	len = RSTRING(obj)->len;

		/* every branch below writes at most as many bytes as it consumes */
	buf = ALLOC_N(unsigned char, len + 1);
	spos = 0;
	for (pos = 0; pos < len; pos++)
	{
		const struct sconv_pair	*hit;
		charcode		next;
		uint16_t		currutf, nextutf;
		uint32_t		combine;

		if ((str[pos] & 0x80) == 0)	/* one byte */
		{
			buf[spos++] = str[pos];
			continue;
		}

		if ((str[pos] & 0xe0) == 0xc0)	/* two bytes */
		{
			if (pos + 1 >= len)
			{
				break;		/* truncated sequence */
			}
			curr = decode2byte(str[pos], str[pos + 1]);
			pos += 1;		/* pos is now on the last byte of curr */

				/* next is two bytes again: check for a combining pair */
			if ((pos + 2 < len) && ((str[pos + 1] & 0xe0) == 0xc0))
			{
				next = decode2byte(str[pos + 1], str[pos + 2]);
				currutf = (uint16_t)(((uint16_t)curr.u1 * 256) + (uint8_t)curr.u2);
				nextutf = (uint16_t)(((uint16_t)next.u1 * 256) + (uint8_t)next.u2);
				combine = (uint32_t)((uint32_t)currutf << 16 | nextutf);

				hit = sconv_find_pair(sconv_pairs_2byte,
					sizeof(sconv_pairs_2byte) / sizeof(sconv_pairs_2byte[0]),
					combine);
				if (hit != NULL)
				{
					curr.s1 = hit->s1;
					curr.s2 = hit->s2;
						/* skip exactly the two bytes of next; the
						 * former "+= 3" also swallowed the byte
						 * that follows the pair */
					pos += 2;
				}
			}

			buf[spos++] = curr.s1;
			buf[spos++] = curr.s2;
			continue;
		} /* end if two bytes */

		if ((str[pos] & 0xf0) == 0xe0)	/* three bytes */
		{
			if (pos + 2 >= len)
			{
				break;		/* truncated sequence */
			}
			curr = decode3byte(str[pos], str[pos + 1], str[pos + 2]);
			pos += 2;		/* pos is now on the last byte of curr */

				/* next is three bytes again: check for a combining pair */
			if ((pos + 3 < len) && ((str[pos + 1] & 0xf0) == 0xe0))
			{
				next = decode3byte(str[pos + 1], str[pos + 2], str[pos + 3]);
				currutf = (uint16_t)(((uint16_t)curr.u1 * 256) + (uint8_t)curr.u2);
				nextutf = (uint16_t)(((uint16_t)next.u1 * 256) + (uint8_t)next.u2);
				combine = (uint32_t)((uint32_t)currutf << 16 | nextutf);

				hit = sconv_find_pair(sconv_pairs_3byte,
					sizeof(sconv_pairs_3byte) / sizeof(sconv_pairs_3byte[0]),
					combine);
				if (hit != NULL)
				{
					curr.s1 = hit->s1;
					curr.s2 = hit->s2;
					pos += 3;	/* skip the three bytes of next */
				}
			}

			buf[spos++] = curr.s1;
			buf[spos++] = curr.s2;
			continue;
		} /* end if three bytes */

		if ((str[pos] & 0xf8) == 0xf0)	/* four bytes */
		{
			if (pos + 3 >= len)
			{
				break;		/* truncated sequence */
			}
			curr = decode4byte(str[pos], str[pos + 1], str[pos + 2], str[pos + 3]);
			pos += 3;
			buf[spos++] = curr.s1;
			buf[spos++] = curr.s2;
			continue;
		} /* end if four bytes */
	} /* end for pos */

		/* rb_str_new copies the bytes, so the scratch buffer is released here */
	result = rb_str_new((char *)buf, spos);
	xfree(buf);

	return result;
} /* end rb_sconv_utf8_to_sj */

	/* initializer: defines the conversion methods on the String class */
void
Init_sconv()
{

	rb_define_method(rb_cString, "ku2sj", rb_sconv_ku_to_sj, 0);
	rb_define_method(rb_cString, "sj2ku", rb_sconv_sj_to_ku, 0);
	rb_define_method(rb_cString, "sj2utf8", rb_sconv_sj_to_utf8, 0);
//	cString = rb_define_method(rb_cString, "sj2utf16le", rb_sconv_to_utf16le, 0);
//	cString = rb_define_method(rb_cString, "sj2utf16be", rb_sconv_to_utf16be, 1);
//	cString = rb_define_method(rb_cString, "chg_endian", rb_sconv_chg_endian, 1);
	rb_define_method(rb_cString, "utf82sj", rb_sconv_utf8_to_sj, 0);

}