

/*	
	GameboyVM - Nintendo Gameboy Emulator
		Copyright 2002 Y_N y_n@users.sourceforge.jp
		Homepage https://sourceforge.jp/projects/gbemu/
*/

#include "../include/defines.h"

#ifdef	_WINCE_GDI
#undef	_ASM_
#endif	/*_WINCE_GDI*/

/*
#include <string.h>
extern u8 MEM[0x10000];
extern u8 FrameBuffer[160*144];
extern u16 FrameBuffer16[160*144];
extern u8 EnableBG, EnableWND, EnableOBJ;
*/


/*
	DrawLineCgb - render scanline MEM[R_LY] for CGB mode (16-bit pixels).

	The line is composed in a local LineBuffer in three passes (background,
	window, sprites) and then 160 pixels starting at LineBuffer[Start+8]
	(Start = SCX & 7) are copied to FrameBuffer16[LY*160].

	Takes no arguments and returns nothing; reads MEM, VRAM, cgb_bg_pal,
	cgb_obj_pal, EnableBG/EnableWND/EnableOBJ and writes FrameBuffer16.

	OBJ_X / OBJ_Y / OBJ_F are macros defined outside this file; they are
	presumably the X, Y and flag bytes of the OAM entry whose tile byte is
	at MEM[tma] - TODO confirm against defines.h.

	NOTE(review): palette selection masks the attribute with 0x3, so only
	four BG / OBJ palettes are reachable.  The CGB attribute format has a
	3-bit palette field; confirm the size of cgb_bg_pal / cgb_obj_pal
	before widening the mask.
	NOTE(review): the _ASM_ background path always walks the tile from bit
	7 down and therefore ignores the horizontal-flip attribute that the C
	path honours.
*/
static void DrawLineCgb()
{
	u32	fba, offset, tma_offset;
	u16	tdba, tda, tma, tmpx;
	u8	tdd, tds, tilebit, tmp8;
	u8	Start, tpy, mpy, mpx;
	u16	bgp0, bgp1, bgp2, bgp3, obp1, obp2, obp3;
	/* 176 visible/scroll slots plus 8 spare: window and sprite runs may
	   start at index 175 and always write 8 consecutive pixels. */
	u16	LineBuffer[176+8];
	u16	temp16;
	u32	bgpd_offset, fvram_bank, fxflip, tmp_atbt;
#ifndef	_ASM_
	u16	*fbuf, *lbuf;
#endif	/*_ASM_*/

	tma_offset=0;

	/* The window pass reuses the colours decoded by the background pass;
	   give them a defined value in case the background pass is skipped. */
	bgp0=bgp1=bgp2=bgp3=0;

	/* Clear the whole buffer (size in bytes, not elements). */
	memset(LineBuffer, 0, sizeof(LineBuffer));

	if(MEM[R_LCDC]&0x10){	/* select the tile data base address */
		tdba = 0x8000;
		offset = 0x00;
	}else{
		tdba = 0x8800;
		offset = 0x80;	/* tile numbers are signed in this mode */
	}

	Start=MEM[R_SCX];
	mpx=Start>>3;	/* tile map column */
	Start&=7;	/* pixel offset inside the first tile */
	
	tpy=MEM[R_SCY]+MEM[R_LY];
	mpy=tpy>>3;	/* tile map row */
	tpy&=7;	/* pixel row inside the tile */

	if((MEM[R_LCDC]&0x01) && EnableBG){	/* background pass */
		tma=(MEM[R_LCDC]&0x08)?0x9C00:0x9800;	/* select the tile map address */
		tma+=mpx+(mpy<<5);
		for(tmpx=0; tmpx<168; tmpx+=8){
			if((31-mpx)<(tmpx>>3))tma_offset=32;	/* wrap to the start of the 32-tile row */
			/* Attribute byte for this tile; apply the same row wrap as the
			   tile-number read below so both refer to the same map cell. */
			tmp_atbt=VRAM[tma-tma_offset-0x8000];
			bgpd_offset=(tmp_atbt&0x3)<<3;
			fvram_bank=tmp_atbt&0x08;
			fxflip=tmp_atbt&0x20;
			/* Decode the four palette colours: each is a little-endian
			   16-bit value whose 5-bit fields at bits 0-4 and 10-14 are
			   swapped (BGR -> RGB). */
			temp16 = (cgb_bg_pal[1+bgpd_offset]<<8)|cgb_bg_pal[0+bgpd_offset];
			bgp0 = ((temp16>>10)&0x1F)|(temp16&0x3E0)|((temp16<<10)&0x7C00);
			temp16 = (cgb_bg_pal[3+bgpd_offset]<<8)|cgb_bg_pal[2+bgpd_offset];
			bgp1 = ((temp16>>10)&0x1F)|(temp16&0x3E0)|((temp16<<10)&0x7C00);
			temp16 = (cgb_bg_pal[5+bgpd_offset]<<8)|cgb_bg_pal[4+bgpd_offset];
			bgp2 = ((temp16>>10)&0x1F)|(temp16&0x3E0)|((temp16<<10)&0x7C00);
			temp16 = (cgb_bg_pal[7+bgpd_offset]<<8)|cgb_bg_pal[6+bgpd_offset];
			bgp3 = ((temp16>>10)&0x1F)|(temp16&0x3E0)|((temp16<<10)&0x7C00);
			/* Tile row address; attribute bit 0x40 flips the tile vertically. */
			tda=tdba+((u8)(MEM[(tma++)-tma_offset]-offset)<<4)+((tmp_atbt&0x40)?14-(tpy<<1):(tpy<<1));
			if(fvram_bank){	/* tile data comes from the VRAM array */
				tdd=VRAM[(tda++)-0x8000];
				tds=VRAM[tda-0x8000];
			}else{
				tdd=MEM[tda++];
				tds=MEM[tda];
			}
#ifdef	_ASM_
			__asm{
				xor		eax, eax	; edi = &LineBuffer[tmpx+8]
				mov		ax, tmpx
				add		eax, 8
				shl		eax, 1
				lea		edi, [LineBuffer+eax]
				mov		dl, 80h		; tilebit
bg_start_loop:
				mov		al, tdd		; tdd&tds&tilebit
				mov		cl, tds
				and		al, cl
				and		al, dl
				cmp		al, dl
				jnz		bg_palette0
				mov		cx, bgp3
				mov		[edi], cx
				jmp		bg_palette3
bg_palette0:
				mov		al, tdd		; ~tdd&tds&tilebit
				mov		cl, tds
				not		al
				and		al, cl
				and		al, dl
				cmp		al, dl
				jnz		bg_palette1
				mov		cx, bgp2
				mov		[edi], cx
				jmp		bg_palette3
bg_palette1:
				mov		al, tdd		; tdd&~tds&tilebit
				mov		cl, tds
				not		cl
				and		al, cl
				and		al, dl
				cmp		al, dl
				jnz		bg_palette2
				mov		cx, bgp1
				mov		[edi], cx
				jmp		bg_palette3
bg_palette2:
				mov		al, tdd		; remaining case is colour 0
				mov		cl, tds
				and		al, cl
				and		al, dl
				cmp		al, dl
				jz		bg_palette3
				mov		cx, bgp0
				mov		[edi], cx
bg_palette3:
				add		edi, 2
				shr		dl, 1
				jnz		bg_start_loop
			}
#else
			fba=tmpx+8;
			lbuf=&LineBuffer[fba];
			for(tilebit=fxflip?0x01:0x80; tilebit;	/* horizontal flip walks the bits the other way */
				tilebit=fxflip?tilebit<<1:tilebit>>1, lbuf++){
				if(~tdd&~tds&tilebit)*lbuf=bgp0;
				if(tdd&~tds&tilebit)*lbuf=bgp1;
				if(~tdd&tds&tilebit)*lbuf=bgp2;
				if(tdd&tds&tilebit)*lbuf=bgp3;
			}
#endif	/*_ASM_*/
		}
	}

	if((MEM[R_LCDC]&0x20) && EnableWND){	/* window pass */
		if(MEM[R_WY]<144 && MEM[R_LY]>=MEM[R_WY] && MEM[R_WX]<167 && MEM[R_WX]>=7){
			tpy=MEM[R_LY]-MEM[R_WY];
			mpy=tpy>>3;	/* tile map row */
			tpy&=7;	/* pixel row inside the tile */
			tma=(MEM[R_LCDC]&0x40)?0x9C00:0x9800;
			tma+=mpy<<5;
			/* NOTE(review): the window reads no attribute bytes; it is drawn
			   with the colours left in bgp0..bgp3 by the background pass. */
			for(tmpx=MEM[R_WX]+1; tmpx<168; tmpx+=8){
				tda=tdba+((u8)(MEM[tma++]+offset)<<4)+(tpy<<1);
				tdd=MEM[tda++];
				tds=MEM[tda];
				fba=tmpx+Start;	/* the window is not scrolled, so undo the SCX offset of the final copy */
#ifdef	_ASM_
				__asm{
					xor		eax, eax	; edi = &LineBuffer[tmpx+Start]
					mov		ax, tmpx
					add		al, Start
					shl		eax, 1
					lea		edi, [LineBuffer+eax]
					mov		dl, 80h		; tilebit
wnd_start_loop:
					mov		al, tdd		; tdd&tds&tilebit
					mov		cl, tds
					and		al, cl
					and		al, dl
					cmp		al, dl
					jnz		wnd_palette0
					mov		cx, bgp3
					mov		[edi], cx
					jmp		wnd_palette3
wnd_palette0:
					mov		al, tdd		; ~tdd&tds&tilebit
					mov		cl, tds
					not		al
					and		al, cl
					and		al, dl
					cmp		al, dl
					jnz		wnd_palette1
					mov		cx, bgp2
					mov		[edi], cx
					jmp		wnd_palette3
wnd_palette1:
					mov		al, tdd		; tdd&~tds&tilebit
					mov		cl, tds
					not		cl
					and		al, cl
					and		al, dl
					cmp		al, dl
					jnz		wnd_palette2
					mov		cx, bgp1
					mov		[edi], cx
					jmp		wnd_palette3
wnd_palette2:
					mov		al, tdd		; remaining case is colour 0
					mov		cl, tds
					and		al, cl
					and		al, dl
					cmp		al, dl
					jz		wnd_palette3
					mov		cx, bgp0
					mov		[edi], cx
wnd_palette3:
					add		edi, 2
					shr		dl, 1
					jnz		wnd_start_loop
				}
#else
				lbuf=&LineBuffer[fba];
				for(tilebit=0x80; tilebit; tilebit>>=1, lbuf++){
					if(~tdd&~tds&tilebit)*lbuf=bgp0;
					if(tdd&~tds&tilebit)*lbuf=bgp1;
					if(~tdd&tds&tilebit)*lbuf=bgp2;
					if(tdd&tds&tilebit)*lbuf=bgp3;
				}
#endif	/*_ASM_*/
			}
		}
	}

	if((MEM[R_LCDC]&0x02) && EnableOBJ){	/* sprite pass */
		for(tma=0xFE02; tma<0xFEA2; tma+=4){	/* 40 entries, 4 bytes apart */
			if(OBJ_X && OBJ_Y){	/* skip the sprite when either coordinate is 0 */
				fba=0;
				tpy=MEM[R_LY]-OBJ_Y+16;	/* row inside the sprite (wraps as u8 when off-line) */
				if(MEM[R_LCDC]&0x04){	/* 8x16 mode */
					if(tpy>=16)fba=255;	/* not on this line: force the range test below to fail */
					else fba=OBJ_X+Start;
					tmp8=MEM[tma]&0xFE;	/* the LSB of the tile number is ignored */
					offset=tpy<<1;
					if(OBJ_F&0x40)offset=0x1E-offset;	/* vertical flip */
				}else{	/* 8x8 mode */
					if(tpy>=8)fba=255;	/* not on this line: force the range test below to fail */
					else fba=OBJ_X+Start;
					tmp8=MEM[tma];
					offset=tpy<<1;
					if(OBJ_F&0x40)offset=0x0E-offset;	/* vertical flip */
				}
				if(fba<176){	/* sprites starting beyond the buffer are dropped */
					tda=0x8000+(tmp8<<4)+offset;
					if(OBJ_F&0x8){	/* tile data comes from the VRAM array */
						tdd=VRAM[(tda++)-0x8000];
						tds=VRAM[tda-0x8000];
					}else{
						tdd=MEM[tda++];
						tds=MEM[tda];
					}
					/* Sprite colours 1..3 (colour 0 is transparent), decoded
					   with the same field swap as the background palette. */
					bgpd_offset=(OBJ_F&0x3)<<3;
					temp16 = (cgb_obj_pal[3+bgpd_offset]<<8)|cgb_obj_pal[2+bgpd_offset];
					obp1 = ((temp16>>10)&0x1F)|(temp16&0x3E0)|((temp16<<10)&0x7C00);
					temp16 = (cgb_obj_pal[5+bgpd_offset]<<8)|cgb_obj_pal[4+bgpd_offset];
					obp2 = ((temp16>>10)&0x1F)|(temp16&0x3E0)|((temp16<<10)&0x7C00);
					temp16 = (cgb_obj_pal[7+bgpd_offset]<<8)|cgb_obj_pal[6+bgpd_offset];
					obp3 = ((temp16>>10)&0x1F)|(temp16&0x3E0)|((temp16<<10)&0x7C00);
					for(tilebit=OBJ_F&0x20?0x01:0x80; tilebit;	/* horizontal flip walks the bits the other way */
						tilebit=OBJ_F&0x20?tilebit<<1:tilebit>>1, fba++){
						if(OBJ_F & 0x80){	/* low-priority sprite */
							/* Drawn only where the buffer equals cgb_bg_pal[0].
							   NOTE(review): this compares a decoded 16-bit pixel
							   with a raw palette byte - confirm intent. */
							if(LineBuffer[fba]==cgb_bg_pal[0]){
#ifdef	_ASM_
								__asm{
									mov		dl, tilebit
									mov		ebx, fba
									shl		ebx, 1
									lea		edi, [LineBuffer+ebx]
; start
									mov		al, tdd		; tdd&tds&tilebit
									mov		cl, tds
									and		al, cl
									and		al, dl
									cmp		al, dl
									jnz		objl_palette1
									mov		cx, obp3
									mov		[edi], cx
									jmp		objl_palette3
objl_palette1:
									mov		al, tdd		; ~tdd&tds&tilebit
									mov		cl, tds
									not		al
									and		al, cl
									and		al, dl
									cmp		al, dl
									jnz		objl_palette2
									mov		cx, obp2
									mov		[edi], cx
									jmp		objl_palette3
objl_palette2:
									mov		al, tdd		; tdd&~tds&tilebit
									mov		cl, tds
									not		cl
									and		al, cl
									and		al, dl
									cmp		al, dl
									jnz		objl_palette3
									mov		cx, obp1
									mov		[edi], cx
objl_palette3:
								}
#else
								lbuf=&LineBuffer[fba];
								if(tdd&~tds&tilebit)*lbuf=obp1;
								if(~tdd&tds&tilebit)*lbuf=obp2;
								if(tdd&tds&tilebit)*lbuf=obp3;
#endif	/*_ASM_*/
							}
						}else{	/* high-priority sprite: always drawn over the buffer */
#ifdef	_ASM_
							__asm{
								mov		dl, tilebit
								mov		ebx, fba
								shl		ebx, 1
								lea		edi, [LineBuffer+ebx]
; start
								mov		al, tdd		; tdd&tds&tilebit
								mov		cl, tds
								and		al, cl
								and		al, dl
								cmp		al, dl
								jnz		objh_palette1
								mov		cx, obp3
								mov		[edi], cx
								jmp		objh_palette3
objh_palette1:
								mov		al, tdd		; ~tdd&tds&tilebit
								mov		cl, tds
								not		al
								and		al, cl
								and		al, dl
								cmp		al, dl
								jnz		objh_palette2
								mov		cx, obp2
								mov		[edi], cx
								jmp		objh_palette3
objh_palette2:
								mov		al, tdd		; tdd&~tds&tilebit
								mov		cl, tds
								not		cl
								and		al, cl
								and		al, dl
								cmp		al, dl
								jnz		objh_palette3
								mov		cx, obp1
								mov		[edi], cx
objh_palette3:
							}
#else
							lbuf=&LineBuffer[fba];
							if(tdd&~tds&tilebit)*lbuf=obp1;
							if(~tdd&tds&tilebit)*lbuf=obp2;
							if(tdd&tds&tilebit)*lbuf=obp3;
#endif	/*_ASM_*/
						}
					}
				}
			}
		}
	}

	/* Copy the 160 visible pixels from the line buffer to the back buffer. */
#ifdef	_ASM_
	__asm{
		xor		eax, eax	; esi = &LineBuffer[Start+8]
		mov		al, Start
		add		eax, 8
		shl		eax, 1
		lea		esi, [LineBuffer+eax]

		xor		eax, eax	; eax = LY*160 using shifts instead of imul
		mov		al, MEM[R_LY]
		mov		ecx, eax
		shl		eax, 7		; 160 = 128 + 32
		shl		ecx, 5
		add		eax, ecx
		shl		eax, 1		; 2 bytes per pixel
		lea		edi, [FrameBuffer16+eax]
		mov		ecx, 80		; 80 dwords = 160 pixels
transfer_label:
		mov		eax, [esi]
		mov		[edi], eax
		add		esi, 4
		add		edi, 4
		loop	transfer_label
	}
#else
	fba=MEM[R_LY]*160;
	Start+=8;
	lbuf=&LineBuffer[Start];
	fbuf=&FrameBuffer16[fba];
	for(tmpx=0; tmpx<160; tmpx++){
		*fbuf=*lbuf;
		lbuf++;
		fbuf++;
	}
#endif	/*_ASM_*/
}


/*
	DrawLine - render scanline MEM[R_LY] into the 8-bit FrameBuffer.

	The line is composed in a static LineBuffer in three passes
	(background, window, sprites) and then 160 pixels starting at
	LineBuffer[Start+8] (Start = SCX & 7) are copied to
	FrameBuffer[LY*160].

	Pixel values: background/window colour number k (0..3) is stored as
	the 2-bit shade taken from MEM[R_BGP] plus 4*k; sprite pixels are
	stored as the plain 2-bit shade taken from MEM[R_OBP0]/MEM[R_OBP1].

	Takes no arguments and returns nothing; reads MEM and
	EnableBG/EnableWND/EnableOBJ and writes FrameBuffer.

	OBJ_X / OBJ_Y / OBJ_F are macros defined outside this file; they are
	presumably the X, Y and flag bytes of the OAM entry whose tile byte is
	at MEM[tma] - TODO confirm against defines.h.
*/
_inline void DrawLine()
{
	u32	fba, tma_offset, sign_offset;
	u16	tdba, tda, tma, tmpx;
	u8	tdd, tds, tilebit, tmp8;
	u8	Start, tpy, mpy, mpx;
	u8	bgp0, bgp1, bgp2, bgp3, obp1, obp2, obp3;
	u8	tbgp, tobp0, tobp1;
	/* 176 visible/scroll slots plus 8 spare: window and sprite runs may
	   start at index 175 and always write 8 consecutive pixels. */
	static u8 LineBuffer[176+8];
#ifndef	_ASM_
	u8	*fbuf, *lbuf;
#endif	/*_ASM_*/

	/*if(MEM[R_LY]>=144)return;*/	/* disabled: would skip lines 144 and above */
	
	tbgp = MEM[R_BGP];
	bgp0 = tbgp&0x03;	/* BG / window colours */
	bgp1 = ((tbgp>>2)&0x03) + 4;
	bgp2 = ((tbgp>>4)&0x03) + 8;
	bgp3 = ((tbgp>>6)&0x03) + 12;

	memset(LineBuffer, 0, sizeof(LineBuffer));

	if(MEM[R_LCDC]&0x10){	/* select the tile data base address */
		tdba = 0x8000;
		sign_offset = 0x00;
	}else{
		tdba = 0x8800;
		sign_offset = 0x80;	/* tile numbers are signed in this mode */
	}

	Start = MEM[R_SCX];
	mpx = Start>>3;	/* tile map column */
	Start&=7;		/* pixel offset inside the first tile */

	tpy = MEM[R_SCY]+MEM[R_LY];
	mpy = tpy>>3;	/* tile map row */
	tpy&=7;			/* pixel row inside the tile */

	if((MEM[R_LCDC]&0x01) && EnableBG){	/* background pass */
		tma = (MEM[R_LCDC]&0x08)?0x9C00:0x9800;	/* select the tile map address */
		tma+= mpx+(mpy<<5);
		for(tmpx=0; tmpx<168; tmpx+=8){
			tma_offset=(31-mpx)<(tmpx>>3)?32:0;	/* wrap to the start of the 32-tile row */
#ifdef	_ASM_
			__asm{
; tile data address
				xor		ebx, ebx
				mov		bx, tma
				sub		ebx, tma_offset
				xor		ecx, ecx
				mov		cl, [MEM+ebx]
				sub		ecx, sign_offset
				xor		eax, eax
				mov		al, cl
				shl		eax, 4
				add		ax, tdba	; tdba + tile*16 + tpy*2
				xor		ecx, ecx
				mov		cl, tpy
				shl		ecx, 1
				add		eax, ecx
				inc		tma
; eax==tda
				lea		esi, [MEM+eax]
				mov		cl, [esi]
				inc		esi
				mov		dl, [esi]
; fba=tmpx+8;
				xor		eax, eax
				mov		ax, tmpx
				add		eax, 8
				lea		edi, [LineBuffer+eax]
				xor		esi, esi	; tilebit
				mov		si, 80h
bg_start_loop:
				mov		al, cl		; tdd&tds&tilebit
				mov		bl, dl
				and		al, bl
				and		ax, si
				cmp		ax, si
				jnz		bg_palette0
				mov		bl, bgp3
				mov		[edi], bl
				jmp		bg_palette3
bg_palette0:
				mov		al, cl		; ~tdd&tds&tilebit
				mov		bl, dl
				not		al
				and		al, bl
				and		ax, si
				cmp		ax, si
				jnz		bg_palette1
				mov		bl, bgp2
				mov		[edi], bl
				jmp		bg_palette3
bg_palette1:
				mov		al, cl		; tdd&~tds&tilebit
				mov		bl, dl
				not		bl
				and		al, bl
				and		ax, si
				cmp		ax, si
				jnz		bg_palette2
				mov		bl, bgp1
				mov		[edi], bl
				jmp		bg_palette3
bg_palette2:
				mov		al, cl		; remaining case is colour 0
				mov		bl, dl
				and		al, bl
				and		ax, si
				cmp		ax, si
				jz		bg_palette3
				mov		bl, bgp0
				mov		[edi], bl
bg_palette3:
				inc		edi
				shr		si, 1
				jnz		bg_start_loop
			}
#else
			tda = tdba+((u8)(MEM[(tma++)-tma_offset]-sign_offset)<<4)+(tpy<<1);
			tdd = MEM[tda++];
			tds = MEM[tda];
			fba = tmpx+8;
			lbuf=&LineBuffer[fba];
			for(tilebit=0x80; tilebit; tilebit>>=1, lbuf++){
				if(~tdd&~tds&tilebit)*lbuf=bgp0;
				if(tdd&~tds&tilebit)*lbuf=bgp1;
				if(~tdd&tds&tilebit)*lbuf=bgp2;
				if(tdd&tds&tilebit)*lbuf=bgp3;
			}
#endif	/*_ASM_*/
		}
	}

	if((MEM[R_LCDC]&0x20) && EnableWND){	/* window pass */
		if(MEM[R_WY]<144 && MEM[R_LY]>=MEM[R_WY] && MEM[R_WX]<167 && MEM[R_WX]>=7){
			tpy = MEM[R_LY] - MEM[R_WY];
			mpy = tpy>>3;	/* tile map row */
			tpy&= 7;	/* pixel row inside the tile */
			tma = (MEM[R_LCDC]&0x40)?0x9C00:0x9800;
			tma+= mpy<<5;
			for(tmpx=MEM[R_WX]+1; tmpx<168; tmpx+=8){
#ifdef	_ASM_
				__asm{
; tile data address
					xor		ebx, ebx
					mov		bx, tma
					xor		ecx, ecx
					mov		cl, [MEM+ebx]
					sub		ecx, sign_offset
					xor		eax, eax
					mov		al, cl
					shl		eax, 4
					add		ax, tdba	; tdba + tile*16 + tpy*2
					xor		ecx, ecx
					mov		cl, tpy
					shl		ecx, 1
					add		eax, ecx
					inc		tma
; eax==tda
					lea		esi, [MEM+eax]
					mov		cl, [esi]
					inc		esi
					mov		dl, [esi]
; fba=tmpx+Start (same destination as the C path, no extra pixel of offset)
					xor		eax, eax
					mov		ax, tmpx
					add		al, Start
					lea		edi, [LineBuffer+eax]
					mov		esi, 80h		; tilebit
wnd_start_loop:
					mov		al, cl		; tdd&tds&tilebit
					mov		bl, dl
					and		al, bl
					and		ax, si
					cmp		ax, si
					jnz		wnd_palette0
					mov		bl, bgp3
					mov		[edi], bl
					jmp		wnd_palette3
wnd_palette0:
					mov		al, cl		; ~tdd&tds&tilebit
					mov		bl, dl
					not		al
					and		al, bl
					and		ax, si
					cmp		ax, si
					jnz		wnd_palette1
					mov		bl, bgp2
					mov		[edi], bl
					jmp		wnd_palette3
wnd_palette1:
					mov		al, cl		; tdd&~tds&tilebit
					mov		bl, dl
					not		bl
					and		al, bl
					and		ax, si
					cmp		ax, si
					jnz		wnd_palette2
					mov		bl, bgp1
					mov		[edi], bl
					jmp		wnd_palette3
wnd_palette2:
					mov		al, cl		; remaining case is colour 0
					mov		bl, dl
					and		al, bl
					and		ax, si
					cmp		ax, si
					jz		wnd_palette3
					mov		bl, bgp0
					mov		[edi], bl
wnd_palette3:
					inc		edi
					shr		si, 1
					jnz		wnd_start_loop
				}
#else
				tda = tdba+((u8)(MEM[tma++]+sign_offset)<<4)+(tpy<<1);
				tdd = MEM[tda++];
				tds = MEM[tda];
				fba = tmpx+Start;	/* the window is not scrolled, so undo the SCX offset of the final copy */
				lbuf=&LineBuffer[fba];
				for(tilebit=0x80; tilebit; tilebit>>=1, lbuf++){
					if(~tdd&~tds&tilebit)*lbuf=bgp0;
					if(tdd&~tds&tilebit)*lbuf=bgp1;
					if(~tdd&tds&tilebit)*lbuf=bgp2;
					if(tdd&tds&tilebit)*lbuf=bgp3;
				}
#endif	/*_ASM_*/
			}
		}
	}

	if((MEM[R_LCDC]&0x02) && EnableOBJ){	/* sprite pass */
		for(tma=0xFE02; tma<0xFEA2; tma+=4){	/* 40 entries, 4 bytes apart */
			if(OBJ_X && OBJ_Y){	/* skip the sprite when either coordinate is 0 */
				fba=0;
				tpy=MEM[R_LY]-OBJ_Y+16;	/* row inside the sprite (wraps as u8 when off-line) */
				/* sign_offset is reused from here on as the byte offset of the sprite row */
				if(MEM[R_LCDC]&0x04){	/* 8x16 mode */
					if(tpy>=16)fba=255;	/* not on this line: force the range test below to fail */
					else fba=OBJ_X+Start;
					tmp8=MEM[tma]&0xFE;	/* the LSB of the tile number is ignored */
					sign_offset=tpy<<1;
					if(OBJ_F&0x40)sign_offset=0x1E-sign_offset;	/* vertical flip */
				}else{	/* 8x8 mode */
					if(tpy>=8)fba=255;	/* not on this line: force the range test below to fail */
					else fba=OBJ_X+Start;
					tmp8=MEM[tma];
					sign_offset=tpy<<1;
					if(OBJ_F&0x40)sign_offset=0x0E-sign_offset;	/* vertical flip */
				}
				if(fba<176){	/* sprites starting beyond the buffer are dropped */
					tda = 0x8000+(tmp8<<4)+sign_offset;
					tdd = MEM[tda++];
					tds = MEM[tda];
					/* Sprite colours 1..3 (colour 0 is transparent); flag bit
					   0x10 selects OBP1 instead of OBP0. */
					tobp0=MEM[R_OBP0];
					tobp1=MEM[R_OBP1];
					obp1=(OBJ_F&0x10)?0x03&(tobp1>>2):0x03&(tobp0>>2);
					obp2=(OBJ_F&0x10)?0x03&(tobp1>>4):0x03&(tobp0>>4);
					obp3=(OBJ_F&0x10)?0x03&(tobp1>>6):0x03&(tobp0>>6);
					for(tilebit=OBJ_F&0x20?0x01:0x80; tilebit;	/* horizontal flip walks the bits the other way */
						tilebit=OBJ_F&0x20?tilebit<<1:tilebit>>1, fba++){
						if(OBJ_F & 0x80){	/* low-priority sprite */
							if(!LineBuffer[fba]){	/* drawn only where the buffer holds 0 */
#ifdef	_ASM_
								__asm{
									mov		cl, tdd
									mov		dl, tds

									xor		eax, eax
									mov		al, tilebit
									mov		si, ax
									mov		ebx, fba
									lea		edi, [LineBuffer+ebx]
; start
									mov		al, cl		; tdd&tds&tilebit
									mov		bl, dl
									and		al, bl
									and		ax, si
									cmp		ax, si
									jnz		objl_palette1
									mov		bl, obp3
									mov		[edi], bl
									jmp		objl_palette3
objl_palette1:
									mov		al, cl		; ~tdd&tds&tilebit
									mov		bl, dl
									not		al
									and		al, bl
									and		ax, si
									cmp		ax, si
									jnz		objl_palette2
									mov		bl, obp2
									mov		[edi], bl
									jmp		objl_palette3
objl_palette2:
									mov		al, tdd		; tdd&~tds&tilebit
									mov		bl, tds
									not		bl
									and		al, bl
									and		ax, si
									cmp		ax, si
									jnz		objl_palette3
									mov		bl, obp1
									mov		[edi], bl
objl_palette3:
								}
#else
								lbuf=&LineBuffer[fba];
								if(tdd&~tds&tilebit)*lbuf=obp1;
								if(~tdd&tds&tilebit)*lbuf=obp2;
								if(tdd&tds&tilebit)*lbuf=obp3;
#endif	/*_ASM_*/
							}
						}else{	/* high-priority sprite: always drawn over the buffer */
#ifdef	_ASM_
							__asm{
								mov		cl, tdd
								mov		dl, tds

								xor		eax, eax
								mov		al, tilebit
								mov		si, ax
								mov		ebx, fba
								lea		edi, [LineBuffer+ebx]
; start
								mov		al, cl		; tdd&tds&tilebit
								mov		bl, dl
								and		al, bl
								and		ax, si
								cmp		ax, si
								jnz		objh_palette1
								mov		bl, obp3
								mov		[edi], bl
								jmp		objh_palette3
objh_palette1:
								mov		al, cl		; ~tdd&tds&tilebit
								mov		bl, dl
								not		al
								and		al, bl
								and		ax, si
								cmp		ax, si
								jnz		objh_palette2
								mov		bl, obp2
								mov		[edi], bl
								jmp		objh_palette3
objh_palette2:
								mov		al, cl		; tdd&~tds&tilebit
								mov		bl, dl
								not		bl
								and		al, bl
								and		ax, si
								cmp		ax, si
								jnz		objh_palette3
								mov		bl, obp1
								mov		[edi], bl
objh_palette3:
							}
#else
							lbuf=&LineBuffer[fba];
							if(tdd&~tds&tilebit)*lbuf=obp1;
							if(~tdd&tds&tilebit)*lbuf=obp2;
							if(tdd&tds&tilebit)*lbuf=obp3;
#endif	/*_ASM_*/
						}
					}
				}
			}
		}
	}

	/* Copy the 160 visible pixels from the line buffer to the back buffer. */
#ifdef	_ASM_
	__asm{
		xor		eax, eax	; esi = &LineBuffer[Start+8]
		mov		al, Start
		add		eax, 8
		lea		esi, [LineBuffer+eax]

		xor		eax, eax	; eax = LY*160 using shifts instead of imul
		mov		al, MEM[R_LY]
		mov		ecx, eax
		shl		eax, 7		; 160 = 128 + 32
		shl		ecx, 5
		add		eax, ecx
		lea		edi, [FrameBuffer+eax]
		mov		ecx, 40		; 40 dwords = 160 pixels
transfer_label:
		mov		eax, [esi]
		mov		[edi], eax
		add		esi, 4
		add		edi, 4
		loop	transfer_label
	}
#else
	fba=MEM[R_LY]*160;
	Start+=8;
	lbuf=&LineBuffer[Start];
	fbuf=&FrameBuffer[fba];
	for(tmpx=0; tmpx<160; tmpx++){
		*fbuf=*lbuf;
		lbuf++;
		fbuf++;
	}
#endif	/*_ASM_*/

}







#ifdef	_WIN32
/*
	DrawLine16 - render scanline MEM[R_LY] into the 16-bit FrameBuffer16.

	The line is composed in a local LineBuffer in three passes
	(background, window, sprites) and then 160 pixels starting at
	LineBuffer[Start+8] (Start = SCX & 7) are copied to
	FrameBuffer16[LY*160].  Shades are looked up in Colours16.

	Takes no arguments and returns nothing; reads MEM, Colours16 and
	EnableBG/EnableWND/EnableOBJ and writes FrameBuffer16.

	OBJ_X / OBJ_Y / OBJ_F are macros defined outside this file; they are
	presumably the X, Y and flag bytes of the OAM entry whose tile byte is
	at MEM[tma] - TODO confirm against defines.h.
*/
static void DrawLine16()
{
	u32	fba, offset, tma_offset;
	u16	tdba, tda, tma, tmpx;
	u8	tdd, tds, tilebit, tmp8;
	u8	Start, tpy, mpy, mpx;
	u16	bgp0, bgp1, bgp2, bgp3, obp1, obp2, obp3;
	u16	tbgp, tobp0, tobp1;
	/* 176 visible/scroll slots plus 8 spare: window and sprite runs may
	   start at index 175 and always write 8 consecutive pixels. */
	u16	LineBuffer[176+8];
#ifndef	_ASM_
	u16	*fbuf, *lbuf;
#endif	/*_ASM_*/

	/*if(MEM[R_LY]>=144)return;*/	/* disabled: would skip lines 144 and above */

	tma_offset=0;

	tbgp = MEM[R_BGP];
	/* BG / window colours.  Colours 1..3 get a small constant added to the
	   looked-up value - presumably so a pixel of colour 0 stays
	   distinguishable from the others; TODO confirm. */
	bgp0 = Colours16[(tbgp&0x03)];
	bgp1 = Colours16[(tbgp>>2&0x03)]+1;
	bgp2 = Colours16[(tbgp>>4&0x03)]+32;
	bgp3 = Colours16[(tbgp>>6&0x03)]+1024;

	/* Clear the whole buffer (size in bytes, not elements). */
	memset(LineBuffer, 0, sizeof(LineBuffer));

	if(MEM[R_LCDC]&0x10){	/* select the tile data base address */
		tdba = 0x8000;
		offset = 0x00;
	}else{
		tdba = 0x8800;
		offset = 0x80;	/* tile numbers are signed in this mode */
	}

	Start=MEM[R_SCX];
	mpx=Start>>3;	/* tile map column */
	Start&=7;	/* pixel offset inside the first tile */
	
	tpy=MEM[R_SCY]+MEM[R_LY];
	mpy=tpy>>3;	/* tile map row */
	tpy&=7;	/* pixel row inside the tile */

	if((MEM[R_LCDC]&0x01) && EnableBG){	/* background pass */
		tma=(MEM[R_LCDC]&0x08)?0x9C00:0x9800;	/* select the tile map address */
		tma+=mpx+(mpy<<5);
		for(tmpx=0; tmpx<168; tmpx+=8){
			if((31-mpx)<(tmpx>>3))tma_offset=32;	/* wrap to the start of the 32-tile row */
			/* Subtracting the 0x80 bias inside the u8 cast maps signed tile
			   numbers onto the 0x8800-based table. */
			tda=tdba+((u8)(MEM[(tma++)-tma_offset]-offset)<<4)+(tpy<<1);
			tdd=MEM[tda++];
			tds=MEM[tda];
#ifdef	_ASM_
			__asm{
				xor		eax, eax	; edi = &LineBuffer[tmpx+8]
				mov		ax, tmpx
				add		eax, 8
				shl		eax, 1
				lea		edi, [LineBuffer+eax]
				mov		dl, 80h		; tilebit
bg_start_loop:
				mov		al, tdd		; tdd&tds&tilebit
				mov		cl, tds
				and		al, cl
				and		al, dl
				cmp		al, dl
				jnz		bg_palette0
				mov		cx, bgp3
				mov		[edi], cx
				jmp		bg_palette3
bg_palette0:
				mov		al, tdd		; ~tdd&tds&tilebit
				mov		cl, tds
				not		al
				and		al, cl
				and		al, dl
				cmp		al, dl
				jnz		bg_palette1
				mov		cx, bgp2
				mov		[edi], cx
				jmp		bg_palette3
bg_palette1:
				mov		al, tdd		; tdd&~tds&tilebit
				mov		cl, tds
				not		cl
				and		al, cl
				and		al, dl
				cmp		al, dl
				jnz		bg_palette2
				mov		cx, bgp1
				mov		[edi], cx
				jmp		bg_palette3
bg_palette2:
				mov		al, tdd		; remaining case is colour 0
				mov		cl, tds
				and		al, cl
				and		al, dl
				cmp		al, dl
				jz		bg_palette3
				mov		cx, bgp0
				mov		[edi], cx
bg_palette3:
				add		edi, 2
				shr		dl, 1
				jnz		bg_start_loop
			}
#else
			fba=tmpx+8;
			lbuf=&LineBuffer[fba];
			for(tilebit=0x80; tilebit; tilebit>>=1, lbuf++){
				if(~tdd&~tds&tilebit)*lbuf=bgp0;
				if(tdd&~tds&tilebit)*lbuf=bgp1;
				if(~tdd&tds&tilebit)*lbuf=bgp2;
				if(tdd&tds&tilebit)*lbuf=bgp3;
			}
#endif	/*_ASM_*/
		}
	}

	if((MEM[R_LCDC]&0x20) && EnableWND){	/* window pass */
		if(MEM[R_WY]<144 && MEM[R_LY]>=MEM[R_WY] && MEM[R_WX]<167 && MEM[R_WX]>=7){
			tpy=MEM[R_LY]-MEM[R_WY];
			mpy=tpy>>3;	/* tile map row */
			tpy&=7;	/* pixel row inside the tile */
			tma=(MEM[R_LCDC]&0x40)?0x9C00:0x9800;
			tma+=mpy<<5;
			for(tmpx=MEM[R_WX]+1; tmpx<168; tmpx+=8){
				tda=tdba+((u8)(MEM[tma++]+offset)<<4)+(tpy<<1);
				tdd=MEM[tda++];
				tds=MEM[tda];
				fba=tmpx+Start;	/* the window is not scrolled, so undo the SCX offset of the final copy */
#ifdef	_ASM_
				__asm{
					xor		eax, eax	; edi = &LineBuffer[tmpx+Start]
					mov		ax, tmpx
					add		al, Start
					shl		eax, 1
					lea		edi, [LineBuffer+eax]
					mov		dl, 80h		; tilebit
wnd_start_loop:
					mov		al, tdd		; tdd&tds&tilebit
					mov		cl, tds
					and		al, cl
					and		al, dl
					cmp		al, dl
					jnz		wnd_palette0
					mov		cx, bgp3
					mov		[edi], cx
					jmp		wnd_palette3
wnd_palette0:
					mov		al, tdd		; ~tdd&tds&tilebit
					mov		cl, tds
					not		al
					and		al, cl
					and		al, dl
					cmp		al, dl
					jnz		wnd_palette1
					mov		cx, bgp2
					mov		[edi], cx
					jmp		wnd_palette3
wnd_palette1:
					mov		al, tdd		; tdd&~tds&tilebit
					mov		cl, tds
					not		cl
					and		al, cl
					and		al, dl
					cmp		al, dl
					jnz		wnd_palette2
					mov		cx, bgp1
					mov		[edi], cx
					jmp		wnd_palette3
wnd_palette2:
					mov		al, tdd		; remaining case is colour 0
					mov		cl, tds
					and		al, cl
					and		al, dl
					cmp		al, dl
					jz		wnd_palette3
					mov		cx, bgp0
					mov		[edi], cx
wnd_palette3:
					add		edi, 2
					shr		dl, 1
					jnz		wnd_start_loop
				}
#else
				lbuf=&LineBuffer[fba];
				for(tilebit=0x80; tilebit; tilebit>>=1, lbuf++){
					if(~tdd&~tds&tilebit)*lbuf=bgp0;
					if(tdd&~tds&tilebit)*lbuf=bgp1;
					if(~tdd&tds&tilebit)*lbuf=bgp2;
					if(tdd&tds&tilebit)*lbuf=bgp3;
				}
#endif	/*_ASM_*/
			}
		}
	}

	if((MEM[R_LCDC]&0x02) && EnableOBJ){	/* sprite pass */
		for(tma=0xFE02; tma<0xFEA2; tma+=4){	/* 40 entries, 4 bytes apart */
			if(OBJ_X && OBJ_Y){	/* skip the sprite when either coordinate is 0 */
				fba=0;
				tpy=MEM[R_LY]-OBJ_Y+16;	/* row inside the sprite (wraps as u8 when off-line) */
				/* offset is reused from here on as the byte offset of the sprite row */
				if(MEM[R_LCDC]&0x04){	/* 8x16 mode */
					if(tpy>=16)fba=255;	/* not on this line: force the range test below to fail */
					else fba=OBJ_X+Start;
					tmp8=MEM[tma]&0xFE;	/* the LSB of the tile number is ignored */
					offset=tpy<<1;
					if(OBJ_F&0x40)offset=0x1E-offset;	/* vertical flip */
				}else{	/* 8x8 mode */
					if(tpy>=8)fba=255;	/* not on this line: force the range test below to fail */
					else fba=OBJ_X+Start;
					tmp8=MEM[tma];
					offset=tpy<<1;
					if(OBJ_F&0x40)offset=0x0E-offset;	/* vertical flip */
				}
				if(fba<176){	/* sprites starting beyond the buffer are dropped */
					tda=0x8000+(tmp8<<4)+offset;
					tdd=MEM[tda++];
					tds=MEM[tda];	
					/* Sprite colours 1..3 (colour 0 is transparent); flag bit
					   0x10 selects OBP1 instead of OBP0. */
					tobp0 = MEM[R_OBP0];
					tobp1 = MEM[R_OBP1];
					obp1=(OBJ_F&0x10)?0x03&(tobp1>>2):0x03&(tobp0>>2);
					obp2=(OBJ_F&0x10)?0x03&(tobp1>>4):0x03&(tobp0>>4);
					obp3=(OBJ_F&0x10)?0x03&(tobp1>>6):0x03&(tobp0>>6);
					obp1=Colours16[(obp1)];
					obp2=Colours16[(obp2)];
					obp3=Colours16[(obp3)];
					for(tilebit=OBJ_F&0x20?0x01:0x80; tilebit;	/* horizontal flip walks the bits the other way */
						tilebit=OBJ_F&0x20?tilebit<<1:tilebit>>1, fba++){
						if(OBJ_F & 0x80){	/* low-priority sprite */
							/* Drawn only where the buffer equals Colours16[0].
							   NOTE(review): BG colour 0 is stored as bgp0, which
							   differs from Colours16[0] unless BGP maps colour 0
							   to shade 0 - confirm intent. */
							if(LineBuffer[fba]==Colours16[0]){
#ifdef	_ASM_
								__asm{
									mov		dl, tilebit
									mov		ebx, fba
									shl		ebx, 1
									lea		edi, [LineBuffer+ebx]
; start
									mov		al, tdd		; tdd&tds&tilebit
									mov		cl, tds
									and		al, cl
									and		al, dl
									cmp		al, dl
									jnz		objl_palette1
									mov		cx, obp3
									mov		[edi], cx
									jmp		objl_palette3
objl_palette1:
									mov		al, tdd		; ~tdd&tds&tilebit
									mov		cl, tds
									not		al
									and		al, cl
									and		al, dl
									cmp		al, dl
									jnz		objl_palette2
									mov		cx, obp2
									mov		[edi], cx
									jmp		objl_palette3
objl_palette2:
									mov		al, tdd		; tdd&~tds&tilebit
									mov		cl, tds
									not		cl
									and		al, cl
									and		al, dl
									cmp		al, dl
									jnz		objl_palette3
									mov		cx, obp1
									mov		[edi], cx
objl_palette3:
								}
#else
								lbuf=&LineBuffer[fba];
								if(tdd&~tds&tilebit)*lbuf=obp1;
								if(~tdd&tds&tilebit)*lbuf=obp2;
								if(tdd&tds&tilebit)*lbuf=obp3;
#endif	/*_ASM_*/
							}
						}else{	/* high-priority sprite: always drawn over the buffer */
#ifdef	_ASM_
							__asm{
								mov		dl, tilebit
								mov		ebx, fba
								shl		ebx, 1
								lea		edi, [LineBuffer+ebx]
; start
								mov		al, tdd		; tdd&tds&tilebit
								mov		cl, tds
								and		al, cl
								and		al, dl
								cmp		al, dl
								jnz		objh_palette1
								mov		cx, obp3
								mov		[edi], cx
								jmp		objh_palette3
objh_palette1:
								mov		al, tdd		; ~tdd&tds&tilebit
								mov		cl, tds
								not		al
								and		al, cl
								and		al, dl
								cmp		al, dl
								jnz		objh_palette2
								mov		cx, obp2
								mov		[edi], cx
								jmp		objh_palette3
objh_palette2:
								mov		al, tdd		; tdd&~tds&tilebit
								mov		cl, tds
								not		cl
								and		al, cl
								and		al, dl
								cmp		al, dl
								jnz		objh_palette3
								mov		cx, obp1
								mov		[edi], cx
objh_palette3:
							}
#else
							lbuf=&LineBuffer[fba];
							if(tdd&~tds&tilebit)*lbuf=obp1;
							if(~tdd&tds&tilebit)*lbuf=obp2;
							if(tdd&tds&tilebit)*lbuf=obp3;
#endif	/*_ASM_*/
						}
					}
				}
			}
		}
	}

	/* Copy the 160 visible pixels from the line buffer to the back buffer. */
#ifdef	_ASM_
	__asm{
		xor		eax, eax	; esi = &LineBuffer[Start+8]
		mov		al, Start
		add		eax, 8
		shl		eax, 1
		lea		esi, [LineBuffer+eax]

		xor		eax, eax	; eax = LY*160 using shifts instead of imul
		mov		al, MEM[R_LY]
		mov		ecx, eax
		shl		eax, 7		; 160 = 128 + 32
		shl		ecx, 5
		add		eax, ecx
		shl		eax, 1		; 2 bytes per pixel
		lea		edi, [FrameBuffer16+eax]
		mov		ecx, 80		; 80 dwords = 160 pixels
transfer_label:
		mov		eax, [esi]
		mov		[edi], eax
		add		esi, 4
		add		edi, 4
		loop	transfer_label
	}
#else
	fba=MEM[R_LY]*160;
	Start+=8;
	lbuf=&LineBuffer[Start];
	fbuf=&FrameBuffer16[fba];
	for(tmpx=0; tmpx<160; tmpx++){
		*fbuf=*lbuf;
		lbuf++;
		fbuf++;
	}
#endif	/*_ASM_*/
}


#endif	/*_WIN32*/




