/* FrameReconstructor.c */
/* 2009/07/02           */

#include "StdAfx.h"

#include "FrameReconstructor.h"

/* */

/*
 * Copies an 8x8 block whose top-left corner is (x, y) out of `plane`
 * into the 64-byte buffer `block` (row-major, 8 bytes per row).
 * Coordinates falling outside [0, CX) x [0, CY) are clamped to the
 * nearest edge pixel, so the block may lie partly or fully off-plane.
 */
static void Block_Extract8x8(
	const Plane_t* plane,
	INT32          x,
	INT32          y,
	UINT8*         block)
{
	UINT8* out = block;
	INT32  row, col;

	for (row = 0; row < 8; row++) {
		const UINT8* line;
		INT32        sy = y + row;

		/* Clamp the source row to the plane. */
		if (sy < 0) {
			sy = 0;
		} else if (sy >= plane->CY) {
			sy = plane->CY - 1;
		}

		line = plane->Plane + sy * plane->Pitch;

		for (col = 0; col < 8; col++) {
			INT32 sx = x + col;

			/* Clamp the source column to the plane. */
			if (sx < 0) {
				sx = 0;
			} else if (sx >= plane->CX) {
				sx = plane->CX - 1;
			}

			*(out++) = line[sx];
		}
	}
}

/* */

/*
 * Writes an intra-coded 8x8 block into plane `p` at (x, y).
 * Each of the 64 values in `block` (row-major) is biased by +128 and
 * saturated to the range 0..255 before being stored.
 */
static void Block_CopyIntra8x8(
	Plane_t*     p,
	INT32        x,
	INT32        y,
	const INT16* block)
{
	const INT16* src = block;

	UINT8* row  = p->Plane + y * p->Pitch + x;
	UINT8* stop = row + 8 * p->Pitch;

	while (row < stop) {
		INT32 col;

		for (col = 0; col < 8; col++) {
			INT32 value = 128 + src[col];

			if (value < 0) {
				value = 0;
			} else if (value > 255) {
				value = 255;
			}

			row[col] = (UINT8)value;
		}

		src += 8;
		row += p->Pitch;
	}
}

/*
 * Adds an 8x8 residual to the pixels already present in plane `p`
 * at (x, y). Each result is saturated to 0..255 and stored back
 * in place. `block` holds the 64 residuals in row-major order.
 */
static void Block_ReviseInter8x8(
	Plane_t*     p,
	INT32        x,
	INT32        y,
	const INT16* block)
{
	const INT16* src = block;

	UINT8* row  = p->Plane + y * p->Pitch + x;
	UINT8* stop = row + 8 * p->Pitch;

	while (row < stop) {
		INT32 col;

		for (col = 0; col < 8; col++) {
			INT32 value = row[col] + src[col];

			if (value < 0) {
				value = 0;
			} else if (value > 255) {
				value = 255;
			}

			row[col] = (UINT8)value;
		}

		src += 8;
		row += p->Pitch;
	}
}

/* */

/*
 * Copies the 16x16 pixel area at (x, y) from plane `s` to the same
 * position in plane `p`. No clipping is performed; the caller must
 * pass a position that lies inside both planes.
 */
static void Block_CopyPlane16x16(
	Plane_t* p,
	INT32    x,
	INT32    y,
	Plane_t* s)
{
	UINT8*       dst  = p->Plane + y * p->Pitch + x;
	UINT8*       stop = dst + 16 * p->Pitch;
	const UINT8* src  = s->Plane + y * s->Pitch + x;

	while (dst < stop) {
		INT32 col;

		for (col = 0; col < 16; col++) {
			dst[col] = src[col];
		}

		dst += p->Pitch;
		src += s->Pitch;
	}
}

/*
 * Copies the 8x8 pixel area at (x, y) from plane `s` to the same
 * position in plane `p`. No clipping is performed; the caller must
 * pass a position that lies inside both planes.
 */
static void Block_CopyPlane8x8(
	Plane_t* p,
	INT32    x,
	INT32    y,
	Plane_t* s)
{
	UINT8*       dst  = p->Plane + y * p->Pitch + x;
	UINT8*       stop = dst + 8 * p->Pitch;
	const UINT8* src  = s->Plane + y * s->Pitch + x;

	while (dst < stop) {
		INT32 col;

		for (col = 0; col < 8; col++) {
			dst[col] = src[col];
		}

		dst += p->Pitch;
		src += s->Pitch;
	}
}

/* */

/*
 * Forward declaration: the 16x16 compensators defined below fall back
 * to the 8x8 versions when the source area touches the plane border.
 * Copies the 8x8 area of reference plane `r` at (x, y) to `p`
 * (row stride `pitch`).
 */
static void MotionComp_Compensate8x8(
	UINT8*         p,
	INT32          pitch,
	const Plane_t* r,
	INT32          x,
	INT32          y);

/*
 * Forward declaration of the two-source ("H") variant: writes to `p`
 * the per-pixel average of the 8x8 areas of `r` at (x0, y0) and (x1, y1).
 */
static void MotionComp_Compensate8x8H(
	UINT8*         p,
	INT32          pitch,
	const Plane_t* r,
	INT32          x0,
	INT32          y0,
	INT32          x1,
	INT32          y1);

/* */

/*
 * Copies the 16x16 area of reference plane `r` at (x, y) to the
 * destination `p`, whose rows are `pitch` bytes apart.
 *
 * When the source area is not strictly inside the plane, the work is
 * delegated to four MotionComp_Compensate8x8 calls (one per quadrant),
 * which handle edge clamping.
 */
static void MotionComp_Compensate16x16(
	UINT8*         p,
	INT32          pitch,
	const Plane_t* r,
	INT32          x,
	INT32          y)
{
	const UINT8* src;
	UINT8*       dst;
	UINT8*       stop;

	if (x < 0 || x + 16 >= r->CX ||
		y < 0 || y + 16 >= r->CY) {
		/* Border case: process quadrant by quadrant. */
		MotionComp_Compensate8x8(p,                 pitch, r, x,     y    );
		MotionComp_Compensate8x8(p + 8,             pitch, r, x + 8, y    );
		MotionComp_Compensate8x8(p     + 8 * pitch, pitch, r, x,     y + 8);
		MotionComp_Compensate8x8(p + 8 + 8 * pitch, pitch, r, x + 8, y + 8);
		return;
	}

	src  = r->Plane + y * r->Pitch + x;
	dst  = p;
	stop = p + 16 * pitch;

	while (dst < stop) {
		INT32 col;

		for (col = 0; col < 16; col++) {
			dst[col] = src[col];
		}

		src += r->Pitch;
		dst += pitch;
	}
}

/*
 * Copies the 8x8 area of reference plane `r` at (x, y) to the
 * destination `p`, whose rows are `pitch` bytes apart.
 *
 * When the area is not strictly inside the plane, it is first gathered
 * into a local buffer by Block_Extract8x8, which clamps coordinates to
 * the plane edges, and copied from there.
 *
 * The direct source pointer is only computed on the in-bounds path, so
 * no pointer outside the plane buffer is ever formed.
 */
static void MotionComp_Compensate8x8(
	UINT8*         p,
	INT32          pitch,
	const Plane_t* r,
	INT32          x,
	INT32          y)
{
	UINT8 edge[64];

	const UINT8* src;
	INT32        src_pitch;

	UINT8* dst  = p;
	UINT8* stop = p + 8 * pitch;

	if (x < 0 || x + 8 >= r->CX ||
		y < 0 || y + 8 >= r->CY) {
		/* Source touches or crosses the border: use the clamped copy. */
		Block_Extract8x8(r, x, y, edge);
		src       = edge;
		src_pitch = 8;
	} else {
		src       = r->Plane + y * r->Pitch + x;
		src_pitch = r->Pitch;
	}

	while (dst < stop) {
		INT32 col;

		for (col = 0; col < 8; col++) {
			dst[col] = src[col];
		}

		src += src_pitch;
		dst += pitch;
	}
}

/*
 * Two-source 16x16 compensation: every destination pixel becomes
 * (a + b) >> 1, where a and b are the corresponding pixels of the
 * areas of `r` at (x0, y0) and (x1, y1). The sum is not rounded up.
 *
 * When either source area is not strictly inside the plane, the work
 * is delegated to four MotionComp_Compensate8x8H calls (one per
 * quadrant).
 */
static void MotionComp_Compensate16x16H(
	UINT8*         p,
	INT32          pitch,
	const Plane_t* r,
	INT32          x0,
	INT32          y0,
	INT32          x1,
	INT32          y1)
{
	const UINT8* a;
	const UINT8* b;
	UINT8*       dst;
	UINT8*       stop;

	if (x0 < 0 || x0 + 16 >= r->CX ||
		y0 < 0 || y0 + 16 >= r->CY ||
		x1 < 0 || x1 + 16 >= r->CX ||
		y1 < 0 || y1 + 16 >= r->CY) {
		/* Border case: process quadrant by quadrant. */
		MotionComp_Compensate8x8H(p,                 pitch, r, x0,     y0    , x1,     y1    );
		MotionComp_Compensate8x8H(p + 8,             pitch, r, x0 + 8, y0    , x1 + 8, y1    );
		MotionComp_Compensate8x8H(p     + 8 * pitch, pitch, r, x0,     y0 + 8, x1,     y1 + 8);
		MotionComp_Compensate8x8H(p + 8 + 8 * pitch, pitch, r, x0 + 8, y0 + 8, x1 + 8, y1 + 8);
		return;
	}

	a    = r->Plane + y0 * r->Pitch + x0;
	b    = r->Plane + y1 * r->Pitch + x1;
	dst  = p;
	stop = p + 16 * pitch;

	while (dst < stop) {
		INT32 col;

		for (col = 0; col < 16; col++) {
			dst[col] = (a[col] + b[col]) >> 1;
		}

		a   += r->Pitch;
		b   += r->Pitch;
		dst += pitch;
	}
}

/*
 * Two-source 8x8 compensation: every destination pixel becomes
 * (a + b) >> 1, where a and b are the corresponding pixels of the
 * areas of `r` at (x0, y0) and (x1, y1). The sum is not rounded up.
 *
 * When either area is not strictly inside the plane, both are gathered
 * into local buffers by Block_Extract8x8 (edge-clamped) and averaged
 * from there.
 *
 * Direct source pointers are only computed on the in-bounds path, so
 * no pointer outside the plane buffer is ever formed.
 */
static void MotionComp_Compensate8x8H(
	UINT8*         p,
	INT32          pitch,
	const Plane_t* r,
	INT32          x0,
	INT32          y0,
	INT32          x1,
	INT32          y1)
{
	UINT8 edge0[64], edge1[64];

	const UINT8* a;
	const UINT8* b;
	INT32        src_pitch;

	UINT8* dst  = p;
	UINT8* stop = p + 8 * pitch;

	if (x0 < 0 || x0 + 8 >= r->CX ||
		y0 < 0 || y0 + 8 >= r->CY ||
		x1 < 0 || x1 + 8 >= r->CX ||
		y1 < 0 || y1 + 8 >= r->CY) {
		/* At least one source touches the border: clamp both. */
		Block_Extract8x8(r, x0, y0, edge0);
		Block_Extract8x8(r, x1, y1, edge1);

		a         = edge0;
		b         = edge1;
		src_pitch = 8;
	} else {
		a         = r->Plane + y0 * r->Pitch + x0;
		b         = r->Plane + y1 * r->Pitch + x1;
		src_pitch = r->Pitch;
	}

	while (dst < stop) {
		INT32 col;

		for (col = 0; col < 8; col++) {
			dst[col] = (a[col] + b[col]) >> 1;
		}

		a   += src_pitch;
		b   += src_pitch;
		dst += pitch;
	}
}

/* */

/*
 * Motion-compensates the 16x16 block of plane `p` at (x, y) from
 * reference plane `r` using vector `mv`, whose components are in
 * half-pixel units (bit 0 is the half-pixel flag).
 *
 * A whole-pixel vector results in a plain copy. Otherwise two source
 * positions are averaged: both start at (mv >> 1), and for each
 * component with the half-pixel flag set one of them is moved by +1
 * (the second source for a non-negative component, the first source
 * for a negative one).
 */
static void MotionComp_Block16x16(
	Plane_t*              p,
	INT32                 x,
	INT32                 y,
	const Plane_t*        r,
	const MotionVector_t* mv)
{
	INT32 half_x = (mv->X & 1) ? 1 : 0;
	INT32 half_y = (mv->Y & 1) ? 1 : 0;

	INT32 ax = mv->X >> 1;
	INT32 ay = mv->Y >> 1;
	INT32 bx, by;

	UINT8* dst = p->Plane + y * p->Pitch + x;

	if (half_x == 0 && half_y == 0) {
		MotionComp_Compensate16x16(dst, p->Pitch, r, x + ax, y + ay);
		return;
	}

	bx = ax;
	by = ay;

	if (mv->X >= 0) {
		bx += half_x;
	} else {
		ax += half_x;
	}

	if (mv->Y >= 0) {
		by += half_y;
	} else {
		ay += half_y;
	}

	MotionComp_Compensate16x16H(
		dst,
		p->Pitch,
		r,
		x + ax,
		y + ay,
		x + bx,
		y + by);
}

/*
 * Motion-compensates the 8x8 block of plane `p` at (x, y) from
 * reference plane `r` using vector `mv`, whose components are in
 * half-pixel units (bit 0 is the half-pixel flag).
 *
 * A whole-pixel vector results in a plain copy. Otherwise two source
 * positions are averaged: both start at (mv >> 1), and for each
 * component with the half-pixel flag set one of them is moved by +1
 * (the second source for a non-negative component, the first source
 * for a negative one).
 */
static void MotionComp_Block8x8Y(
	Plane_t*              p,
	INT32                 x,
	INT32                 y,
	const Plane_t*        r,
	const MotionVector_t* mv)
{
	INT32 half_x = (mv->X & 1) ? 1 : 0;
	INT32 half_y = (mv->Y & 1) ? 1 : 0;

	INT32 ax = mv->X >> 1;
	INT32 ay = mv->Y >> 1;
	INT32 bx, by;

	UINT8* dst = p->Plane + y * p->Pitch + x;

	if (half_x == 0 && half_y == 0) {
		MotionComp_Compensate8x8(dst, p->Pitch, r, x + ax, y + ay);
		return;
	}

	bx = ax;
	by = ay;

	if (mv->X >= 0) {
		bx += half_x;
	} else {
		ax += half_x;
	}

	if (mv->Y >= 0) {
		by += half_y;
	} else {
		ay += half_y;
	}

	MotionComp_Compensate8x8H(
		dst,
		p->Pitch,
		r,
		x + ax,
		y + ay,
		x + bx,
		y + by);
}

/*
 * Motion-compensates the 8x8 block of plane `p` at (x, y) from
 * reference plane `r`. The incoming vector `mv0` is first reduced to
 * (v >> 1) | (v & 1) per component, i.e. halved while keeping the
 * low bit sticky; the result is then treated as a half-pixel vector
 * exactly as in MotionComp_Block8x8Y.
 * NOTE(review): presumably this derives a subsampled-chroma vector
 * from a luma vector - confirm against the caller that fills MVC.
 */
static void MotionComp_Block8x8C(
	Plane_t*              p,
	INT32                 x,
	INT32                 y,
	const Plane_t*        r,
	const MotionVector_t* mv0)
{
	/* Kept as a MotionVector_t so the field types apply as before. */
	MotionVector_t cmv = {
		(mv0->X >> 1) | (mv0->X & 1),
		(mv0->Y >> 1) | (mv0->Y & 1)
	};

	INT32 half_x = (cmv.X & 1) ? 1 : 0;
	INT32 half_y = (cmv.Y & 1) ? 1 : 0;

	INT32 ax = cmv.X >> 1;
	INT32 ay = cmv.Y >> 1;
	INT32 bx, by;

	UINT8* dst = p->Plane + y * p->Pitch + x;

	if (half_x == 0 && half_y == 0) {
		MotionComp_Compensate8x8(dst, p->Pitch, r, x + ax, y + ay);
		return;
	}

	bx = ax;
	by = ay;

	if (cmv.X >= 0) {
		bx += half_x;
	} else {
		ax += half_x;
	}

	if (cmv.Y >= 0) {
		by += half_y;
	} else {
		ay += half_y;
	}

	MotionComp_Compensate8x8H(
		dst,
		p->Pitch,
		r,
		x + ax,
		y + ay,
		x + bx,
		y + by);
}

/* */

/*
 * Fixed-point cosine table used by the inverse DCT:
 * COS[k] == round(65536 * cos(k * pi / 16)) for k = 0..7.
 */
static const INT32 COS[8] = {
	65536,
	64277,
	60547,
	54491,
	46341,
	36410,
	25080,
	12785
};

/*
 * Multiplies X by COS[T] and discards the 16 fractional bits.
 * Relies on >> of a negative INT32 behaving as an arithmetic shift
 * (implementation-defined in C).
 */
#define MUL(T,X) ((COS[T] * (X)) >> 16)

/*
 * One-dimensional 8-point inverse DCT in 16.16 fixed point.
 *
 * x: eight consecutive input coefficients x[0..7].
 * y: output, written with a stride of 8 elements (y[0], y[8], ...,
 *    y[56]), so a row of the input becomes a column of the output.
 *
 * The intermediates s[0] and s[1] are INT16, so the sums and
 * differences stored in them are truncated to 16 bits before being
 * multiplied.
 * NOTE(review): presumably this truncation is required for
 * bit-exactness with a reference decoder - do not "fix" it without
 * confirming.
 */
static void IDCT_8(
	const INT16* x,
	INT16*       y)
{
	INT32 t[8];
	INT32 t_3, t_2, t_5;
	INT16 s[2];

	/* Stage 1: scale the inputs by the cosine factors. */

	s[0] = x[0] + x[4];
	s[1] = x[0] - x[4];

	t[0] = MUL(4, s[0]);
	t[1] = MUL(4, s[1]);

	t[2] = MUL(6, x[2]) - MUL(2, x[6]);
	t[3] = MUL(2, x[2]) + MUL(6, x[6]);

	t[4] = MUL(7, x[1]) - MUL(1, x[7]);
	t[5] = MUL(3, x[5]) - MUL(5, x[3]);
	t[6] = MUL(5, x[5]) + MUL(3, x[3]);
	t[7] = MUL(1, x[1]) + MUL(7, x[7]);

	/* Stage 2: combine the odd terms; differences are scaled by COS[4]. */

	s[0] = t[4] - t[5];
	s[1] = t[7] - t[6];

	t[4] += t[5];
	t[5]  = MUL(4, s[0]);

	t[7] += t[6];
	t[6]  = MUL(4, s[1]);

	/* Stage 3: differences go to t_3/t_2/t_5, sums replace t[0]/t[1]/t[6]. */

	t_3 = t[0] - t[3];
	t_2 = t[1] - t[2];
	t_5 = t[6] - t[5];

	t[0] += t[3];
	t[1] += t[2];
	t[6] += t[5];

	/* Stage 4: final sums/differences, stored with stride 8 (as INT16). */

	y[0*8] = t[0] + t[7];
	y[1*8] = t[1] + t[6];
	y[2*8] = t_2  + t_5 ;
	y[3*8] = t_3  + t[4];
	y[4*8] = t_3  - t[4];
	y[5*8] = t_2  - t_5 ;
	y[6*8] = t[1] - t[6];
	y[7*8] = t[0] - t[7];
}

/* */

/*
 * Two-dimensional 8x8 inverse DCT.
 *
 * x: 64 input coefficients, row-major.
 * y: 64 outputs, row-major; may be the same buffer as x because the
 *    first pass writes into a temporary.
 *
 * Each pass runs IDCT_8 on the eight rows of its input and stores the
 * results as columns, so two passes restore the original orientation.
 * Every output is finally rounded with (v + 8) >> 4.
 */
static void IDCT_8x8(
	const INT16* x,
	INT16*       y)
{
	INT16 tmp[64];
	INT32 k;

	/* First pass: rows of x -> columns of tmp. */
	for (k = 0; k < 8; k++) {
		IDCT_8(x + k * 8, tmp + k);
	}

	/* Second pass: rows of tmp -> columns of y. */
	for (k = 0; k < 8; k++) {
		IDCT_8(tmp + k * 8, y + k);
	}

	/* Remove the remaining scale factor with rounding. */
	for (k = 0; k < 64; k++) {
		y[k] = (y[k] + 8) >> 4;
	}
}

/* */

/*
 * Per-plane read state for DecodeCoefficients. All three arrays are
 * indexed by the coefficient position (0..63) at which the next token
 * of a block is expected.
 */
struct DecodeCoefficientsContext {

	/* Pending end-of-block count per position: while > 0, the next block
	   reaching that position ends there and the count is decremented. */
	INT32 EOB_Run[64];

	/* Read cursors into the per-position token streams; both advance by
	   one element for every token consumed. A negative Run value marks
	   an end-of-block token whose Coeff value is the EOB count. */
	INT8*  Run  [64];
	INT16* Coeff[64];

}; /* DecodeCoefficientsContext */

typedef struct DecodeCoefficientsContext DecodeCoefficientsContext_t;

/*
 * Expands the tokens of one block into 64 coefficients.
 *
 * t:     unused; kept so existing callers need no change.
 * ctx:   token cursors and pending end-of-block counts; updated in place.
 * block: receives 64 coefficients; positions not set by a token are 0.
 *
 * Returns the position following the last coefficient written
 * (0 when the block ended before any coefficient).
 *
 * At position i the function either consumes one pending end-of-block
 * (EOB_Run[i] > 0) or reads the next (run, coeff) token for i:
 *   - run <  0 : end-of-block token; coeff becomes the pending count
 *                for position i and the loop re-evaluates it.
 *   - run >= 0 : `run` zeros followed by `coeff`. A token that would
 *                reach past position 63 ends the block (the token is
 *                still consumed).
 *
 * The bounds check uses index arithmetic, so no pointer beyond the end
 * of `block` is ever formed, even for large run values.
 */
static INT32 DecodeCoefficients(
	FrameDecoder_t*              t,
	DecodeCoefficientsContext_t* ctx,
	INT16*                       block)
{
	INT32 i = 0; /* next position to fill; also the token stream index */
	INT32 k;

	(void)t;

	while (i < 64) {
		INT32 run;
		INT16 coeff;

		if (ctx->EOB_Run[i] > 0) {
			ctx->EOB_Run[i] -= 1;
			break;
		}

		run   = *((ctx->Run  [i])++);
		coeff = *((ctx->Coeff[i])++);

		if (run < 0) {
			ctx->EOB_Run[i] = coeff;
			continue;
		}

		/* The coefficient would land at i + run; stop if out of range. */
		if (run >= 64 - i) {
			break;
		}

		while (run > 0) {
			block[i++] = 0;
			run--;
		}

		block[i++] = coeff;
	}

	/* Clear whatever follows the last decoded coefficient. */
	for (k = i; k < 64; k++) {
		block[k] = 0;
	}

	return i;
}

/* */

/*
 * Zig-zag scan lookup: ZIGZAG[n] is the position in zig-zag (scan)
 * order of the coefficient whose row-major index in the 8x8 block is n.
 */
static const UINT8 ZIGZAG[64] = {
	 0, 1, 5, 6,14,15,27,28,
	 2, 4, 7,13,16,26,29,42,
	 3, 8,12,17,25,30,41,43,
	 9,11,18,24,31,40,44,53,
	10,19,23,32,39,45,52,54,
	20,22,33,38,46,51,55,60,
	21,34,37,47,50,56,59,61,
	35,36,48,49,57,58,62,63
};

/*
 * Dequantizes one block and reorders it from scan order to row-major.
 *
 * m:        dequantization matrices; Matrix[intra][plane] is used.
 * intra:    first index into m->Matrix.
 * plane:    second index into m->Matrix.
 * block:    receives the 64 products in row-major order.
 * fragment: 64 quantized coefficients in zig-zag (scan) order.
 *
 * Each product is stored as INT16.
 */
static void Dequantize_DoDequantize(
	const DequantizeMatrix_t* m,
	INT32                     intra,
	INT32                     plane,
	INT16*                    block,
	const INT16*              fragment)
{
	const INT16* scale = m->Matrix[intra][plane];
	const UINT8* scan  = ZIGZAG;

	INT16* out  = block;
	INT16* stop = block + 64;

	while (out < stop) {
		*out = fragment[*scan] * *scale;

		out++;
		scan++;
		scale++;
	}
}

/* */

/*
 * Reconstructs one intra 8x8 block of plane `p` at (x, y).
 *
 * dc:    DC value of the block, or NOT_CODED.
 * qi:    index into t->Dequantize.
 * plane: plane number passed to the dequantizer.
 * r:     plane the block is copied from when it is not coded.
 * ctx:   token reader state for this plane.
 *
 * A coded block is decoded, dequantized with the first matrix set
 * (index 0), inverse transformed and written with a +128 bias.
 */
static void Reconstruct_IntraBlock(
	FrameDecoder_t*              t,
	Plane_t*                     p,
	INT32                        x,
	INT32                        y,
	INT16                        dc,
	INT32                        qi,
	INT32                        plane,
	Plane_t*                     r,
	DecodeCoefficientsContext_t* ctx)
{
	INT16 tokens[64];
	INT16 pixels[64];

	if (dc != NOT_CODED) {
		DecodeCoefficients(t, ctx, tokens);

		/* The DC term is supplied by the caller. */
		tokens[0] = dc;

		Dequantize_DoDequantize(&(t->Dequantize[qi]), 0, plane, pixels, tokens);

		IDCT_8x8(pixels, pixels);

		Block_CopyIntra8x8(p, x, y, pixels);
	} else {
		Block_CopyPlane8x8(p, x, y, r);
	}
}

/* */

/*
 * Reconstructs one inter 8x8 block of plane `p` at (x, y) by adding
 * the decoded residual to the prediction already stored in `p`.
 *
 * dc:    DC value of the block, or NOT_CODED.
 * qi:    index into t->Dequantize.
 * plane: plane number passed to the dequantizer.
 * r:     plane copied from when the block is not coded; NULL means
 *        "leave the pixels in p untouched".
 * ctx:   token reader state for this plane.
 *
 * A coded block is decoded, dequantized with the second matrix set
 * (index 1), inverse transformed and added in place with saturation.
 */
static void Reconstruct_InterBlock(
	FrameDecoder_t*              t,
	Plane_t*                     p,
	INT32                        x,
	INT32                        y,
	INT16                        dc,
	INT32                        qi,
	INT32                        plane,
	Plane_t*                     r,
	DecodeCoefficientsContext_t* ctx)
{
	INT16 tokens[64];
	INT16 residual[64];

	if (dc != NOT_CODED) {
		DecodeCoefficients(t, ctx, tokens);

		/* The DC term is supplied by the caller. */
		tokens[0] = dc;

		Dequantize_DoDequantize(&(t->Dequantize[qi]), 1, plane, residual, tokens);

		IDCT_8x8(residual, residual);

		Block_ReviseInter8x8(p, x, y, residual);
	} else if (r != NULL) {
		Block_CopyPlane8x8(p, x, y, r);
	}
}

/* */

/*
 * Pixel offsets (X and Y) of the sixteen 8x8 blocks inside a 32x32
 * superblock, listed in the order in which the blocks are visited.
 * Entries 4*m .. 4*m+3 are the four blocks of macroblock m
 * (see M_PX / M_PY).
 */
static const INT8 S_PX[16] = {
	0*8, 1*8, 1*8, 0*8,
	0*8, 0*8, 1*8, 1*8,
	2*8, 2*8, 3*8, 3*8,
	3*8, 2*8, 2*8, 3*8
};

static const INT8 S_PY[16] = {
	0*8, 0*8, 1*8, 1*8,
	2*8, 3*8, 3*8, 2*8,
	2*8, 3*8, 3*8, 2*8,
	1*8, 1*8, 0*8, 0*8
};

/*
 * Pixel offsets (X and Y) of the four 16x16 macroblocks inside a
 * 32x32 superblock, in visiting order.
 */
static const INT8 M_PX[4] = {
	0*16, 0*16,
	1*16, 1*16
};

static const INT8 M_PY[4] = {
	0*16, 1*16,
	1*16, 0*16
};

/* */

static void Reconstruct_YPlane(
	FrameDecoder_t* t)
{
	INT32 x, y;

	INT32 sx = t->Index->SX[0] * 32;
	INT32 sy = t->Index->SY[0] * 32;

	INT32 mx = t->Index->MX * 16;
	INT32 my = t->Index->MY * 16;

	INT32 bx = t->Index->BX[0];

	const UINT16* bi = t->Index->BIndex[0];

	Plane_t* g = t->Frame[0];
	Plane_t* p = t->Frame[1];
	Plane_t* r = t->Frame[2];

	const UINT8*          mm = t->MBMode;
	const MotionVector_t* mv = t->MV;

	const UINT8* qi = t->BQI;

	DecodeCoefficientsContext_t ctx = { 0 };

	INT32 i;
	for (i = 0; i < 64; i++) {
		ctx.Run  [i] = t->BRun  [0][i];
		ctx.Coeff[i] = t->BCoeff[0][i];
	}

	for (y = 0; y < sy; y += 32) {
		for (x = 0; x < sx; x += 32) {
			INT32 i = 0;

			INT32 m;
			for (m = 0; m < 4; m++, i += 4) {
				INT32 x0 = x + M_PX[m];
				INT32 y0 = y + M_PY[m];
				if (x0 < mx && y0 < my) {
					switch (*mm) {
					case 0: /* INTER_NOMV */
						Block_CopyPlane16x16(p, x0, y0, r);

						Reconstruct_InterBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, NULL, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, NULL, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, NULL, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, NULL, &ctx);
						break;

					case 1: /* INTRA */
						Reconstruct_IntraBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, r, &ctx);
						Reconstruct_IntraBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, r, &ctx);
						Reconstruct_IntraBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, r, &ctx);
						Reconstruct_IntraBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, r, &ctx);
						break;

					case 2: /* INTER_MV */
					case 3: /* INTER_MV_LAST */
					case 4: /* INTER_MV_LAST2 */
						MotionComp_Block16x16(p, x0, y0, r, mv);

						Reconstruct_InterBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, r, &ctx);
						break;

					case 5: /* INTER_GOLDEN_NOMV */
						Block_CopyPlane16x16(p, x0, y0, g);

						Reconstruct_InterBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, r, &ctx);
						break;

					case 6: /* INTER_GOLDEN_MV */
						MotionComp_Block16x16(p, x0, y0, g, mv);

						Reconstruct_InterBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, r, &ctx);
						break;

					case 7: /* INTER_MV_FOUR */
					{
						const MotionVector_t* v = mv;

						const INT16* dc = t->DC + (x0 >> 3) + (y0 >> 3) * bx;

						if (dc[0] != NOT_CODED) {
							MotionComp_Block8x8Y(p, x0 + 0, y0 + 0, r, v++);
						}

						if (dc[1] != NOT_CODED) {
							MotionComp_Block8x8Y(p, x0 + 8, y0 + 0, r, v++);
						}

						if (dc[0 + bx] != NOT_CODED) {
							MotionComp_Block8x8Y(p, x0 + 0, y0 + 8, r, v++);
						}

						if (dc[1 + bx] != NOT_CODED) {
							MotionComp_Block8x8Y(p, x0 + 8, y0 + 8, r, v++);
						}

						Reconstruct_InterBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, r, &ctx);
						Reconstruct_InterBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, r, &ctx);
						break;
					}

					} /* switch */

					bi += 4;
					mm += 1;
					mv += 4;
					qi += 4;
				}
			}
		}
	}
}

/* */

static void Reconstruct_CPlane(
	FrameDecoder_t* t)
{
	INT32 x, y;

	INT32 sx = t->Index->SX[1] * 32;
	INT32 sy = t->Index->SY[1] * 32;

	INT32 mx = t->Index->MX * 8;
	INT32 my = t->Index->MY * 8;

	INT32 bx = t->Index->BX[1];

	Plane_t* g = t->Frame[0];
	Plane_t* p = t->Frame[1];
	Plane_t* r = t->Frame[2];

	const INT16* DC0 = t->DC + t->Index->BC[0];
	const INT16* DC1 = DC0   + t->Index->BC[1];

	const UINT8* m = t->BMode + t->Index->BC[0];

	const UINT8* qi0 = t->BQI + t->Index->BC[0];
	const UINT8* qi1 = qi0    + t->Index->BC[1];

	DecodeCoefficientsContext_t ctx[2] = { 0 };

	INT32 i;
	for (i = 0; i < 64; i++) {
		ctx[0].Run  [i] = t->BRun  [1][i];
		ctx[0].Coeff[i] = t->BCoeff[1][i];

		ctx[1].Run  [i] = t->BRun  [2][i];
		ctx[1].Coeff[i] = t->BCoeff[2][i];
	}

	for (y = 0; y < sy; y += 32) {
		for (x = 0; x < sx; x += 32) {
			INT32 i;
			for (i = 0; i < 16; i++) {
				INT32 xx = x + S_PX[i];
				INT32 yy = y + S_PY[i];

				if (xx < mx && yy < my) {
					INT32 idx = (xx >> 3) + (yy >> 3) * bx;

					switch (m[idx]) {
					case 0: /* INTER_NOMV */
						Block_CopyPlane8x8(p + 1, xx, yy, r + 1);
						Block_CopyPlane8x8(p + 2, xx, yy, r + 2);

						Reconstruct_InterBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, NULL, ctx + 0);
						Reconstruct_InterBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, NULL, ctx + 1);
						break;

					case 1: /* INTRA */
						Reconstruct_IntraBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, r + 1, ctx + 0);
						Reconstruct_IntraBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, r + 2, ctx + 1);
						break;

					case 2: /* INTER_MV */
					case 3: /* INTER_MV_LAST */
					case 4: /* INTER_MV_LAST2 */
						MotionComp_Block8x8C(p + 1, xx, yy, r + 1, t->MVC + idx);
						MotionComp_Block8x8C(p + 2, xx, yy, r + 2, t->MVC + idx);

						Reconstruct_InterBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, r + 1, ctx + 0);
						Reconstruct_InterBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, r + 2, ctx + 1);
						break;

					case 5: /* INTER_GOLDEN_NOMV */
						Block_CopyPlane8x8(p + 1, xx, yy, g + 1);
						Block_CopyPlane8x8(p + 2, xx, yy, g + 2);

						Reconstruct_InterBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, r + 1, ctx + 0);
						Reconstruct_InterBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, r + 2, ctx + 1);
						break;

					case 6: /* INTER_GOLDEN_MV */
						MotionComp_Block8x8C(p + 1, xx, yy, g + 1, t->MVC + idx);
						MotionComp_Block8x8C(p + 2, xx, yy, g + 2, t->MVC + idx);

						Reconstruct_InterBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, r + 1, ctx + 0);
						Reconstruct_InterBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, r + 2, ctx + 1);
						break;

					case 7: /* INTER_MV_FOUR */
						MotionComp_Block8x8C(p + 1, xx, yy, r + 1, t->MVC + idx);
						MotionComp_Block8x8C(p + 2, xx, yy, r + 2, t->MVC + idx);

						Reconstruct_InterBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, r + 1, ctx + 0);
						Reconstruct_InterBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, r + 2, ctx + 1);
						break;

					} /* switch */

					qi0++;
					qi1++;
				}
			}
		}
	}
}

/* */

/*
 * Filters across a vertical block edge, i.e. in the horizontal
 * direction.
 *
 * t: filter state; t->Delta is read at offsets 127 + (-127..128).
 * b: first pixel to the right of the edge in the top row.
 * s: row stride in bytes.
 *
 * For each of 8 rows the two pixels adjacent to the edge (b[-1] and
 * b[0]) are adjusted by a table-driven correction computed from the
 * four pixels b[-2..1]; results are saturated to 0..255.
 */
static void Filter_LoopFilterH(
	const LoopFilter_t* t,
	UINT8*              b,
	INT32               s)
{
	const INT16* delta = t->Delta + 127;

	UINT8* row  = b;
	UINT8* stop = b + s * 8;

	while (row < stop) {
		INT32 edge  = (row[-2] - row[1]) + 3 * (row[0] - row[-1]);
		INT32 adj   = delta[(edge + 4) >> 3];

		INT32 left  = row[-1] + adj;
		INT32 right = row[ 0] - adj;

		if (left < 0) {
			left = 0;
		} else if (left > 255) {
			left = 255;
		}

		if (right < 0) {
			right = 0;
		} else if (right > 255) {
			right = 255;
		}

		row[-1] = (UINT8)left;
		row[ 0] = (UINT8)right;

		row += s;
	}
}

/*
 * Filters across a horizontal block edge, i.e. in the vertical
 * direction.
 *
 * t: filter state; t->Delta is read at offsets 127 + (-127..128).
 * b: leftmost pixel of the row just below the edge.
 * s: row stride in bytes.
 *
 * For each of 8 columns the two pixels adjacent to the edge (one row
 * above and the row at b) are adjusted by a table-driven correction
 * computed from the four pixels at rows -2..+1; results are saturated
 * to 0..255.
 */
static void Filter_LoopFilterV(
	const LoopFilter_t* t,
	UINT8*              b,
	INT32               s)
{
	const INT16* delta = t->Delta + 127;

	INT32 col;

	for (col = 0; col < 8; col++) {
		UINT8* px = b + col;

		INT32 edge  = (px[-2 * s] - px[1 * s]) + 3 * (px[0] - px[-1 * s]);
		INT32 adj   = delta[(edge + 4) >> 3];

		INT32 above = px[-s] + adj;
		INT32 below = px[ 0] - adj;

		if (above < 0) {
			above = 0;
		} else if (above > 255) {
			above = 255;
		}

		if (below < 0) {
			below = 0;
		} else if (below > 255) {
			below = 255;
		}

		px[-s] = (UINT8)above;
		px[ 0] = (UINT8)below;
	}
}

/* */

/*
 * Runs the loop filter over all three planes of the current frame,
 * t->Frame[1].
 *
 * Blocks are visited in raster order, plane after plane, with one
 * t->DC entry per block. For every coded block (DC != NOT_CODED):
 *   - its left and top edges are filtered unless they lie on the
 *     plane border;
 *   - its right and bottom edges are filtered only when the
 *     neighbour on that side is not coded.
 */
static void FrameLoopFilter(
	FrameDecoder_t* t)
{
	const INT16* coded = t->DC;

	Plane_t* plane = t->Frame[1];

	INT32 index;

	for (index = 0; index < 3; index++, plane++) {
		INT32 cols = t->Index->BX[index];
		INT32 rows = t->Index->BY[index];

		UINT8* line = plane->Plane;

		INT32 row;

		for (row = 0; row < rows; row++, line += plane->Pitch * 8) {
			UINT8* blk = line;

			INT32 col;

			for (col = 0; col < cols; col++, blk += 8, coded++) {
				if (*coded == NOT_CODED) {
					continue;
				}

				if (col > 0) {
					Filter_LoopFilterH(&(t->Filter), blk, plane->Pitch);
				}

				if (row > 0) {
					Filter_LoopFilterV(&(t->Filter), blk, plane->Pitch);
				}

				/* Right edge: only when the right neighbour is not coded. */
				if (col < cols - 1 && coded[1] == NOT_CODED) {
					Filter_LoopFilterH(&(t->Filter), blk + 8, plane->Pitch);
				}

				/* Bottom edge: only when the neighbour below is not coded. */
				if (row < rows - 1 && coded[cols] == NOT_CODED) {
					Filter_LoopFilterV(&(t->Filter), blk + 8 * plane->Pitch, plane->Pitch);
				}
			}
		}
	}
}

/* */

/*
 * Reconstructs the current frame from the decoded data held in `t`:
 * first the luma plane, then both chroma planes, and finally the loop
 * filter, which runs only when t->Filter.Limit is greater than zero.
 */
void QT_ReconstructFrame(
	FrameDecoder_t* t)
{
	Reconstruct_YPlane(t);
	Reconstruct_CPlane(t);

	if (!(t->Filter.Limit > 0)) {
		return; /* loop filter disabled */
	}

	FrameLoopFilter(t);
}

/* */

