/* FrameDecoder_Impl.h */
/* 2009/06/29          */

/* */

/*
 * Reads the frame header from the bit reader `d` into `t`.
 *
 * Fills t->Type, t->QIS[0..NQIS-1] and t->NQIS (1 to 3).
 * Returns FALSE when the leading bit is set or when the three bits that
 * follow the quantizer indices of a type-0 frame are not all zero.
 */
static BOOL FrameHeader_Decode(
	FrameHeader_t* t,
	BitReader_t*   d)
{
	INITIATE_BITS

	INT32 count = 0; /* quantizer indices stored so far */
	INT32 bit;
	INT32 x;

	/* The very first bit has to be clear. */
	FETCH_BITS_I(bit, 1)
	if (bit != 0) {
		return FALSE;
	}

	FETCH_BITS_I(t->Type, 1)

	/* Up to three 6-bit quantizer indices. After the first and the second
	   one a 1-bit flag tells whether another index follows. */
	for (;;) {
		FETCH_BITS_I(t->QIS[count], 6)
		count++;

		if (count == 3) {
			break;
		}

		FETCH_BITS_I(bit, 1)
		if (bit == 0) {
			break;
		}
	}

	t->NQIS = count;

	/* INTRA frames (type 0) carry three extra bits that must be zero. */
	if (t->Type == 0) {
		FETCH_BITS_I(x, 3)
		if (x != 0) {
			return FALSE;
		}
	}

	FINALIZE_BITS

	return TRUE;
}

/* */

/*
 * Decodes the per-block "coded" flags of a frame.
 *
 * Three run-length coded passes are read from `d`:
 *   1. one flag per super block, stored in t->SBCoded (1 = handled by
 *      pass 3, 0 = handled by pass 2);
 *   2. for every super block flagged 0, one flag telling whether all of its
 *      blocks are coded (t->SBCoded becomes -1) or none of them is;
 *   3. for every super block flagged 1, one flag per block.
 *
 * On return t->BCoded holds 1 for coded and 0 for uncoded blocks and
 * t->BlocksCoded[0..2] holds the number of coded blocks per plane.
 * Returns FALSE when a pass ends while its current run is still open.
 */
static BOOL FrameDecoder_DecodeCodedBlockFlag(
	FrameDecoder_t* t,
	BitReader_t*    d)
{
	INITIATE_BITS

	INT8*  p;
	UINT8* coded;
	INT32  x, i, j;

	RunLength_t r;

	/* Number of super blocks flagged 0 / flagged 1 by the first pass. */
	INT32 full = 0;
	INT32 part = 0;

	t->BlocksCoded[0] = 0;
	t->BlocksCoded[1] = 0;
	t->BlocksCoded[2] = 0;

	/* Start with every block marked as coded; the passes below clear flags. */
	memset(t->BCoded, 1, sizeof(UINT8) * t->Index->Blocks);

	/* Pass 1: PARTIAL or FULL, one flag per super block. */

	FETCH_BITS_I(x, 1)
	RunLength_Start(&r, x);

	p = t->SBCoded;
	for (i = 0; i < 3; i++) {
		INT8* e = p + t->Index->SC[i];
		for (; p < e; p++) {
			RL_LONG_DECODE

			*p = r.Bit;
			if (r.Bit != 0) {
				part++;
			} else {
				full++;
			}
		}
	}

	if (r.Run > 0) {
		return FALSE;
	}

	/* Pass 2: FULL, only read when pass 1 produced at least one 0 flag. */

	if (full > 0) {
		coded = t->BCoded;

		FETCH_BITS_I(x, 1)
		RunLength_Start(&r, x);

		p = t->SBCoded;
		for (i = 0; i < 3; i++) {
			const UINT8* c = t->Index->BCount[i];

			INT8* e = p + t->Index->SC[i];
			for (; p < e; p++, c++) {
				if (*p == 0) {
					RL_LONG_DECODE

					if (r.Bit == 0) {
						/* Nothing in this super block is coded. */
						memset(coded, 0, sizeof(UINT8) * (*c));

					} else {
						/* Every block of this super block is coded. */
						*p = -1;

						t->BlocksCoded[i] += *c;
					}
				}

				/* Advance to the first block of the next super block. */
				coded += *c;
			}
		}

		if (r.Run > 0) {
			return FALSE;
		}
	}

	/* Pass 3: PARTIAL, one flag per block of each super block flagged 1. */

	if (part > 0) {
		coded = t->BCoded;

		FETCH_BITS_I(x, 1)
		RunLength_Start(&r, x);

		p = t->SBCoded;
		for (i = 0; i < 3; i++) {
			const UINT8* c = t->Index->BCount[i];

			INT8* e = p + t->Index->SC[i];
			for (; p < e; p++, c++) {
				if (*p == 1) {
					for (j = 0; j < *c; j++) {
						RL_SHORT_DECODE

						if (r.Bit == 0) {
							coded[j] = 0;

						} else {
							t->BlocksCoded[i] += 1;
						}
					}
				}

				coded += *c;
			}
		}

		if (r.Run > 0) {
			return FALSE;
		}
	}

	FINALIZE_BITS

	return TRUE;
}

/* */

/*
 * Decodes the coding mode of every macro block.
 *
 * A 3-bit scheme number selects the mode alphabet from M_MODE; scheme 0
 * additionally reads a custom alphabet from the stream. A mode is only read
 * for macro blocks with at least one of their four leading BCoded flags set;
 * all others get mode 0. The mode is written to t->MBMode and copied to the
 * t->BMode entries addressed through MBIndex (four per macro block) and
 * CBIndex (one entry in each of the two following planes).
 *
 * Always returns TRUE.
 */
static BOOL FrameDecoder_DecodeMacroBlockCodingModes(
	FrameDecoder_t* t,
	BitReader_t*    d)
{
	INITIATE_BITS

	INT32 scheme;
	UINT8 mode[8];

	UINT8* m   = t->MBMode;
	UINT8* end = m + t->Index->MC;

	const UINT16* mbi = t->Index->MBIndex;
	const UINT16* cbi = t->Index->CBIndex;

	/* Per-plane views into the block mode array. */
	UINT8* plane0 = t->BMode;
	UINT8* plane1 = plane0 + t->Index->BC[0];
	UINT8* plane2 = plane1 + t->Index->BC[1];

	const UINT8* c = t->BCoded;

	FETCH_BITS_I(scheme, 3)
	memcpy(mode, M_MODE[scheme], 8);

	if (scheme == 0) {
		/* Custom alphabet: the i-th 3-bit value read names the slot that
		   receives mode i. */
		INT32 i;
		for (i = 0; i < 8; i++) {
			INT32 x;
			FETCH_BITS_I(x, 3)
			mode[x] = i;
		}
	}

	for (; m < end; m++, c += 4, mbi += 4, cbi++) {
		UINT8 picked = 0;

		if ((c[0] | c[1] | c[2] | c[3]) != 0) {
			if (scheme != 7) {
				/* Variable-length code resolved through H_MODE: the low
				   nibble is the code length, the rest the alphabet slot. */
				INT32 code;
				LOAD_BITS
				code = H_MODE[GET_BITS_I(7)];
				RETIRE_BITS(code & 0xf);
				picked = mode[code >> 4];

			} else {
				/* Scheme 7 stores the alphabet slot as a plain 3-bit value. */
				INT32 x;
				FETCH_BITS_I(x, 3)
				picked = mode[x];
			}
		}

		*m = picked;

		plane0[mbi[0]] = picked;
		plane0[mbi[1]] = picked;
		plane0[mbi[2]] = picked;
		plane0[mbi[3]] = picked;
		plane1[cbi[0]] = picked;
		plane2[cbi[0]] = picked;
	}

	FINALIZE_BITS

	return TRUE;
}

/* */

/*
 * Decodes the motion vectors of every macro block according to the coding
 * mode stored in t->MBMode.
 *
 * For each macro block four entries of t->MV are reserved (mv advances by 4)
 * and one entry of t->MVC, addressed through CBIndex, is written. A leading
 * 1-bit flag selects DecodeMV0 or DecodeMV1 as the vector decoder for the
 * whole frame. Always returns TRUE.
 */
static BOOL FrameDecoder_DecodeMotionVectors(
	FrameDecoder_t* t,
	BitReader_t*    d)
{
	INITIATE_BITS

	const UINT8* m   = t->MBMode;
	const UINT8* end = m + t->Index->MC;

	const UINT16* cbi = t->Index->CBIndex;

	const UINT8* c = t->BCoded;

	MotionVector_t* mv = t->MV;

	/* Vector history: last[0] is the most recent vector, last[1] the one
	   before it. Only modes 2, 4 and 7 update it. */
	MotionVector_t last[2] = { { 0, 0 }, { 0, 0 } };

	/* Decoder callback: receives 16 peeked bits and returns a value that is
	   then retired from the bit reader (presumably the number of bits the
	   vector occupied - confirm against DecodeMV0/DecodeMV1). */
	INT32 (*Decode)(MotionVector_t*, INT32);

	INT32 x, s;
	FETCH_BITS_I(x, 1)

	Decode = (x == 0) ? DecodeMV0 : DecodeMV1;

	for (; m < end; m++, c += 4, mv += 4, cbi++) {
		switch (*m) {
		case 2: /* INTER_MV */
			/* Explicit vector; it becomes the newest history entry. */
			LOAD_BITS
			x = GET_BITS_I(16);
			s = Decode(mv, x);
			RETIRE_BITS(s)

			last[1] = last[0];
			last[0] = *mv;
			break;

		case 3: /* INTER_MV_LAST */
			/* Reuse the newest vector; the history is left untouched. */
			*mv = last[0];
			break;

		case 4: /* INTER_MV_LAST2 */
			/* Reuse the second newest vector and move it to the front. */
			*mv = last[1];

			last[1] = last[0];
			last[0] = *mv;
			break;

		case 6: /* INTER_GOLDEN_MV */
			/* Explicit vector that does not enter the history. */
			LOAD_BITS
			x = GET_BITS_I(16);
			s = Decode(mv, x);
			RETIRE_BITS(s)
			break;

		case 7: /* INTER_MV_FOUR */
		{
			/* mv0 tracks the vector decoded last (defaults to mv[0]). */
			MotionVector_t* mv0 = mv;

			/* v is the next free slot among the four entries. */
			MotionVector_t* v = mv;

			/* Component sums of the decoded vectors. */
			INT32 cmx = 0;
			INT32 cmy = 0;

			INT32 i;
			for (i = 0; i < 4; i++) {
				if (c[i] != 0) {
					LOAD_BITS
					x = GET_BITS_I(16);
					s = Decode(v, x);
					RETIRE_BITS(s)

					cmx += v->X;
					cmy += v->Y;

					/* NOTE(review): vectors are stored consecutively in
					   decode order, not at slot i of the block they belong
					   to - confirm the consumer expects this packing. */
					mv0 = v++;
				}
			}

			/* Slots not filled by a decoded vector are zeroed. */
			for (; v < mv + 4; v++) {
				v->X = 0;
				v->Y = 0;
			}

			/* Entry for t->MVC: sum of the decoded vectors plus a
			   sign-dependent bias from CMV, shifted right by two
			   (presumably a rounded average - confirm CMV's values). */
			t->MVC[*cbi].X = (cmx + CMV[cmx < 0]) >> 2;
			t->MVC[*cbi].Y = (cmy + CMV[cmy < 0]) >> 2;

			last[1] = last[0];
			last[0] = *mv0;
			break;
		}

		default:
			/* Every other mode uses a zero vector. */
			mv->X = 0;
			mv->Y = 0;
			break;

		} /* switch */

		/* All modes except INTER_MV_FOUR copy their single vector to MVC. */
		if (*m != 7) { /* INTER_MV_FOUR */
			t->MVC[*cbi] = *mv;
		}
	}

	FINALIZE_BITS

	return TRUE;
}

/* */

/*
 * Decodes the per-block quantizer index selectors into t->BQI.
 *
 * The caller is expected to have zeroed t->BQI. Two run-length coded passes
 * are read from `d`:
 *   1. one bit per coded block (t->BCoded != 0), stored as the block's
 *      selector (0 or 1);
 *   2. only when the header announces three quantizer indices and pass 1
 *      produced at least one selector of 1: one bit per block whose selector
 *      is non-zero, added to that selector (giving 1 or 2).
 *
 * Within a pass the bit value toggles after each run, except that a run
 * reaching RL_LONG_FULL_RUN is followed by an explicitly coded bit.
 *
 * When no block is coded at all there is nothing to decode, so no bits are
 * consumed and the function succeeds.
 *
 * Returns FALSE when a pass ends while its last run still has bits left.
 */
static BOOL FrameDecoder_BlockLevelQIDecode(
	FrameDecoder_t* t,
	BitReader_t*    d)
{
	INITIATE_BITS

	INT32 nqi = 0; /* number of blocks that received selector 1 in pass 1 */

	UINT8* qi = t->BQI;

	const UINT8* c   = t->BCoded;
	const UINT8* end = c + t->Index->Blocks;

	INT32 bit, run, full;

	/* Skip leading uncoded blocks before touching the bit stream, so that a
	   frame without any coded block reads no flag bit and no run length. */
	for (; c < end && *c == 0; c++, qi++)
		;

	if (c < end) {
		FETCH_BITS_I(bit, 1)
		run = 0;

		while (c < end) {
			RL_LONG_DECODE_RUN(run)
			full = (run >= RL_LONG_FULL_RUN);

			/* Hand the current bit to the next `run` coded blocks. */
			for (; run > 0 && c < end; c++, qi++) {
				if (*c != 0) {
					run--;
					*qi  = bit;
					nqi += bit;
				}
			}

			/* Skip uncoded blocks so that `c < end` means "bits remain". */
			for (; c < end && *c == 0; c++, qi++)
				;

			if (full != 0 && c < end) {
				FETCH_BITS_I(bit, 1)
			} else {
				bit = !bit;
			}
		}

		if (run > 0) {
			return FALSE;
		}
	}

	if (t->Header.NQIS == 3 && nqi > 0) {
		UINT8* eqi = t->BQI + t->Index->Blocks;
		qi = t->BQI;

		FETCH_BITS_I(bit, 1)
		run = 0;

		while (qi < eqi) {
			RL_LONG_DECODE_RUN(run)
			full = (run >= RL_LONG_FULL_RUN);

			/* Only blocks that got selector 1 in pass 1 take part. */
			for (; run > 0 && qi < eqi; qi++) {
				if (*qi != 0) {
					run--;
					*qi += bit;
				}
			}

			for (; qi < eqi && *qi == 0; qi++)
				;

			if (full != 0 && qi < eqi) {
				FETCH_BITS_I(bit, 1)
			} else {
				bit = !bit;
			}
		}

		if (run > 0) {
			return FALSE;
		}
	}

	FINALIZE_BITS

	return TRUE;
}

/* */

/*
 * Decodes the tokens of coefficient position `index` for one plane.
 *
 * ctx->BlocksCoded[index] tells how many blocks still expect a token at this
 * position. For each of them either a pending end-of-block run (carried in
 * ctx->EOB_Run across calls) is consumed or a new token is read with the
 * Huffman decoder `h`:
 *   - tokens 0..6 start an end-of-block run;
 *   - tokens >= 7 yield a (zero run, coefficient) pair.
 *
 * Output is appended at ctx->Run / ctx->Coeff: a pair is stored as
 * (run, coeff); a group of ended blocks is stored as (-1, number of blocks).
 * Afterwards ctx->BlocksCoded[index + 1 .. 63] is reduced by the number of
 * blocks that have no token at those positions, either because a zero run
 * spans them or because the block has ended.
 *
 * Returns FALSE on an undecodable Huffman code, on a zero run that would
 * pass the 64th coefficient, or when REST_BITS is negative after decoding.
 */
static BOOL FrameDecoder_DecodeBlocks(
	FrameDecoder_t*           t,
	BitReader_t*              d,
	const HuffmanDecoder_t*   h,
	DCTCoefficientsContext_t* ctx,
	INT32                     index)
{
	INITIATE_BITS

	/* run_count[r]: number of tokens decoded here with a zero run of r. */
#ifdef ARCH_MMX
	INT32 run_count[64];
#else
	INT32 run_count[64] = { 0 };
#endif

	INT32 total_eob = 0; /* blocks ended at this position */

	INT32 count = ctx->BlocksCoded[index];

	INT32 eob_run = ctx->EOB_Run;

	INT8*  prun   = ctx->Run;
	INT16* pcoeff = ctx->Coeff;

#ifdef ARCH_MMX
	{
		/* Clear run_count with 64-bit stores. */
		__m64* d = (__m64*) run_count;
		__m64* e = (__m64*)(run_count + 64);

		__m64 z = _mm_setzero_si64();

		for (; d < e; d += 8) {
			d[0] = z;
			d[1] = z;
			d[2] = z;
			d[3] = z;
			d[4] = z;
			d[5] = z;
			d[6] = z;
			d[7] = z;
		}
	}
#endif

	while (count > 0) {
		INT32 run = 0, coeff = 0;

		if (eob_run == 0) {
			INT32 token = -1;
			INT32 bits  = 0;
#ifdef ARCH_C
			INT32 rbits = 0;
#endif

			UINT32 code = 0;

			LOAD_BITS

			/* h->LookupBits == 10 */

			/* Fast path: codes of up to 10 bits are resolved by table. */
			{
				INT32 len;

				code = GET_BITS_I(10);
				len  = h->LookupLength[code];

				if (len > 0) {
					RETIRE_BITS(len)
					token = h->LookupToken[code];

				} else {
#ifdef ARCH_C
					RETIRE_BITS(h->LookupBits)
					rbits = bits = h->LookupBits;
#else
					bits = 10;
#endif
				}
			}

			/* Slow path: try each longer code length in turn and binary
			   search the sorted codes of that length. */
			for (; token < 0 && bits < BITS_COUNT; bits++) {
				INT32 es = h->Count[bits], idx;
				if (es < 0) {
					break;
				}

				idx = h->Index[bits];

#ifdef ARCH_C
				if (idx >= 0) {
					/* Only the bits not consumed yet are appended. */
					INT32 len = bits + 1 - rbits;

					const UINT32* s = h->Code + idx;
					const UINT32* e = s + es;

					LOAD_BITS
					code = (code << len) | GET_BITS(len);
					RETIRE_BITS(len)

					rbits = bits + 1;

					while (s < e) {
						const UINT32* x = s + (e - s) / 2;
						if (code == *x) {
							token = h->Token[x - h->Code];
							break;
						}

						if (code < *x) {
							e = x;
						} else {
							s = x + 1;
						}
					}
				}
#else
				if (idx >= 0) {
					INT32 len = bits + 1;

					const UINT32* s = h->Code + idx;
					const UINT32* e = s + es;

					/* LOAD_BITS */
					/* The candidate is only peeked; bits are retired once a
					   matching code has been found. */
					code = GET_BITS(len);

					while (s < e) {
						const UINT32* x = s + (e - s) / 2;
						if (code == *x) {
							RETIRE_BITS(len)

							token = h->Token[x - h->Code];
							break;
						}

						if (code < *x) {
							e = x;
						} else {
							s = x + 1;
						}
					}
				}
#endif

			}

			if (token < 0) {
				return FALSE;
			}

			if (token < 7) {
				INT32 blen = EOB_BITS_LEN[token];
				eob_run = EOB_RUN_BASE[token];
				if (blen > 0) {
					INT32 x;
					FETCH_BITS(x, blen)
					eob_run += x;
				}

				/* A run length of zero must not fall through to the
				   coefficient branch below: `run` and `coeff` were never
				   decoded for an end-of-block token. The Theora format
				   defines a zero-length run as "every remaining block", so
				   use a run that can never be exhausted. */
				if (eob_run == 0) {
					eob_run = 0x7fffffff;
				}

			} else {
				coeff = COEFF_BASE[token - 7];
				{
					INT32 clen = COEFF_BITS_LEN[token - 7];
					if (clen > 0) {
						INT32 x;
						if (clen == 1) {
							/* A single extra bit is the sign only. */
							FETCH_BITS_I(x, 1)
							coeff *= COEFF_SIGN[x];

						} else {
							/* Top extra bit is the sign, the rest is added
							   to the base magnitude. */
							FETCH_BITS(x, clen)
							coeff = COEFF_SIGN[x >> (clen - 1)] * (coeff + (x & ((1 << (clen - 1)) - 1)));
						}
					}
				}

				run = RUN_BASE[token - 7];
				{
					INT32 rlen = RUN_BITS_LEN  [token - 7];
					if (rlen > 0) {
						INT32 x;
						FETCH_BITS(x, rlen)
						run += x;
					}
				}
			}
		}

		if (eob_run == 0) {
			/* Reject a zero run that would pass the 64th coefficient before
			   anything is stored or counted. */
			if (index + run + 1 > 64) {
				return FALSE;
			}

			*(prun  ++) = run;
			*(pcoeff++) = coeff;

			run_count[run] += 1;

			count -= 1;

		} else {
			/* End as many blocks as the run and this position allow; the
			   remainder of the run is carried over to the next call. */
			INT32 eob = eob_run;
			if (eob > count) {
				eob = count;
			}

			*(prun  ++) = -1;
			*(pcoeff++) = eob;

			total_eob += eob;

			eob_run -= eob;

			count -= eob;
		}
	}

	ctx->Run   = prun;
	ctx->Coeff = pcoeff;

	ctx->EOB_Run = eob_run;

	{
		INT32 i, j = 1;

		/* Ended blocks are missing from every later position. */
		run_count[63 - index] += total_eob;

		/* Turn run_count[i] into "tokens whose zero run is at least i". */
		for (i = 63 - index - 1; i >= 1; i--) {
			run_count[i] += run_count[i + 1];
		}

		/* A zero run of at least j skips position index + j. */
		for (i = index + 1; i < 64; i++, j++) {
			ctx->BlocksCoded[i] -= run_count[j];
		}
	}

	FINALIZE_BITS

	if (REST_BITS < 0) {
		return FALSE;
	}

	return TRUE;
}

/*
 * Decodes all DCT tokens of a frame.
 *
 * The 64 coefficient positions are walked in five consecutive groups whose
 * sizes come from COEFFS. At the start of each of the first two groups two
 * 4-bit Huffman table selectors are read (hy for plane 0, hc for planes 1
 * and 2); the later groups keep the selectors of the second group. The
 * table actually used is entry (group * 16 + selector) of t->Setup->Huffman.
 *
 * Before each (position, plane) pair is decoded, the current output cursors
 * are recorded in t->BRun / t->BCoeff. Returns FALSE as soon as
 * FrameDecoder_DecodeBlocks fails.
 */
static BOOL FrameDecoder_DecodeDCTCoefficients(
	FrameDecoder_t* t,
	BitReader_t*    d)
{
	INT32 group, plane, pos;

	const HuffmanDecoder_t* huff[3];

	INT32 index = 0;

	INT32 hy, hc;

	DCTCoefficientsContext_t ctx = { 0 };

	/* Per plane and position: blocks that still expect a token there. */
	INT32 Coded[3][64];

	ctx.Run   = t->DCTRun;
	ctx.Coeff = t->DCTCoeff;

	for (plane = 0; plane < 3; plane++) {
		for (pos = 0; pos < 64; pos++) {
			Coded[plane][pos] = t->BlocksCoded[plane];
		}
	}

	for (group = 0; group < 5; group++) {
		INT32 stop = index + COEFFS[group];

		if (group < 2) {
			INITIATE_BITS
			FETCH_BITS_I(hy, 4);
			FETCH_BITS_I(hc, 4);
			FINALIZE_BITS
		}

		huff[0] = t->Setup->Huffman + group * 0x10 + hy;
		huff[1] = t->Setup->Huffman + group * 0x10 + hc;
		huff[2] = huff[1];

		for (; index < stop; index++) {
			for (plane = 0; plane < 3; plane++) {
				BOOL ok;

				t->BRun  [plane][index] = ctx.Run;
				t->BCoeff[plane][index] = ctx.Coeff;

				ctx.BlocksCoded = Coded[plane];

				ok = FrameDecoder_DecodeBlocks(
					t,
					d,
					huff[plane],
					&ctx,
					index);
				if (!ok) {
					return FALSE;
				}
			}
		}
	}

	return TRUE;
}

/* */

/*
 * Decodes one frame packet of `size` bytes at `p` into the decoder `t`.
 *
 * Steps: frame header, rebuild of the dequantizer matrices and the filter
 * when the quantizer indices changed, frame buffer selection, (for non-INTRA
 * frames) coded block flags, macro block modes and motion vectors, block
 * level quantizer selectors, DCT tokens, DC handling and reconstruction.
 *
 * Returns FALSE as soon as any bit stream stage fails.
 */
static BOOL FrameDecoder_Decode(
	FrameDecoder_t* t,
	const VOID*     p,
	SIZE_T          size)
{
	BitReader_t d;

	INT32 i, j, k;
	INT32 qup; /* non-zero when the quantizer setup has to be rebuilt */

	QT_BitReader_Initialize(&d, p, size);

	if (!FrameHeader_Decode(&(t->Header), &d)) {
		return FALSE;
	}

	/* The setup is stale when the number of indices or any index differs. */
	qup = (t->QCount != t->Header.NQIS);
	if (qup) {
		t->QCount = t->Header.NQIS;

	} else {
		for (i = 0; i < t->Header.NQIS && !qup; i++) {
			qup = (t->QIndex[i] != t->Header.QIS[i]);
		}
	}

	if (qup) {
		for (i = 0; i < t->Header.NQIS; i++) {
			t->QIndex[i] = t->Header.QIS[i];
			Dequantize_MakeMatrix(
				&(t->Dequantize[i]),
				&(t->Setup->Dequantize),
				t->QIndex[i]);

			if (i != 0) {
				/* fix DC dequantizer: every additional matrix takes its DC
				   entry from the first one */
				for (j = 0; j < 2; j++) {
					for (k = 0; k < 3; k++) {
						t->Dequantize[i].Matrix[j][k][0] =
							t->Dequantize[0].Matrix[j][k][0];
					}
				}

			} else {
				/* The filter follows the first quantizer index only. */
				Filter_Setup(
					&(t->Filter),
					&(t->Setup->Filter),
					t->QIndex[0]);
			}
		}

		if (t->UpdateDequantizeMatrix != NULL) {
			t->UpdateDequantizeMatrix(t);
		}
	}

	/* Frame buffer selection and frame-type specific side information. */

	if (t->Header.Type != 0) {
		/* Frame[1] moves to another plane set and Frame[2] takes over the
		   set that Frame[1] used before. */
		if (t->Frame[1] == t->Plane + 0) {
			t->Frame[1] = t->Plane + 3;
			t->Frame[2] = t->Plane + 0;
		} else if (t->Frame[1] == t->Plane + 3) {
			t->Frame[1] = t->Plane + 6;
			t->Frame[2] = t->Plane + 3;
		} else if (t->Frame[1] == t->Plane + 6) {
			t->Frame[1] = t->Plane + 3;
			t->Frame[2] = t->Plane + 6;
		}

		if (!FrameDecoder_DecodeCodedBlockFlag(t, &d) ||
			!FrameDecoder_DecodeMacroBlockCodingModes(t, &d) ||
			!FrameDecoder_DecodeMotionVectors(t, &d)) {
			return FALSE;
		}

	} else { /* Intra */
		t->Frame[0] = t->Plane + 0; /* G */
		t->Frame[1] = t->Plane + 0; /* C */
		t->Frame[2] = t->Plane + 3; /* R */

		/* Every block is coded and every mode is 1. */
		memset(t->BCoded, 1, sizeof(UINT8) * t->Index->Blocks);

		memset(t->MBMode, 1, sizeof(UINT8) * t->Index->MC);
		memset(t->BMode,  1, sizeof(UINT8) * t->Index->Blocks);

		for (i = 0; i < 3; i++) {
			t->BlocksCoded[i] = t->Index->BC[i];
		}
	}

	/* Selectors default to 0 and are only coded when there is a choice. */
	memset(t->BQI, 0, sizeof(UINT8) * t->Index->Blocks);

	if (t->Header.NQIS > 1 && !FrameDecoder_BlockLevelQIDecode(t, &d)) {
		return FALSE;
	}

	if (!FrameDecoder_DecodeDCTCoefficients(t, &d)) {
		return FALSE;
	}

	/* Post-processing of the decoded tokens. */

	DecodeDCCoefficients(t);

	UndoDCPrediction(t);

	t->Reconstruct(t);

	return TRUE;
}

/* */

/* Release the function names used above. NOTE(review): presumably the
   including file #defines each of them to a variant-specific name before
   including this implementation - confirm at the include site. */
#undef FrameHeader_Decode

#undef FrameDecoder_DecodeCodedBlockFlag
#undef FrameDecoder_DecodeMacroBlockCodingModes
#undef FrameDecoder_DecodeMotionVectors

#undef FrameDecoder_BlockLevelQIDecode

#undef FrameDecoder_DecodeBlocks
#undef FrameDecoder_DecodeDCTCoefficients

#undef FrameDecoder_Decode

/* */

