//	SRC_Filter - High Quality Audio Sample-Rate-Conversion 
//	Copyright (C) 2001 Andreas Dittrich
//
//	This program is free software; you can redistribute it and/or modify
//	it under the terms of the GNU General Public License as published by
//	the Free Software Foundation; either version 2 of the License, or
//	(at your option) any later version.
//
//	This program is distributed in the hope that it will be useful,
//	but WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//	GNU General Public License for more details.
//
//	You should have received a copy of the GNU General Public License
//	along with this program; if not, write to the Free Software
//	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

/******************************************************************************/
/*                                                                            */
/* src_filter_asm.cpp                                                         */
/*                                                                            */
/******************************************************************************/
/* Author(s)    : Andreas Dittrich                                            */
/* Project start: 06.11.2001                                                  */
/* Completed    : 28.11.2001                                                  */
/* Last change  : 07.03.2002                                                  */
/* Time         : 00:45                                                       */
/******************************************************************************/
/* Changes      : I                                                           */
/******************************************************************************/

/******************************************************************************/
/* Last change  : 07.03.2002                                                  */
/* Time         : 00:45                                                       */
/* By         : Andreas Dittrich                                              */
/* Description: Bug conversion long to short corrected in mono and stereo     */
/******************************************************************************/


#include "src_filter_asm.h"

static long lZi[N_FILTERORDER];
static short sZi[N_FILTERORDER];


static void __declspec(naked) filter_mono( short int *x_ptr, short int *h_ptr1, short int *h_ptr2, long *y_ptr, long N_FO16 ) {
	__asm {
		push	ebp
		push	edi
		push	esi
		push	ebx

		mov		edi,[esp+16+16]		; &y
		mov		ecx,dword ptr [edi]	; y
		mov		edi,[esp+4+16]		; x_ptr
		mov		esi,[esp+8+16]		; h_ptr1

		mov		ebp, [esp+20+16]	; N_FILTERORDER16

h1_loop:
		movsx	eax,word ptr [esi]	; h_ptr[0]
		movsx	ebx,word ptr [edi]	; x_ptr[0]
		imul	ebx,eax
		movsx	eax,word ptr [edi+2]
		movsx	edx,word ptr [esi+2]
		imul	edx,eax

		add		edx,ebx
		sar		edx,MUL_ACC_SHR
		add		ecx,edx

		movsx	eax,word ptr [edi+4]
		movsx	ebx,word ptr [esi+4]
		imul	ebx,eax
		movsx	eax,word ptr [edi+6]
		movsx	edx,word ptr [esi+6]
		imul	edx,eax

		add		edx,ebx
		sar		edx,MUL_ACC_SHR
		add		ecx,edx

		movsx	eax,word ptr [edi+8]
		movsx	ebx,word ptr [esi+8]
		imul	ebx,eax
		movsx	eax,word ptr [edi+10]
		movsx	edx,word ptr [esi+10]
		imul	edx,eax

		add		edx,ebx
		sar		edx,MUL_ACC_SHR
		add		ecx,edx

		movsx	eax,word ptr [edi+12]
		movsx	ebx,word ptr [esi+12]
		imul	ebx,eax
		movsx	eax,word ptr [edi+14]
		movsx	edx,word ptr [esi+14]
		imul	edx,eax

		add		edx,ebx
		sar		edx,MUL_ACC_SHR
		add		ecx,edx


		add		edi,16		; x_ptr += 8
		add		esi,16		; h_ptr1 += 8
		sub		ebp, 1	
		jne		h1_loop
; end of h1_loop


		mov		esi,[esp+12+16]		; h_ptr1
		mov		ebp, [esp+20+16]	; N_FILTERORDER16
h2_loop:
		movsx	eax,word ptr [esi-2]	; h_ptr[0]
		movsx	ebx,word ptr [edi]	; x_ptr[0]
		imul	ebx,eax
		movsx	eax,word ptr [edi+2]
		movsx	edx,word ptr [esi-4]
		imul	edx,eax

		add		edx,ebx
		sar		edx,MUL_ACC_SHR
		add		ecx,edx

		movsx	eax,word ptr [edi+4]
		movsx	ebx,word ptr [esi-6]
		imul	ebx,eax
		movsx	eax,word ptr [edi+6]
		movsx	edx,word ptr [esi-8]
		imul	edx,eax

		add		edx,ebx
		sar		edx,MUL_ACC_SHR
		add		ecx,edx

		movsx	eax,word ptr [edi+8]
		movsx	ebx,word ptr [esi-10]
		imul	ebx,eax
		movsx	eax,word ptr [edi+10]
		movsx	edx,word ptr [esi-12]
		imul	edx,eax

		add		edx,ebx
		sar		edx,MUL_ACC_SHR
		add		ecx,edx

		movsx	eax,word ptr [edi+12]
		movsx	ebx,word ptr [esi-14]
		imul	ebx,eax
		movsx	eax,word ptr [edi+14]
		movsx	edx,word ptr [esi-16]
		imul	edx,eax

		add		edx,ebx
		sar		edx,MUL_ACC_SHR
		add		ecx,edx

		add		edi,16		; x_ptr += 16
		sub		esi,16		; h_ptr1 -= 16
		sub		ebp, 1	
		jne		h2_loop
; end of h2_loop

		mov		edi,[esp+16+16]		; &y
		mov		dword ptr [edi],ecx	; y

		pop		ebx
		pop		esi
		pop		edi
		pop		ebp
		ret
	}
}



int sample_rate_conversion_HQ_16bit_mono( short int *src, long N_src, short int *dst, long N_dst, 
				short int *h )
{
	short int *src_ptr, *h_ptr1, *h_ptr2, *sZi_end;
	long phase;
	long i,j;
	long mu;

	phase = -N_src;
	mu = -N_FILTERORDER;
	sZi_end = sZi + N_FILTERORDER;

	for (i=0;i<N_dst;i++) {
		long phase_x_N_SUBFILTER;
		long filter_nr;
		long beta;
		long alpha;

		long y_A = 0;
		long y_B = 0;

		phase = phase + N_src;
		while ( phase >= N_dst ) {
			mu++; 
			phase = phase - N_dst;
		}

		phase_x_N_SUBFILTER = phase*N_SUBFILTER;
		filter_nr = phase_x_N_SUBFILTER/N_dst + 1;
		beta = ( ((phase_x_N_SUBFILTER%N_dst)<<LINEAR_INTERPOLATION_DIGITS) + N_dst/2 )/N_dst;
		alpha = (1<<LINEAR_INTERPOLATION_DIGITS) - beta;

		if (filter_nr != N_SUBFILTER) 
		{
			h_ptr1 = h + ( N_SUBFILTER - filter_nr ) * N_FILTERORDER2;
			h_ptr2 = h + ( filter_nr ) * N_FILTERORDER2;
			src_ptr = src + mu;

			if (mu<0) {

				long nu = mu;

				j = N_FILTERORDER2;
				do {
					if (nu<0) {
						y_A += ( sZi_end[nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
						y_B += ( sZi_end[nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else {
						y_A += ( src[nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
						y_B += ( src[nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
					}
					nu++;
					h_ptr1++;
				} while (--j);

				j = N_FILTERORDER2;
				do {
					if (nu<0) {
						y_A += ( sZi_end[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						y_B += ( sZi_end[nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else {
						y_A += ( src[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						y_B += ( src[nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
					}
					nu++;
					h_ptr2--;
				} while (--j);

			} else {
				filter_mono( src_ptr, h_ptr1, h_ptr2, &y_A, (long)N_FILTERORDER16 );
				filter_mono( src_ptr, h_ptr1 - N_FILTERORDER2, h_ptr2 + N_FILTERORDER2, &y_B, (long)N_FILTERORDER16 );
			}

		} 
		else
		{
			h_ptr1 = h;
			h_ptr2 = h + N_SUBFILTER * N_FILTERORDER2;
			src_ptr = src + mu;

			if (mu<0) {

				long nu = mu;

				j = N_FILTERORDER2;
				do {
					if (nu<-1) {
						y_A += ( sZi_end[nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
						y_B += ( sZi_end[nu+1] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else if (nu<0) {
						y_A += ( sZi_end[-1] * h_ptr1[0] ) >> MUL_ACC_SHR;
						y_B += ( src[0] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else {
						y_A += ( src[nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
						y_B += ( src[nu+1] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					}
					nu++;
					h_ptr1++;
				} while (--j);

				j = N_FILTERORDER2;
				do {
					if (nu<-1) {
						y_A += ( sZi_end[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						y_B += ( sZi_end[nu+1] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else if (nu<0) {
						y_A += ( sZi_end[-1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						y_B += ( src[0] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else {
						y_A += ( src[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						y_B += ( src[nu+1] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					}
					nu++;
					h_ptr2--;
				} while (--j);

			} else {
				filter_mono( src_ptr, h_ptr1, h_ptr2, &y_A, (long)N_FILTERORDER16 );
				filter_mono( src_ptr+1, h_ptr1+(N_SUBFILTER-1)*N_FILTERORDER2, h_ptr2-(N_SUBFILTER-1)*N_FILTERORDER2, &y_B, (long)N_FILTERORDER16 );
			}
		}

		long y_C = ( (y_A*alpha+y_B*beta+ROUNDING_OFFSET)>>(H_DIGITS-1-MUL_ACC_SHR+LINEAR_INTERPOLATION_DIGITS) );

		if (y_C>32767)
			*(dst++) = 32767;
		else if (y_C<-32768)
			*(dst++) = -32768;
		else *(dst++) = (short int)y_C;

	}

	memcpy( sZi, src+N_src-N_FILTERORDER, N_FILTERORDER*2 );

	return 0;
}



static void __declspec(naked) filter_stereo( long *x_ptr, short int *h_ptr1, short int *h_ptr2, long *yl_ptr, long *yr_ptr, long N_FO16 ) {
	__asm {
		push	ebp
		push	edi
		push	esi
		push	ebx
		push	0x00000000		; [esp+4] Speicher fr yl
		push	0x00000000		; [esp+0] Speicher fr yr

		mov		edi,[esp+4+24]		; x_ptr
		mov		esi,[esp+8+24]		; h_ptr1
		mov		ebp, [esp+24+24]	; N_FILTERORDER16

h1_loop:
		movsx	eax,word ptr [esi+0]	; h_ptr[0]
		movsx	ebx,word ptr [edi+0]	; x_ptr[0] left 
		movsx	edx,word ptr [edi+2]	; x_ptr[0] right

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi+2]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+4] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+6]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi+4]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+8] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+10]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi+6]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+12] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+14]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi+8]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+16] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+18]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi+10]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+20] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+22]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi+12]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+24] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+26]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi+14]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+28] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+30]

		imul	ebx,eax
		imul	edx,eax
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		add		[esp+0],edx


		add		edi,32
		add		esi,16
		sub		ebp, 1	
		jne		h1_loop
; end of h1_loop


		mov		esi,[esp+12+24]		; h_ptr2
		mov		ebp, [esp+24+24]	; N_FILTERORDER16
h2_loop:
		movsx	eax,word ptr [esi-2]
		movsx	ebx,word ptr [edi+0] 
		movsx	edx,word ptr [edi+2]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi-4]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+4] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+6]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi-6]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+8] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+10]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi-8]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+12] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+14]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi-10]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+16] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+18]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi-12]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+20] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+22]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi-14]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+24] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+26]

		imul	ebx,eax
		imul	edx,eax
		movsx	eax,word ptr [esi-16]
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		movsx	ebx,word ptr [edi+28] 
		add		[esp+0],edx
		movsx	edx,word ptr [edi+30]

		imul	ebx,eax
		imul	edx,eax
		sar		ebx,MUL_ACC_SHR
		sar		edx,MUL_ACC_SHR
		add		[esp+4],ebx
		add		[esp+0],edx

		
		add		edi,32
		sub		esi,16
		sub		ebp, 1	
		jne		h2_loop
; end of h2_loop

		mov		edi,[esp+16+24]		; &yl
		mov		eax, [esp+4]
		add		[edi],eax

		mov		edi,[esp+20+24]		; &yr
		mov		eax, [esp+0]
		add		[edi],eax
		
		pop		eax
		pop		eax
		pop		ebx
		pop		esi
		pop		edi
		pop		ebp
		ret
	}
}









int sample_rate_conversion_HQ_16bit_stereo( long *src, long N_src, long *dst, long N_dst, 
				short int *h )
{
	long *lZi_end;
	short int *h_ptr1, *h_ptr2;
	long phase;
	long i,j;
	long mu;

	phase = -N_src;
	mu = -N_FILTERORDER;
	lZi_end = lZi + N_FILTERORDER;

	for (i=0;i<N_dst;i++) {
		long phase_x_N_SUBFILTER;
		long filter_nr;
		long beta;
		long alpha;

		long yl_A = 0;
		long yl_B = 0;
		long yr_A = 0;
		long yr_B = 0;

		short int *sisrc = (short int*)src;
		short int *sidst = (short int*)(dst+i);
		short int *siZi_end = (short int*)lZi_end;

		phase = phase + N_src;
		while ( phase >= N_dst ) {
			mu++; 
			phase = phase - N_dst;
		}

		phase_x_N_SUBFILTER = phase*N_SUBFILTER;
		filter_nr = phase_x_N_SUBFILTER/N_dst + 1;
		beta = ( ((phase_x_N_SUBFILTER%N_dst)<<LINEAR_INTERPOLATION_DIGITS) + N_dst/2 )/N_dst;
		alpha = (1<<LINEAR_INTERPOLATION_DIGITS) - beta;

		if (filter_nr != N_SUBFILTER) 
		{
			h_ptr1 = h + ( N_SUBFILTER - filter_nr ) * N_FILTERORDER2;
			h_ptr2 = h + ( filter_nr ) * N_FILTERORDER2;

			if (mu<0) {

				long nu = mu;

				j = N_FILTERORDER2;
				do {
					if (nu<0) {
						yl_A += ( siZi_end[2*nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
						yr_A += ( siZi_end[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR;
						yl_B += ( siZi_end[2*nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
						yr_B += ( siZi_end[2*nu+1] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else {
						yl_A += ( sisrc[2*nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
						yr_A += ( sisrc[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR;
						yl_B += ( sisrc[2*nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
						yr_B += ( sisrc[2*nu+1] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
					}
					nu++;
					h_ptr1++;
				} while (--j);

				j = N_FILTERORDER2;
				do {
					if (nu<0) {
						yl_A += ( siZi_end[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						yr_A += ( siZi_end[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						yl_B += ( siZi_end[2*nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
						yr_B += ( siZi_end[2*nu+1] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else {
						yl_A += ( sisrc[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						yr_A += ( sisrc[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						yl_B += ( sisrc[2*nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
						yr_B += ( sisrc[2*nu+1] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
					}
					nu++;
					h_ptr2--;
				} while (--j);


			} else {
				filter_stereo( src+mu, h_ptr1, h_ptr2, &yl_A, &yr_A, (long)N_FILTERORDER16 );
				filter_stereo( src+mu, h_ptr1 - N_FILTERORDER2, h_ptr2 + N_FILTERORDER2, &yl_B, &yr_B, (long)N_FILTERORDER16 );
			}

		} 
		else
		{
			h_ptr1 = h;
			h_ptr2 = h + N_SUBFILTER * N_FILTERORDER2;

			if (mu<0) {

				long nu = mu;

				j = N_FILTERORDER2;
				do {
					if (nu<-1) {
						yl_A += ( siZi_end[2*nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
						yr_A += ( siZi_end[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR;
						yl_B += ( siZi_end[2*nu+2] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
						yr_B += ( siZi_end[2*nu+3] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else if (nu<0) {
						yl_A += ( siZi_end[-2] * h_ptr1[0] ) >> MUL_ACC_SHR;
						yr_A += ( siZi_end[-1] * h_ptr1[0] ) >> MUL_ACC_SHR;
						yl_B += ( sisrc[0] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
						yr_B += ( sisrc[1] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else {
						yl_A += ( sisrc[2*nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
						yr_A += ( sisrc[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR;
						yl_B += ( sisrc[2*nu+2] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
						yr_B += ( sisrc[2*nu+3] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					}
					nu++;
					h_ptr1++;
				} while (--j);

				j = N_FILTERORDER2;
				do {
					if (nu<-1) {
						yl_A += ( siZi_end[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						yr_A += ( siZi_end[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						yl_B += ( siZi_end[2*nu+2] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
						yr_B += ( siZi_end[2*nu+3] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else if (nu<0) {
						yl_A += ( siZi_end[-2] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						yr_A += ( siZi_end[-1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						yl_B += ( sisrc[0] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
						yr_B += ( sisrc[1] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					} else {
						yl_A += ( sisrc[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						yr_A += ( sisrc[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
						yl_B += ( sisrc[2*nu+2] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
						yr_B += ( sisrc[2*nu+3] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
					}
					nu++;
					h_ptr2--;
				} while (--j);

			} else {
				filter_stereo( src+mu, h_ptr1, h_ptr2, &yl_A, &yr_A, (long)N_FILTERORDER16 );
				filter_stereo( src+mu+1, h_ptr1+(N_SUBFILTER-1)*N_FILTERORDER2, h_ptr2-(N_SUBFILTER-1)*N_FILTERORDER2, &yl_B, &yr_B, (long)N_FILTERORDER16 );
			}
		}
		
		long yl = ( (yl_A*alpha+yl_B*beta+ROUNDING_OFFSET)>>(H_DIGITS-1-MUL_ACC_SHR+LINEAR_INTERPOLATION_DIGITS) );
		long yr = ( (yr_A*alpha+yr_B*beta+ROUNDING_OFFSET)>>(H_DIGITS-1-MUL_ACC_SHR+LINEAR_INTERPOLATION_DIGITS) );

		if (yl>32767)
			sidst[0] = 32767;
		else if (yl<-32768)
			sidst[0] = -32768;
		else sidst[0] = (short int)yl;

		if (yr>32767)
			sidst[1] = 32767;
		else if (yr<-32768)
			sidst[1] = -32768;
		else sidst[1] = (short int)yr;

	}

	memcpy( lZi, src+N_src-N_FILTERORDER, N_FILTERORDER*4 );

	return 0;
}




