//-----------------------------------------------------------------------------
//
// Zandronum Source
// Copyright (C) 2023 Adam Kaminski
// Copyright (C) 2023 Zandronum Development Team
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
// 3. Neither the name of the Zandronum Development Team nor the names of its
//    contributors may be used to endorse or promote products derived from this
//    software without specific prior written permission.
// 4. Redistributions in any form must be accompanied by information on how to
//    obtain complete source code for the software and any accompanying
//    software that uses the software. The source code must either be included
//    in the distribution or be available for no more than the cost of
//    distribution plus a nominal fee, and must be freely redistributable
//    under reasonable conditions. For an executable file, complete source
//    code means the source code for all modules it contains. It does not
//    include source code for modules or files that typically accompany the
//    major components of the operating system on which the executable file
//    runs.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
//
//
// Filename: voicechat.cpp
//
//-----------------------------------------------------------------------------

#include "voicechat.h"
#include "c_dispatch.h"
#include "cl_commands.h"
#include "cl_demo.h"
#include "d_netinf.h"
#include "network.h"
#include "v_text.h"
#include "stats.h"
#include "p_acs.h"

//*****************************************************************************
//	CONSOLE VARIABLES

// [AK] Which input device to use when recording audio. VOIPController::Activate
// reads this and falls back to driver 0 when the value isn't below the number
// of record drivers that FMOD reports.
CVAR( Int, snd_recorddriver, 0, CVAR_ARCHIVE | CVAR_NOSETBYACS | CVAR_GLOBALCONFIG )

// [AK] If enabled, clients can transmit audio based on voice activity. While
// this is set, VOIPController::Tick examines recorded frames even when the
// "voicerecord" button isn't held, and the button no longer starts a transmission.
CVAR( Bool, snd_recordvoiceactivity, false, CVAR_ARCHIVE | CVAR_NOSETBYACS | CVAR_GLOBALCONFIG )

// [AK] Enables noise suppression while transmitting audio. Checked once per
// recorded frame in VOIPController::Tick.
CVAR( Bool, snd_suppressnoise, true, CVAR_ARCHIVE | CVAR_NOSETBYACS | CVAR_GLOBALCONFIG )

// [AK] Allows the client to load a custom RNNoise model file. Only read in
// VOIPController::Init; an empty string means no custom model is loaded.
CVAR( String, snd_noisemodelfile, "", CVAR_ARCHIVE | CVAR_NOSETBYACS | CVAR_GLOBALCONFIG )

// [AK] How sensitive voice activity detection is, in decibels.
CUSTOM_CVAR( Float, snd_recordsensitivity, -50.0f, CVAR_ARCHIVE | CVAR_NOSETBYACS | CVAR_GLOBALCONFIG )
{
	// The threshold is kept between -100 dB and 0 dB.
	const float requested = self;
	const float bounded = clamp<float>( requested, -100.0f, 0.0f );

	// Nothing to do when the requested value is already within range.
	if ( bounded == requested )
		return;

	self = bounded;
}

// [AK] Controls the volume of everyone's voices on the client's end.
CUSTOM_CVAR( Float, snd_voicechatvolume, 1.0f, CVAR_ARCHIVE | CVAR_NOSETBYACS | CVAR_GLOBALCONFIG | CVAR_NOINITCALL )
{
	// The volume is restricted to the range 0.0 to 1.0.
	const float clampedValue = clamp<float>( self, 0.0f, 1.0f );

	// An out-of-range value is replaced by the clamped one, and nothing else
	// is done during this invocation of the callback.
	if ( self != clampedValue )
	{
		self = clampedValue;
		return;
	}

	// Forward the new volume to the VoIP controller, but only in builds that
	// have sound support.
#ifndef NO_SOUND
	VOIPController::GetInstance( ).SetVolume( self );
#endif // NO_SOUND
}

// [AK] How the voice chat is used on the server (0 = never, 1 = always, 2 = teammates only).
CUSTOM_CVAR( Int, sv_allowvoicechat, VOICECHAT_EVERYONE, CVAR_NOSETBYACS | CVAR_SERVERINFO )
{
	// Only values from VOICECHAT_OFF up to VOICECHAT_TEAMMATESONLY are accepted.
	const int requested = self;
	const int permitted = clamp<int>( requested, VOICECHAT_OFF, VOICECHAT_TEAMMATESONLY );

	if ( permitted == requested )
	{
		// [AK] Notify the clients about the change.
		SERVER_SettingChanged( self, false );
	}
	else
	{
		// Replace the out-of-range value; no notification is sent from this
		// invocation of the callback.
		self = permitted;
	}
}

// [AK] Enables or disables proximity-based voice chat.
CUSTOM_CVAR( Bool, sv_proximityvoicechat, false, CVAR_NOSETBYACS | CVAR_SERVERINFO )
{
	// Let the VoIP controller switch every existing channel between 2D and 3D
	// playback. This is only compiled in builds that have sound support.
#ifndef NO_SOUND
	VOIPController::GetInstance( ).UpdateProximityChat( );
#endif // NO_SOUND

	// [AK] Notify the clients about the change.
	SERVER_SettingChanged( self, false );
}

//*****************************************************************************
//	CONSOLE COMMANDS

// [AK] Everything past this point only compiles if compiling with sound.
#ifndef NO_SOUND

static void voicechat_SetChannelVolume( FCommandLine &argv, const ULONG ulPlayer )
{
	// [AK] Mods are not allowed to change a VoIP channel's volume.
	if ( ACS_IsCalledFromConsoleCommand( ))
		return;

	// Both the player argument and the volume argument must be present.
	if ( argv.argc( ) < 3 )
		return;

	if ( ulPlayer == MAXPLAYERS )
	{
		// An index of MAXPLAYERS means no player was found.
		Printf( "There isn't a player named %s.\n", argv[1] );
	}
	else if ( players[ulPlayer].bIsBot )
	{
		Printf( "Player %s is a bot.\n", argv[1] );
	}
	else
	{
		// Parse the requested volume and keep it between 0.0 and 1.0.
		const float requestedVolume = static_cast<float>( atof( argv[2] ));
		VOIPController::GetInstance( ).SetChannelVolume( ulPlayer, clamp<float>( requestedVolume, 0.0f, 1.0f ));
	}
}

// [AK] Changes the volume of one VoIP channel, using the player's name.
CCMD( setchanvolume )
{
	if ( argv.argc( ) >= 3 )
	{
		// Resolve the name to a player index, then let the shared helper
		// validate the player and apply the volume.
		const ULONG ulPlayer = SERVER_GetPlayerIndexFromName( argv[1], true, true );
		voicechat_SetChannelVolume( argv, ulPlayer );
		return;
	}

	// Not enough arguments, so print the usage instead.
	Printf( "%s <name> <volume, 0.0 to 1.0>\n", argv[0] );
}

// [AK] Same as setchanvolume, but uses the player's index instead.
CCMD( setchanvolume_idx )
{
	// Print the usage when the index or the volume is missing.
	if ( argv.argc( ) < 3 )
	{
		Printf( "%s <index> <volume, 0.0 to 1.0>\n", argv[0] );
		return;
	}

	int playerIndex;

	// The first argument must parse as a number...
	if ( argv.SafeGetNumber( 1, playerIndex ) == false )
		return;

	// ...and that number must refer to a valid player.
	if ( PLAYER_IsValidPlayer( playerIndex ) == false )
		return;

	voicechat_SetChannelVolume( argv, playerIndex );
}

// [AK] Lists all recording devices that are currently connected.
CCMD( snd_listrecorddrivers )
{
	// The VoIP controller does the actual printing.
	const VOIPController &controller = VOIPController::GetInstance( );
	controller.ListRecordDrivers( );
}

//*****************************************************************************
//	FUNCTIONS

//*****************************************************************************
//
// [AK] VOIPController::VOIPController
//
// Initializes all members of VOIPController to their default values, and resets
// the state of the "voicerecord" button.
//
//*****************************************************************************

VOIPController::VOIPController( void ) :
	pVoIPChannels{ nullptr },
	channelVolumes{ },
	pSystem( nullptr ),
	pRecordSound( nullptr ),
	pVoIPChannelGroup( nullptr ),
	pEncoder( nullptr ),
	pDenoiseModel( nullptr ),
	pDenoiseState( nullptr ),
	recordDriverID( 0 ),
	framesSent( 0 ),
	lastRecordPosition( 0 ),
	bIsInitialized( false ),
	bIsActive( false ),
	TransmissionType( TRANSMISSIONTYPE_OFF )
{
	// Every player's channel starts at full volume. Brace-initializing an
	// array with a single value only sets its first element (the rest become
	// zero), so each slot is assigned explicitly here.
	for ( ULONG ulIdx = 0; ulIdx < MAXPLAYERS; ulIdx++ )
		channelVolumes[ulIdx] = 1.0f;

	// Default settings used for proximity chat.
	proximityInfo.SysChannel = nullptr;
	proximityInfo.StartTime.AsOne = 0;
	proximityInfo.Rolloff.RolloffType = ROLLOFF_Doom;
	proximityInfo.Rolloff.MinDistance = 200.0f;
	proximityInfo.Rolloff.MaxDistance = 1200.0f;
	proximityInfo.DistanceScale = 1.0f;

	// Start with the "voicerecord" button in its reset state.
	Button_VoiceRecord.Reset( );
}

//*****************************************************************************
//
// [AK] VOIPController::Init
//
// Initializes the VoIP controller.
//
//*****************************************************************************

void VOIPController::Init( FMOD::System *pMainSystem )
{
	// [AK] The server never initializes the voice recorder.
	if ( NETWORK_GetState( ) == NETSTATE_SERVER )
		return;

	pSystem = pMainSystem;

	// [AK] Abort if the FMOD system is invalid. This should never happen.
	if ( pSystem == nullptr )
	{
		Printf( TEXTCOLOR_ORANGE "Invalid FMOD::System pointer used to initialize VoIP controller.\n" );
		return;
	}

	// The sound that the input device records into. It's created as a looping
	// user sound that holds RECORD_SOUND_LENGTH samples.
	FMOD_CREATESOUNDEXINFO exinfo = CreateSoundExInfo( RECORD_SAMPLE_RATE, RECORD_SOUND_LENGTH );

	// [AK] Abort if creating the sound to record into failed.
	if ( pSystem->createSound( nullptr, FMOD_LOOP_NORMAL | FMOD_2D | FMOD_OPENUSER, &exinfo, &pRecordSound ) != FMOD_OK )
	{
		Printf( TEXTCOLOR_ORANGE "Failed to create sound for recording.\n" );
		return;
	}

	// [AK] Create the player VoIP channel group.
	if ( pSystem->createChannelGroup( "VoIP", &pVoIPChannelGroup ) != FMOD_OK )
	{
		Printf( TEXTCOLOR_ORANGE "Failed to create VoIP channel group for playback.\n" );
		return;
	}

	// The encoder works on mono audio at the playback sample rate, because
	// recorded frames are downsampled before they're encoded.
	int opusErrorCode = OPUS_OK;
	pEncoder = opus_encoder_create( PLAYBACK_SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP, &opusErrorCode );

	// [AK] Stop here if the Opus encoder wasn't created successfully.
	if ( opusErrorCode != OPUS_OK )
	{
		Printf( TEXTCOLOR_ORANGE "Failed to create Opus encoder: %s.\n", opus_strerror( opusErrorCode ));
		return;
	}

	opus_encoder_ctl( pEncoder, OPUS_SET_FORCE_CHANNELS( 1 ));
	opus_encoder_ctl( pEncoder, OPUS_SET_SIGNAL( OPUS_SIGNAL_VOICE ));

	// [AK] Load a custom RNNoise model file if we can. Otherwise, use the built-in model.
	if ( strlen( snd_noisemodelfile ) > 0 )
	{
		const char *pFileName = snd_noisemodelfile.GetGenericRep( CVAR_String ).String;
		FILE *pModelFile = fopen( pFileName, "r" );

		if ( pModelFile != nullptr )
		{
			pDenoiseModel = rnnoise_model_from_file( pModelFile );

			if ( pDenoiseModel == nullptr )
			{
				Printf( TEXTCOLOR_ORANGE "Failed to load RNNoise model \"%s\". Using built-in model instead.\n", pFileName );

				// No model was created, so nothing can still need the file.
				// Close it instead of leaking the handle.
				fclose( pModelFile );
			}

			// NOTE(review): on success the file is deliberately left open,
			// because some RNNoise versions require it to stay open while the
			// model exists. The handle isn't stored anywhere, so it's never
			// closed. Consider keeping it in a member and closing it in
			// Shutdown after the model has been freed.
		}
		else
		{
			Printf( TEXTCOLOR_YELLOW "Couldn't find RNNoise model \"%s\". Using built-in model instead.\n", pFileName );
		}
	}

	// [AK] Initialize the denoise state, used for noise suppression. The model
	// pointer is still null here when no custom model was loaded.
	pDenoiseState = rnnoise_create( pDenoiseModel );

	bIsInitialized = true;
	Printf( "VoIP controller initialized successfully.\n" );
}

//*****************************************************************************
//
// [AK] VOIPController::Shutdown
//
// Stops recording from the input device (if we were doing that), releases all
// memory used by the FMOD system, and shuts down the VoIP controller.
//
//*****************************************************************************

void VOIPController::Shutdown( void )
{
	Deactivate( );

	if ( pEncoder != nullptr )
	{
		opus_encoder_destroy( pEncoder );
		pEncoder = nullptr;
	}

	if ( pRecordSound != nullptr )
	{
		pRecordSound->release( );
		pRecordSound = nullptr;
	}

	if ( pVoIPChannelGroup != nullptr )
	{
		pVoIPChannelGroup->release( );
		pVoIPChannelGroup = nullptr;
	}

	if ( pDenoiseModel != nullptr )
	{
		rnnoise_model_free( pDenoiseModel );
		pDenoiseModel = nullptr;
	}

	if ( pDenoiseState != nullptr )
	{
		rnnoise_destroy( pDenoiseState );
		pDenoiseState = nullptr;
	}

	bIsInitialized = false;
	Printf( "VoIP controller shutting down.\n" );
}

//*****************************************************************************
//
// [AK] VOIPController::Activate
//
// Starts recording from the selected record driver.
//
//*****************************************************************************

void VOIPController::Activate( void )
{
	// Nothing to do if the controller can't be used, or is already active.
	if (( bIsInitialized == false ) || ( bIsActive ))
		return;

	// [AK] Try to start recording from the selected record driver. Recording
	// is skipped entirely during demo playback.
	if ( CLIENTDEMO_IsPlaying( ) == false )
	{
		int numRecordDrivers = 0;

		if ( pSystem->getRecordNumDrivers( &numRecordDrivers ) != FMOD_OK )
		{
			Printf( TEXTCOLOR_ORANGE "Failed to retrieve number of record drivers.\n" );
		}
		else if ( numRecordDrivers <= 0 )
		{
			Printf( TEXTCOLOR_ORANGE "Failed to find any connected record drivers.\n" );
		}
		else
		{
			// The selected driver must be a valid index into the list of
			// record drivers. Negative values are rejected too, so that they
			// never reach FMOD.
			if (( snd_recorddriver < 0 ) || ( snd_recorddriver >= numRecordDrivers ))
			{
				Printf( "Record driver %d doesn't exist. Using 0 instead.\n", *snd_recorddriver );
				recordDriverID = 0;
			}
			else
			{
				recordDriverID = snd_recorddriver;
			}

			// The last argument makes the recording loop.
			if ( pSystem->recordStart( recordDriverID, pRecordSound, true ) != FMOD_OK )
				Printf( TEXTCOLOR_ORANGE "Failed to start VoIP recording.\n" );
		}
	}

	// The controller becomes active even if recording couldn't be started.
	bIsActive = true;
}


//*****************************************************************************
//
// [AK] VOIPController::Deactivate
//
// Deletes every VoIP channel, ends any transmission that's in progress, and
// stops recording from the record driver.
//
//*****************************************************************************

void VOIPController::Deactivate( void )
{
	if (( bIsInitialized == false ) || ( bIsActive == false ))
		return;

	// [AK] Clear all of the VoIP channels.
	for ( ULONG ulIdx = 0; ulIdx < MAXPLAYERS; ulIdx++ )
	{
		if ( pVoIPChannels[ulIdx] != nullptr )
		{
			delete pVoIPChannels[ulIdx];
			pVoIPChannels[ulIdx] = nullptr;
		}
	}

	// [AK] If we're in the middle of a transmission, stop that too.
	StopTransmission( );

	if ( pSystem->recordStop( recordDriverID ) != FMOD_OK )
	{
		Printf( TEXTCOLOR_ORANGE "Failed to stop voice recording.\n" );
		return;
	}

	framesSent = 0;
	bIsActive = false;
}

//*****************************************************************************
//
// [AK] VOIPController::Tick
//
// Executes any routines that the VoIP controller must do every tick.
//
//*****************************************************************************

void VOIPController::Tick( void )
{
	// [AK] Don't tick while the VoIP controller is uninitialized.
	if ( bIsInitialized == false )
		return;

	// Activate or deactivate the controller so that it matches whether voice
	// chat is allowed right now. Demo playback isn't ruled out by this check.
	if ( IsVoiceChatAllowed( false ))
	{
		if ( bIsActive == false )
			Activate( );
	}
	else if ( bIsActive )
	{
		Deactivate( );
	}

	// [AK] Check the status of the "voicerecord" button. If the button's been
	// pressed, start transmitting, or it's been released stop transmitting.
	if (( Button_VoiceRecord.bWentUp ) && ( TransmissionType == TRANSMISSIONTYPE_BUTTON ))
	{
		StopTransmission( );
	}
	else if (( Button_VoiceRecord.bWentDown ) && ( snd_recordvoiceactivity == false ))
	{
		if ( IsVoiceChatAllowed( true ))
			StartTransmission( TRANSMISSIONTYPE_BUTTON, true );
		// [AK] We can't transmit if we're watching a demo.
		else if ( CLIENTDEMO_IsPlaying( ))
			Printf( "Voice chat can't be used during demo playback.\n" );
		// ...or if we're in an offline game.
		else if (( NETWORK_GetState( ) == NETSTATE_SINGLE ) || ( NETWORK_GetState( ) == NETSTATE_SINGLE_MULTIPLAYER ))
			Printf( "Voice chat can't be used in a singleplayer game.\n" );
		// ...or if the server has disabled voice chatting.
		else if ( sv_allowvoicechat == VOICECHAT_OFF )
			Printf( "Voice chat has been disabled by the server.\n" );
	}

	// Everything past this point requires an active controller.
	if ( bIsActive == false )
		return;

	// [AK] Are we transmitting audio by pressing the "voicerecord" button right
	// now, or using voice activity detection? We'll check if we have enough new
	// samples recorded to fill an audio frame that can be encoded and sent out.
	if (( TransmissionType != TRANSMISSIONTYPE_OFF ) || ( snd_recordvoiceactivity ))
	{
		unsigned int recordPosition = 0;

		if (( pSystem->getRecordPosition( recordDriverID, &recordPosition ) == FMOD_OK ) && ( recordPosition != lastRecordPosition ))
		{
			// The number of samples recorded since the last processed position.
			// The record sound loops, so the position can wrap back around.
			unsigned int recordDelta = recordPosition >= lastRecordPosition ? recordPosition - lastRecordPosition : recordPosition + RECORD_SOUND_LENGTH - lastRecordPosition;

			// [AK] We may need to send out multiple audio frames in a single tic.
			for ( unsigned int frame = 0; frame < recordDelta / RECORD_SAMPLES_PER_FRAME; frame++ )
			{
				void *ptr1 = nullptr;
				void *ptr2 = nullptr;
				unsigned int len1 = 0;
				unsigned int len2 = 0;

				if ( pRecordSound->lock( lastRecordPosition * SAMPLE_SIZE, RECORD_SAMPLES_PER_FRAME * SAMPLE_SIZE, &ptr1, &ptr2, &len1, &len2 ) == FMOD_OK )
				{
					unsigned char combinedBuffers[RECORD_SAMPLES_PER_FRAME * SAMPLE_SIZE] = { 0 };

					// [AK] Combine the ptr1 and ptr2 buffers into a single buffer.
					if (( ptr1 != nullptr ) && ( len1 > 0 ))
					{
						memcpy( combinedBuffers, ptr1, len1 );

						if (( ptr2 != nullptr ) && ( len2 > 0 ))
							memcpy( combinedBuffers + len1, ptr2, len2 );
					}

					// Every successful lock must be paired with an unlock. The
					// samples are copied now, so release the sound right away.
					// The result isn't checked because the frame is already
					// in our own buffer.
					pRecordSound->unlock( ptr1, ptr2, len1, len2 );

					float uncompressedBuffer[RECORD_SAMPLES_PER_FRAME];
					float downsizedBuffer[PLAYBACK_SAMPLES_PER_FRAME];
					float rms = 0.0f;

					for ( unsigned int i = 0; i < RECORD_SAMPLES_PER_FRAME; i++ )
					{
						const unsigned int indexBase = i * SAMPLE_SIZE;
						union { DWORD l; float f; } dataUnion;

						dataUnion.l = 0;

						// [AK] Convert from a byte array to a float in little-endian.
						for ( unsigned int byte = 0; byte < SAMPLE_SIZE; byte++ )
							dataUnion.l |= combinedBuffers[indexBase + byte] << 8 * byte;

						uncompressedBuffer[i] = dataUnion.f;
					}

					// [AK] Denoise the audio frame. The samples are scaled by
					// SHRT_MAX before being passed to RNNoise, then scaled back.
					if (( snd_suppressnoise ) && ( pDenoiseState != nullptr ))
					{
						for ( unsigned int i = 0; i < RECORD_SAMPLES_PER_FRAME; i++ )
							uncompressedBuffer[i] *= SHRT_MAX;

						rnnoise_process_frame( pDenoiseState, uncompressedBuffer, uncompressedBuffer );

						for ( unsigned int i = 0; i < RECORD_SAMPLES_PER_FRAME; i++ )
							uncompressedBuffer[i] /= SHRT_MAX;
					}

					// [AK] If using voice activity detection, calculate the RMS.
					// This must be done after denoising the audio frame.
					if ( TransmissionType != TRANSMISSIONTYPE_BUTTON )
					{
						for ( unsigned int i = 0; i < RECORD_SAMPLES_PER_FRAME; i++ )
							rms += uncompressedBuffer[i] * uncompressedBuffer[i];

						rms = sqrtf( rms / RECORD_SAMPLES_PER_FRAME );
					}

					// [AK] Check if the audio frame should actually be sent. This is
					// always the case while pressing the "voicerecord" button, or if
					// the sound intensity exceeds the minimum threshold.
					if (( TransmissionType == TRANSMISSIONTYPE_BUTTON ) || ( 20 * log10( rms ) >= snd_recordsensitivity ))
					{
						// [AK] If we're using voice activity, and not transmitting
						// audio already, then start transmitting now.
						if ( TransmissionType == TRANSMISSIONTYPE_OFF )
							StartTransmission( TRANSMISSIONTYPE_VOICEACTIVITY, false );

						// [AK] Downsize the input audio frame from 48 kHz to 24 kHz
						// by averaging each pair of neighbouring samples.
						for ( unsigned int i = 0; i < PLAYBACK_SAMPLES_PER_FRAME; i++ )
							downsizedBuffer[i] = ( uncompressedBuffer[2 * i] + uncompressedBuffer[2 * i + 1] ) / 2.0f;

						unsigned char compressedBuffer[MAX_PACKET_SIZE];
						int numBytesEncoded = EncodeOpusFrame( downsizedBuffer, PLAYBACK_SAMPLES_PER_FRAME, compressedBuffer, MAX_PACKET_SIZE );

						// The frame counter only advances when a packet is sent.
						if ( numBytesEncoded > 0 )
							CLIENTCOMMANDS_VoIPAudioPacket( framesSent++, compressedBuffer, numBytesEncoded );
					}
					else
					{
						StopTransmission( );
					}
				}

				// Move on to the next frame, even if this one couldn't be locked.
				lastRecordPosition = ( lastRecordPosition + RECORD_SAMPLES_PER_FRAME ) % RECORD_SOUND_LENGTH;
			}
		}
	}

	// [AK] Delete any VoIP channels belonging to players that are no longer valid,
	// or have been muted for too long to bother keeping around.
	for ( ULONG ulIdx = 0; ulIdx < MAXPLAYERS; ulIdx++ )
	{
		if ( pVoIPChannels[ulIdx] == nullptr )
			continue;

		if (( PLAYER_IsValidPlayer( ulIdx ) == false ) || (( pVoIPChannels[ulIdx]->IsMuted( )) && ( pVoIPChannels[ulIdx]->timeoutTick <= gametic )))
		{
			delete pVoIPChannels[ulIdx];
			pVoIPChannels[ulIdx] = nullptr;
		}
	}
}

//*****************************************************************************
//
// [AK] VOIPController::StartTransmission
//
// Prepares the VoIP controller to start transmitting audio to the server.
//
//*****************************************************************************

void VOIPController::StartTransmission( const TRANSMISSIONTYPE_e Type, const bool bGetRecordPosition )
{
	if (( bIsInitialized == false ) || ( bIsActive == false ) || ( TransmissionType != TRANSMISSIONTYPE_OFF ))
		return;

	if (( bGetRecordPosition ) && ( pSystem->getRecordPosition( recordDriverID, &lastRecordPosition ) != FMOD_OK ))
	{
		Printf( TEXTCOLOR_ORANGE "Failed to get position of voice recording.\n" );
		return;
	}

	TransmissionType = Type;
}

//*****************************************************************************
//
// [AK] VOIPController::StopTransmission
//
// Stops transmitting audio to the server.
//
//*****************************************************************************

void VOIPController::StopTransmission( void )
{
	// This gets called for every recorded frame that falls below the voice
	// activity threshold, and whenever the controller is deactivated. Only
	// report the end of a transmission if there actually was one.
	if ( TransmissionType == TRANSMISSIONTYPE_OFF )
		return;

	TransmissionType = TRANSMISSIONTYPE_OFF;
	DPrintf( "VoIP controller ending transmission.\n" );
}

//*****************************************************************************
//
// [AK] VOIPController::IsVoiceChatAllowed
//
// Checks if voice chat can be used right now.
//
//*****************************************************************************

bool VOIPController::IsVoiceChatAllowed( const bool bDisallowDemoPlayback ) const
{
	// [AK] Voice chat can only be used on the client's end.
	if ( NETWORK_InClientMode( ) == false )
		return false;

	// Demo playback only counts against voice chat when the caller asks for it.
	if (( bDisallowDemoPlayback ) && ( CLIENTDEMO_IsPlaying( )))
		return false;

	// The server must allow voice chat...
	if ( sv_allowvoicechat == VOICECHAT_OFF )
		return false;

	// ...and the local player must not have turned it off for themselves.
	if ( players[consoleplayer].userinfo.GetClientFlags( ) & CLIENTFLAGS_NOVOICECHAT )
		return false;

	// [AK] Voice chat can only be used while in the level or intermission screen.
	return (( gamestate == GS_LEVEL ) || ( gamestate == GS_INTERMISSION ));
}

//*****************************************************************************
//
// [AK] VOIPController::IsPlayerTalking
//
// Checks if the specified player is talking right now. If the player is the
// same as the local player, then they're talking while transmitting audio.
// Otherwise, they're talking if their channel is playing.
//
//*****************************************************************************

bool VOIPController::IsPlayerTalking( const ULONG ulPlayer ) const
{
	if (( ulPlayer == static_cast<ULONG>( consoleplayer )) && ( TransmissionType != TRANSMISSIONTYPE_OFF ))
		return true;

	if (( PLAYER_IsValidPlayer( ulPlayer )) && ( pVoIPChannels[ulPlayer] != nullptr ))
		return (( pVoIPChannels[ulPlayer]->pChannel != nullptr ) && ( pVoIPChannels[ulPlayer]->IsMuted( ) == false ));

	return false;
}

//*****************************************************************************
//
// [AK] VOIPController::SetChannelVolume
//
// Adjusts the volume for one particular VoIP channel.
//
//*****************************************************************************

void VOIPController::SetChannelVolume( const ULONG ulPlayer, float volume )
{
	if (( bIsInitialized == false ) || ( ulPlayer >= MAXPLAYERS ))
		return;

	channelVolumes[ulPlayer] = volume;

	if ( pVoIPChannels[ulPlayer] == nullptr )
		return;

	if (( pVoIPChannels[ulPlayer]->pChannel == nullptr ) || ( pVoIPChannels[ulPlayer]->pChannel->setVolume( volume ) != FMOD_OK ))
		Printf( TEXTCOLOR_ORANGE "Couldn't change the volume of VoIP channel %u.\n", ulPlayer );
}

//*****************************************************************************
//
// [AK] VOIPController::SetVolume
//
// Adjusts the volume of all VoIP channels.
//
//*****************************************************************************

void VOIPController::SetVolume( float volume )
{
	if ( bIsInitialized == false )
		return;

	// The volume is applied to the channel group as a whole. A missing group
	// is reported the same way as a failed call.
	const bool bVolumeApplied = (( pVoIPChannelGroup != nullptr ) && ( pVoIPChannelGroup->setVolume( volume ) == FMOD_OK ));

	if ( bVolumeApplied == false )
		Printf( TEXTCOLOR_ORANGE "Couldn't change the volume of the VoIP channel group.\n" );
}

//*****************************************************************************
//
// [AK] VOIPController::SetPitch
//
// Adjusts the pitch of all VoIP channels.
//
//*****************************************************************************

void VOIPController::SetPitch( float pitch )
{
	if ( bIsInitialized == false )
		return;

	// The pitch is applied to the channel group as a whole. A missing group
	// is reported the same way as a failed call.
	const bool bPitchApplied = (( pVoIPChannelGroup != nullptr ) && ( pVoIPChannelGroup->setPitch( pitch ) == FMOD_OK ));

	if ( bPitchApplied == false )
		Printf( TEXTCOLOR_ORANGE "Couldn't change the pitch of the VoIP channel group.\n" );
}

//*****************************************************************************
//
// [AK] VOIPController::ListRecordDrivers
//
// Prints a list of all record drivers that are connected in the same format
// as FMODSoundRenderer::PrintDriversList.
//
//*****************************************************************************

void VOIPController::ListRecordDrivers( void ) const
{
	// There's nothing to list without an FMOD system.
	if ( pSystem == nullptr )
		return;

	int numDrivers = 0;

	if ( pSystem->getRecordNumDrivers( &numDrivers ) != FMOD_OK )
		return;

	char pszName[256];

	// Print one line per driver. Drivers whose info can't be retrieved are skipped.
	for ( int driver = 0; driver < numDrivers; driver++ )
	{
		if ( pSystem->getRecordDriverInfo( driver, pszName, sizeof( pszName ), nullptr ) != FMOD_OK )
			continue;

		Printf( "%d. %s\n", driver, pszName );
	}
}

//*****************************************************************************
//
// [AK] VOIPController::GrabStats
//
// Returns a string showing the VoIP controller's status, which VoIP channels
// are currently playing, and how many samples have been read and played.
//
//*****************************************************************************

FString VOIPController::GrabStats( void ) const
{
	// Transmitting takes priority over whether the controller is active or not.
	const char *pszStatus = "deactivated";

	if ( TransmissionType != TRANSMISSIONTYPE_OFF )
		pszStatus = "transmitting";
	else if ( bIsActive )
		pszStatus = "activated";

	FString out;
	out.Format( "VoIP controller status: %s\n", pszStatus );

	// Add one line for every VoIP channel that exists.
	for ( ULONG ulIdx = 0; ulIdx < MAXPLAYERS; ulIdx++ )
	{
		VOIPChannel *const pVoIPChannel = pVoIPChannels[ulIdx];

		if ( pVoIPChannel == nullptr )
			continue;

		out.AppendFormat( "VoIP channel %u (%s): ", ulIdx, players[ulIdx].userinfo.GetName( ));

		if ( IsPlayerTalking( ulIdx ) == false )
		{
			out += pVoIPChannel->IsMuted( ) ? "muted" : "unknown status";
		}
		else
		{
			out.AppendFormat( "samples read/played = %u/%u", pVoIPChannel->samplesRead, pVoIPChannel->samplesPlayed );

			// The difference is only shown when playback hasn't passed what's been read.
			if ( pVoIPChannel->samplesRead >= pVoIPChannel->samplesPlayed )
				out.AppendFormat( " (diff = %u)", pVoIPChannel->samplesRead - pVoIPChannel->samplesPlayed );
		}

		out += '\n';
	}

	return out;
}

//*****************************************************************************
//
// [AK] VOIPController::ReceiveAudioPacket
//
// This is called when the client receives an audio packet from the server,
// previously sent by another client. The packet is decoded and saved into the
// jitter buffer belonging to that client's channel, where it will be played.
//
//*****************************************************************************

void VOIPController::ReceiveAudioPacket( const ULONG ulPlayer, const unsigned int frame, const unsigned char *pData, const unsigned int length )
{
	if (( bIsActive == false ) || ( PLAYER_IsValidPlayer( ulPlayer ) == false ) || ( pData == nullptr ) || ( length == 0 ))
		return;

	// [AK] If this player's channel doesn't exist yet, create a new one.
	if ( pVoIPChannels[ulPlayer] == nullptr )
		pVoIPChannels[ulPlayer] = new VOIPChannel( ulPlayer );

	// [AK] Don't accept any frames that arrived too late.
	if ( frame < pVoIPChannels[ulPlayer]->lastFrameRead )
		return;

	VOIPChannel::AudioFrame newAudioFrame;
	newAudioFrame.frame = frame;

	if ( pVoIPChannels[ulPlayer]->DecodeOpusFrame( pData, length, newAudioFrame.samples, PLAYBACK_SAMPLES_PER_FRAME ) > 0 )
	{
		// [AK] Insert the new audio frame in the jitter buffer. The frames must
		// be ordered correctly so that the audio isn't distorted.
		for ( unsigned int i = 0; i < pVoIPChannels[ulPlayer]->jitterBuffer.Size( ); i++ )
		{
			if ( newAudioFrame.frame < pVoIPChannels[ulPlayer]->jitterBuffer[i].frame )
			{
				pVoIPChannels[ulPlayer]->jitterBuffer.Insert( i, newAudioFrame );
				return;
			}
		}

		// [AK] Wait five tics before playing this VoIP channel. Also update the
		// timeout tick so that the channel doesn't get deleted right away.
		if (( pVoIPChannels[ulPlayer]->jitterBuffer.Size( ) == 0 ) && ( pVoIPChannels[ulPlayer]->IsMuted( )))
		{
			pVoIPChannels[ulPlayer]->playbackTick = gametic + 5;
			pVoIPChannels[ulPlayer]->timeoutTick = gametic + CHANNEL_TIMEOUT_TICKS;
		}

		pVoIPChannels[ulPlayer]->jitterBuffer.Push( newAudioFrame );
	}
}

//*****************************************************************************
//
// [AK] VOIPController::UpdateProximityChat
//
// Updates the VoIP controller's proximity chat for every player's channel. If
// proximity chat is enabled, and the player isn't spectating or being spied
// on, then 3D mode is enabled and their 3D attributes (position and velocity)
// are updated.
//
// Otherwise, 3D mode is disabled and 2D mode is re-enabled.
//
//*****************************************************************************

void VOIPController::UpdateProximityChat( void )
{
	FMOD_MODE mode;

	for ( ULONG ulIdx = 0; ulIdx < MAXPLAYERS; ulIdx++ )
	{
		if (( playeringame[ulIdx] == false ) || ( pVoIPChannels[ulIdx] == nullptr ))
			continue;

		// [AK] Something went wrong with getting the channel's mode.
		if (( pVoIPChannels[ulIdx]->pChannel == nullptr ) || ( pVoIPChannels[ulIdx]->pChannel->getMode( &mode ) != FMOD_OK ))
		{
			Printf( TEXTCOLOR_ORANGE "Couldn't get mode for VoIP channel %u.\n", ulIdx );
			continue;
		}

		FMOD_MODE oldMode = mode;

		if ( pVoIPChannels[ulIdx]->ShouldPlayIn3DMode( ))
			mode = ( mode & ~FMOD_2D ) | FMOD_3D;
		else
			mode = ( mode & ~FMOD_3D ) | FMOD_2D;

		if (( mode != oldMode ) && ( pVoIPChannels[ulIdx]->pChannel->setMode( mode ) != FMOD_OK ))
		{
			Printf( TEXTCOLOR_ORANGE "Couldn't set mode for VoIP channel %u.\n", ulIdx );
			continue;
		}

		if ( mode & FMOD_3D )
			pVoIPChannels[ulIdx]->Update3DAttributes( );
	}
}

//*****************************************************************************
//
// [AK] VOIPController::EncodeOpusFrame
//
// Encodes a single audio frame using the Opus audio codec, and returns the
// number of bytes encoded. If encoding fails, an error message is printed.
//
//*****************************************************************************

int VOIPController::EncodeOpusFrame( const float *pInBuffer, const unsigned int inLength, unsigned char *pOutBuffer, const unsigned int outLength )
{
	// Both buffers are required.
	if ( pInBuffer == nullptr )
		return 0;

	if ( pOutBuffer == nullptr )
		return 0;

	const int encodeResult = opus_encode_float( pEncoder, pInBuffer, inLength, pOutBuffer, outLength );

	// A positive result is the number of bytes written to the output buffer.
	if ( encodeResult > 0 )
		return encodeResult;

	// [AK] Print the error message if encoding failed.
	Printf( TEXTCOLOR_ORANGE "Failed to encode Opus audio frame: %s.\n", opus_strerror( encodeResult ));
	return 0;
}

//*****************************************************************************
//
// [AK] VOIPController::CreateSoundExInfo
//
// Returns an FMOD_CREATESOUNDEXINFO struct with the settings needed to create
// new FMOD sounds used by the VoIP controller. The sample rate and file length
// (in PCM samples) can be adjusted as required.
//
//*****************************************************************************

FMOD_CREATESOUNDEXINFO VOIPController::CreateSoundExInfo( const unsigned int sampleRate, const unsigned int fileLength )
{
	FMOD_CREATESOUNDEXINFO info;

	// Start with a zeroed struct, so only the fields below need to be filled in.
	memset( &info, 0, sizeof( info ));
	info.cbsize = sizeof( info );

	// The sound is mono and uses floating-point PCM samples.
	info.format = FMOD_SOUND_FORMAT_PCMFLOAT;
	info.numchannels = 1;
	info.defaultfrequency = sampleRate;

	// The length is in bytes, but fileLength is in samples.
	info.length = fileLength * SAMPLE_SIZE;

	return info;
}

//*****************************************************************************
//
// [AK] VOIPController::PCMReadCallback
//
// Static callback function that reads any audio samples in a VoIP channel's
// jitter buffer when the sound is being played. If there's no more data left
// in the buffer and the number of samples played exceeds the number of samples
// loaded, then the channel will be stopped.
//
//*****************************************************************************

FMOD_RESULT F_CALLBACK VOIPController::PCMReadCallback( FMOD_SOUND *sound, void *data, unsigned int datalen )
{
	VOIPController &controller = VOIPController::GetInstance( );
	FMOD::Sound *const pRequestedSound = reinterpret_cast<FMOD::Sound *>( sound );

	// Walk through every player slot, looking for the VoIP channel that owns
	// the sound FMOD is asking samples for.
	for ( ULONG ulPlayer = 0; ulPlayer < MAXPLAYERS; ulPlayer++ )
	{
		// Skip slots that don't have a VoIP channel.
		if ( controller.pVoIPChannels[ulPlayer] == nullptr )
			continue;

		// Skip channels that own a different sound.
		if ( controller.pVoIPChannels[ulPlayer]->pSound != pRequestedSound )
			continue;

		// Only the first matching channel fills the buffer.
		controller.pVoIPChannels[ulPlayer]->ReadSamples( reinterpret_cast<unsigned char *>( data ), datalen );
		return FMOD_OK;
	}

	// No channel owns this sound. FMOD_OK is still returned, and the buffer
	// is left untouched.
	return FMOD_OK;
}

//*****************************************************************************
//
// [AK] VOIPController::PCMSetPosCallback
//
// Static callback function that does nothing except return FMOD_OK.
//
//*****************************************************************************

FMOD_RESULT F_CALLBACK VOIPController::PCMSetPosCallback( FMOD_SOUND *sound, int subsound, unsigned int position, FMOD_TIMEUNIT postype )
{
	// None of the arguments are used; the requested position is ignored.
	(void)sound;
	(void)subsound;
	(void)position;
	(void)postype;

	return FMOD_OK;
}

//*****************************************************************************
//
// [AK] VOIPController::VOIPChannel::VOIPChannel
//
// Creates the channel's decoder and FMOD sound/channel, and sets all members
// to their default values.
//
//*****************************************************************************

VOIPController::VOIPChannel::VOIPChannel( const ULONG ulPlayer ) :
	ulPlayer( ulPlayer ),
	pSound( nullptr ),
	pChannel( nullptr ),
	pDecoder( nullptr ),
	playbackTick( 0 ),
	timeoutTick( 0 ),
	lastPlaybackPosition( 0 ),
	lastFrameRead( 0 ),
	samplesRead( 0 ),
	samplesPlayed( 0 )
{
	// Create the single-channel Opus decoder first. A failure is only reported
	// here; the rest of the channel is still set up afterwards.
	int decoderError = OPUS_OK;
	pDecoder = opus_decoder_create( PLAYBACK_SAMPLE_RATE, 1, &decoderError );

	if ( decoderError != OPUS_OK )
		Printf( TEXTCOLOR_ORANGE "Failed to create Opus decoder for VoIP channel %u: %s.\n", ulPlayer, opus_strerror( decoderError ));

	// Describe the sound. Its samples are supplied through PCMReadCallback
	// rather than being loaded from a file.
	FMOD_CREATESOUNDEXINFO soundInfo = VOIPController::CreateSoundExInfo( PLAYBACK_SAMPLE_RATE, PLAYBACK_SOUND_LENGTH );
	soundInfo.decodebuffersize = DECODE_BUFFER_SIZE;
	soundInfo.pcmreadcallback = PCMReadCallback;
	soundInfo.pcmsetposcallback = PCMSetPosCallback;

	// The sound is a looping, user-created software stream that plays in
	// either 3D or 2D mode, depending on what the channel currently needs.
	const FMOD_MODE dimensionMode = ShouldPlayIn3DMode( ) ? FMOD_3D : FMOD_2D;
	const FMOD_MODE soundMode = FMOD_OPENUSER | FMOD_LOOP_NORMAL | FMOD_SOFTWARE | FMOD_CREATESTREAM | dimensionMode;

	VOIPController &controller = VOIPController::GetInstance( );
	FMOD::System *const pSoundSystem = controller.pSystem;

	// There's nothing else to do if the sound couldn't be created.
	if (( pSoundSystem == nullptr ) || ( pSoundSystem->createSound( nullptr, soundMode, &soundInfo, &pSound ) != FMOD_OK ))
	{
		Printf( TEXTCOLOR_ORANGE "Failed to create sound for VoIP channel %u.\n", ulPlayer );
		return;
	}

	// Start the sound in a paused state, so the channel can be configured
	// before anything is heard.
	if ( pSoundSystem->playSound( FMOD_CHANNEL_FREE, pSound, true, &pChannel ) != FMOD_OK )
	{
		Printf( TEXTCOLOR_ORANGE "Failed to start playing VoIP channel %u.\n", ulPlayer );
		return;
	}

	pChannel->setChannelGroup( controller.pVoIPChannelGroup );
	pChannel->setUserData( &controller.proximityInfo );
	pChannel->setVolume( controller.channelVolumes[ulPlayer] );

	// The channel starts out muted.
	pChannel->setMute( true );

	// A channel playing in 3D mode needs its 3D attributes set before it's
	// unpaused.
	if ( ShouldPlayIn3DMode( ))
		Update3DAttributes( );

	pChannel->setPaused( false );
}

//*****************************************************************************
//
// [AK] VOIPController::VOIPChannel::~VOIPChannel
//
// Destroys the decoder and FMOD sound/channel.
//
//*****************************************************************************

VOIPController::VOIPChannel::~VOIPChannel( void )
{
	// Stop the FMOD channel before releasing the sound that it plays.
	if ( pChannel != nullptr )
		pChannel->stop( );

	pChannel = nullptr;

	if ( pSound != nullptr )
		pSound->release( );

	pSound = nullptr;

	// The Opus decoder isn't needed anymore either.
	if ( pDecoder != nullptr )
		opus_decoder_destroy( pDecoder );

	pDecoder = nullptr;

	// Whatever volume was set for this player's channel goes back to the
	// default of 1.0.
	VOIPController &controller = VOIPController::GetInstance( );
	controller.channelVolumes[ulPlayer] = 1.0f;
}

//*****************************************************************************
//
// [AK] VOIPController::VOIPChannel::IsMuted
//
// Checks if the VoIP channel is muted or not. In context, a muted channel is
// technically still playing in the background, but doesn't have enough new
// samples, so it remains muted (so the same samples don't play continuously)
// until new samples arrive.
//
//*****************************************************************************

bool VOIPController::VOIPChannel::IsMuted( void ) const
{
	bool bIsMuted = false;

	if (( pChannel != nullptr ) && ( pChannel->getMute( &bIsMuted ) == FMOD_OK ))
		return bIsMuted;

	return false;
}

//*****************************************************************************
//
// [AK] VOIPController::VOIPChannel::ShouldPlayIn3DMode
//
// Checks if the VoIP channel should be played in 3D mode. To do so, proximity
// chat must be enabled while in a level, and the player can't be spectating or
// be spied on by the local player.
//
//*****************************************************************************

bool VOIPController::VOIPChannel::ShouldPlayIn3DMode( void ) const
{
	// Proximity chat must be enabled...
	if ( sv_proximityvoicechat == false )
		return false;

	// ...the game must be in a level...
	if ( gamestate != GS_LEVEL )
		return false;

	// ...and the channel must belong to a valid player.
	if ( PLAYER_IsValidPlayer( ulPlayer ) == false )
		return false;

	// Spectators, and players without a body, are never heard in 3D.
	if ( players[ulPlayer].bSpectating )
		return false;

	if ( players[ulPlayer].mo == nullptr )
		return false;

	// Lastly, the player's body can't be what the local player's camera is
	// currently looking through.
	return ( players[ulPlayer].mo != players[consoleplayer].camera );
}

//*****************************************************************************
//
// [AK] VOIPController::VOIPChannel::DecodeOpusFrame
//
// Decodes a single audio frame using the Opus audio codec, and returns the
// number of samples decoded. If decoding fails, an error message is printed
// and zero is returned.
//
//*****************************************************************************

int VOIPController::VOIPChannel::DecodeOpusFrame( const unsigned char *pInBuffer, const unsigned int inLength, float *pOutBuffer, const unsigned int outLength )
{
	// Decoding is impossible without a decoder.
	if ( pDecoder == nullptr )
		return 0;

	// Both the encoded input and the output buffer must exist too.
	if (( pInBuffer == nullptr ) || ( pOutBuffer == nullptr ))
		return 0;

	// The last argument is zero, so forward error correction data isn't decoded.
	const int numSamplesDecoded = opus_decode_float( pDecoder, pInBuffer, inLength, pOutBuffer, outLength, 0 );

	// Anything above zero is a successful decode.
	if ( numSamplesDecoded > 0 )
		return numSamplesDecoded;

	// Otherwise, report what Opus says went wrong. The caller only sees zero.
	Printf( TEXTCOLOR_ORANGE "Failed to decode Opus audio frame: %s.\n", opus_strerror( numSamplesDecoded ));
	return 0;
}

//*****************************************************************************
//
// [AK] voicechat_FloatToByteArray
//
// Copies the four bytes of a float's 32-bit representation into a byte array,
// starting with the least significant byte. Nothing happens if the array is
// null.
//
//*****************************************************************************

// Writes the 32-bit pattern of a float into pBytes, least significant byte
// first. pBytes must have room for four bytes; a null pointer is ignored.
static void voicechat_FloatToByteArray( const float value, unsigned char *pBytes )
{
	if ( pBytes == nullptr )
		return;

	static_assert( sizeof( unsigned int ) == sizeof( float ), "float and unsigned int must have the same size" );

	// Reading a union member other than the one last written is undefined
	// behaviour in C++, so the float's bits are copied into an integer with
	// memcpy instead.
	unsigned int bits = 0;
	memcpy( &bits, &value, sizeof( bits ));

	// Shifting the integer (instead of copying raw memory) keeps the output
	// order fixed at least significant byte first.
	for ( unsigned int byte = 0; byte < sizeof( bits ); byte++ )
		pBytes[byte] = static_cast<unsigned char>(( bits >> ( 8 * byte )) & 0xFF );
}

//*****************************************************************************
//
// [AK] VOIPController::VOIPChannel::ReadSamples
//
// Fills the given sound buffer with samples: first any extra samples that were
// left over from the previous read, then samples taken from frames in the
// jitter buffer. Afterwards, the channel is muted if it ran out of samples to
// play, or unmuted if it was muted and new samples have been read.
//
//*****************************************************************************

void VOIPController::VOIPChannel::ReadSamples( unsigned char *pSoundBuffer, const unsigned int length )
{
	// The buffer's length is in bytes, so find how many samples fit inside it.
	const unsigned int samplesInBuffer = length / SAMPLE_SIZE;
	unsigned int samplesReadIntoBuffer = 0;

	// [AK] Read the extra samples into the sound buffer first. Make sure to
	// only read as many samples as what can fit in the sound buffer.
	if ( extraSamples.Size( ) > 0 )
	{
		const unsigned int maxExtraSamples = MIN<unsigned int>( extraSamples.Size( ), samplesInBuffer );

		// The first element is always the one that's read, because it's
		// removed from the list right after it's written into the buffer.
		for ( unsigned int i = 0; i < maxExtraSamples; i++ )
		{
			voicechat_FloatToByteArray( extraSamples[0], pSoundBuffer + i * SAMPLE_SIZE );
			extraSamples.Delete( 0 );
		}

		samplesReadIntoBuffer += maxExtraSamples;
	}

	// [AK] If there's still room left to read more samples, then start reading
	// frames from the jitter buffer. First, find how many frames are needed in
	// the sound buffer with respect to how many samples have already been read,
	// then determine how many frames can actually be read. It's possible that
	// there's less frames in the jitter buffer than what's required.
	// While the channel is muted, frames are only read once the current tick
	// has reached playbackTick.
	if (( samplesReadIntoBuffer < samplesInBuffer ) && (( IsMuted( ) == false ) || ( playbackTick <= gametic )))
	{
		const unsigned int framesRequired = static_cast<unsigned int>( ceil( static_cast<float>( samplesInBuffer - samplesReadIntoBuffer ) / PLAYBACK_SAMPLES_PER_FRAME ));
		const unsigned int framesToRead = MIN<unsigned int>( framesRequired, jitterBuffer.Size( ));

		for ( unsigned int frame = 0; frame < framesToRead; frame++ )
		{
			// The frame at the front of the jitter buffer is always consumed
			// in full. Samples that still fit go into the sound buffer, and
			// the rest are kept as extra samples for the next read.
			for ( unsigned int i = 0; i < PLAYBACK_SAMPLES_PER_FRAME; i++ )
			{
				if ( samplesReadIntoBuffer < samplesInBuffer )
				{
					voicechat_FloatToByteArray( jitterBuffer[0].samples[i], pSoundBuffer + samplesReadIntoBuffer * SAMPLE_SIZE );
					samplesReadIntoBuffer++;
				}
				else
				{
					extraSamples.Push( jitterBuffer[0].samples[i] );
				}
			}

			// Remember the number of the frame that was just consumed before
			// removing it from the jitter buffer.
			lastFrameRead = jitterBuffer[0].frame;
			jitterBuffer.Delete( 0 );
		}
	}

	// NOTE(review): if fewer samples were available than what fits in the
	// sound buffer, the remainder of the buffer isn't written to here. It's
	// only cleared by the memset calls further below.
	samplesRead += samplesReadIntoBuffer;

	if ( pChannel != nullptr )
	{
		unsigned int playbackPosition = 0;

		// The playback position is requested in PCM samples.
		if ( pChannel->getPosition( &playbackPosition, FMOD_TIMEUNIT_PCM ) == FMOD_OK )
		{
			if ( IsMuted( ) == false )
			{
				unsigned int playbackDelta = 0;

				// Find how far playback advanced since the last read. If the
				// new position is behind the old one, the position is treated
				// as having wrapped around the end of the sound.
				if ( playbackPosition >= lastPlaybackPosition )
					playbackDelta = playbackPosition - lastPlaybackPosition;
				else
					playbackDelta = playbackPosition + PLAYBACK_SOUND_LENGTH - lastPlaybackPosition;

				samplesPlayed += playbackDelta;
				lastPlaybackPosition = playbackPosition;

				// [AK] If there's no more audio frames left in the jitter buffer
				// and at least as many samples have been played as were read,
				// mute the channel. The timeout tick is pushed forward, the
				// counters are reset, and the sound buffer is silenced.
				if (( jitterBuffer.Size( ) == 0 ) && ( samplesRead <= samplesPlayed ))
				{
					pChannel->setMute( true );
					timeoutTick = gametic + CHANNEL_TIMEOUT_TICKS;
					lastFrameRead = samplesRead = samplesPlayed = 0;

					memset( pSoundBuffer, 0, length );
				}
			}
			// The channel is muted, but there are now samples that haven't
			// been played, so unmute it and start measuring playback from
			// the current position.
			else if ( samplesRead > samplesPlayed )
			{
				pChannel->setMute( false );
				lastPlaybackPosition = playbackPosition;
			}
			// The channel is muted and there's nothing new to play, so the
			// sound buffer is silenced.
			else
			{
				memset( pSoundBuffer, 0, length );
			}
		}
	}
}

//*****************************************************************************
//
// [AK] VOIPController::VOIPChannel::Update3DAttributes
//
// Updates a VoIP channel's 3D attributes: the position and velocity of the
// player corresponding to that channel. This only matters when the channel is
// playing in 3D mode.
//
//*****************************************************************************

void VOIPController::VOIPChannel::Update3DAttributes( void )
{
	if (( PLAYER_IsValidPlayer( ulPlayer ) == false ) || ( pChannel == nullptr ))
		return;

	FMOD_VECTOR pos = { 0.0f, 0.0f, 0.0f };
	FMOD_VECTOR vel = { 0.0f, 0.0f, 0.0f };

	if ( players[ulPlayer].mo != nullptr )
	{
		pos.x = FIXED2FLOAT( players[ulPlayer].mo->x );
		pos.y = FIXED2FLOAT( players[ulPlayer].mo->z );
		pos.z = FIXED2FLOAT( players[ulPlayer].mo->y );

		vel.x = FIXED2FLOAT( players[ulPlayer].mo->velx );
		vel.y = FIXED2FLOAT( players[ulPlayer].mo->velz );
		vel.z = FIXED2FLOAT( players[ulPlayer].mo->vely );
	}

	if ( pChannel->set3DAttributes( &pos, &vel ) != FMOD_OK )
		Printf( TEXTCOLOR_ORANGE "Failed to set 3D attributes for VoIP channel %u.\n", ulPlayer );
}

#endif // NO_SOUND

//*****************************************************************************
//
// [AK] VOICECHAT_IsPlayerTalking
//
// A wrapper function that either calls VOIPController::IsPlayerTalking if the
// macro NO_SOUND isn't defined, or always returns false.
//
//*****************************************************************************

bool VOICECHAT_IsPlayerTalking( const ULONG ulPlayer )
{
#ifndef NO_SOUND
	return VOIPController::GetInstance( ).IsPlayerTalking( ulPlayer );
#else
	return false;
#endif
}

//*****************************************************************************
//	STATISTICS

#ifndef NO_SOUND

ADD_STAT( voice )
{
	// The "voice" stat shows whatever the VoIP controller reports.
	VOIPController &controller = VOIPController::GetInstance( );
	return controller.GrabStats( );
}

#endif // NO_SOUND
