/** 
 * @file  stringdiffs.cpp
 *
 * @brief Implementation file for sd_ComputeWordDiffs (q.v.)
 *
 */
// RCS ID line follows -- this is updated by CVS
// $Id: stringdiffs.cpp 5414 2008-06-02 13:07:47Z kimmov $

#include "stdafx.h"
#include <mbctype.h>
#include "stringdiffs.h"
#include "CompareOptions.h"
#include "stringdiffsi.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

static bool isSafeWhitespace(TCHAR ch);
static bool isWordBreak(int breakType, TCHAR ch);
static int make3wayDiff(wdiffarray &diff3, wdiffarray &diff10, wdiffarray &diff12);
static void wordLevelToByteLevel(wdiffarray * pDiffs, const CString& str1, const CString& str2, bool casitive, int xwhite);

/**
 * @brief Construct our worker object and tell it to do the work
 */
void
sd_ComputeWordDiffs(int nFiles, const CString str[3],
	bool case_sensitive, int whitespace, int breakType, bool byte_level,
	wdiffarray * pDiffs)
{
	if (nFiles == 2)
	{
		stringdiffs sdiffs(str[0], str[1], case_sensitive, whitespace, breakType, pDiffs);
		// Hash all words in both lines and then compare them word by word
		// storing differences into m_wdiffs
		sdiffs.BuildWordDiffList();
		// Now copy m_wdiffs into caller-supplied m_pDiffs (coalescing adjacents if possible)
		sdiffs.PopulateDiffs();

		if (byte_level)
			wordLevelToByteLevel(pDiffs, str[0], str[1], case_sensitive, whitespace);
	}
	else
	{
		if (str[0].IsEmpty())
		{
			stringdiffs sdiffs(str[1], str[2], case_sensitive, whitespace, breakType, pDiffs);
			sdiffs.BuildWordDiffList();
			sdiffs.PopulateDiffs();
			if (byte_level)
				wordLevelToByteLevel(pDiffs, str[1], str[2], case_sensitive, whitespace);
			for (int i = 0; i < pDiffs->GetSize(); i++)
			{
				wdiff *pDiff = &(*pDiffs)[i];
				pDiff->begin[2] = pDiff->begin[1];
				pDiff->begin[1] = pDiff->begin[0];
				pDiff->begin[0] = 0;
				pDiff->end[2] = pDiff->end[1];
				pDiff->end[1] = pDiff->end[0];
				pDiff->end[0] = -1;
			}
		}
		else if (str[1].IsEmpty())
		{
			stringdiffs sdiffs(str[0], str[2], case_sensitive, whitespace, breakType, pDiffs);
			sdiffs.BuildWordDiffList();
			sdiffs.PopulateDiffs();
			if (byte_level)
				wordLevelToByteLevel(pDiffs, str[0], str[2], case_sensitive, whitespace);
			for (int i = 0; i < pDiffs->GetSize(); i++)
			{
				wdiff *pDiff = &(*pDiffs)[i];
				pDiff->begin[2] = pDiff->begin[1];
				pDiff->begin[0] = pDiff->begin[0];
				pDiff->begin[1] = 0;
				pDiff->end[2] = pDiff->end[1];
				pDiff->end[0] = pDiff->end[0];
				pDiff->end[1] = -1;
			}
		}
		else if (str[2].IsEmpty())
		{
			stringdiffs sdiffs(str[0], str[1], case_sensitive, whitespace, breakType, pDiffs);
			sdiffs.BuildWordDiffList();
			sdiffs.PopulateDiffs();
			if (byte_level)
				wordLevelToByteLevel(pDiffs, str[0], str[1], case_sensitive, whitespace);
			for (int i = 0; i < pDiffs->GetSize(); i++)
			{
				wdiff *pDiff = &(*pDiffs)[i];
				pDiff->begin[1] = pDiff->begin[1];
				pDiff->begin[0] = pDiff->begin[0];
				pDiff->begin[2] = 0;
				pDiff->end[1] = pDiff->end[1];
				pDiff->end[0] = pDiff->end[0];
				pDiff->end[2] = -1;
			}
		}
		else
		{
			wdiffarray diffs10, diffs12, diffs02;
			stringdiffs sdiffs10(str[1], str[0], case_sensitive, whitespace, breakType, &diffs10);
			stringdiffs sdiffs12(str[1], str[2], case_sensitive, whitespace, breakType, &diffs12);
			// Hash all words in both lines and then compare them word by word
			// storing differences into m_wdiffs
			sdiffs10.BuildWordDiffList();
			sdiffs12.BuildWordDiffList();
			// Now copy m_wdiffs into caller-supplied m_pDiffs (coalescing adjacents if possible)
			sdiffs10.PopulateDiffs();
			sdiffs12.PopulateDiffs();
			if (byte_level)
			{
				wordLevelToByteLevel(&diffs10, str[1], str[0], case_sensitive, whitespace);
				wordLevelToByteLevel(&diffs12, str[1], str[2], case_sensitive, whitespace);
			}

			make3wayDiff(*pDiffs, diffs10, diffs12);
		}
	}
}

/**
 * @brief Constructor: stores the two lines, the compare options and the output list
 *
 * No comparison is done here; callers run BuildWordDiffList() and then
 * PopulateDiffs() on the constructed object.
 */
stringdiffs::stringdiffs(const CString & str1, const CString & str2,
	bool case_sensitive, int whitespace, int breakType,
	wdiffarray * pDiffs)
	: m_str1(str1), m_str2(str2)
	, m_case_sensitive(case_sensitive), m_whitespace(whitespace), m_breakType(breakType)
	, m_pDiffs(pDiffs)
{
	// members only; nothing else to set up
}

/**
 * @brief Add all different elements between lines to the wdiff list
 *
 * Both strings are first split into words (BuildWordsArray). The two word
 * lists are then walked in parallel by a small goto-driven state machine:
 *  - insame: no difference is pending; the gap in front of the current
 *    words and then the current words themselves are compared.
 *  - startdiff: the current words differ; findSync() looks for the next
 *    pair of matching words and one wdiff is appended per pair of
 *    unmatched words.
 * Every difference found is appended to m_wdiffs as inclusive character
 * ranges into m_str1 and m_str2.
 *
 * m_whitespace is only tested against 0 and 1 here; any other value takes
 * the "skip whitespace" paths. (Presumably 0 / 1 correspond to
 * WHITESPACE_COMPARE_ALL / WHITESPACE_IGNORE_CHANGE -- confirm against
 * CompareOptions.h.)
 */
void
stringdiffs::BuildWordDiffList()
{
	BuildWordsArray(m_str1, &m_words1);
	BuildWordsArray(m_str2, &m_words2);

	int w1=0, w2=0; // next word
	int bw1, bw2; // internal temporaries (first word of the current difference)

// We don't have a difference accumulated right now
insame:
	if (w1 == m_words1.GetSize() || w2 == m_words2.GetSize())
	{
		// At least one side has no words left: only the tails remain
		int i1 = (w1>0 ? m_words1[w1-1].end+1 : 0); // after end of word before w1
		int i2 = (w2>0 ? m_words2[w2-1].end+1 : 0); // after end of word before w2
		// Done, but handle trailing spaces
		if (m_whitespace==0)
		{
			// Whitespace is significant: advance while the tails match
			while (i1 < m_str1.GetLength() && i2 < m_str2.GetLength())
			{
				// Compare all whitespace
				if (!caseMatch(m_str1[i1], m_str2[i2]))
					break;
				++i1;
				++i2;
			}
		}
		else
		{
			// Whitespace is not significant: skip it independently on each side
			while (i1 < m_str1.GetLength() && isSafeWhitespace(m_str1[i1]))
				i1++;
			while (i2 < m_str2.GetLength() && isSafeWhitespace(m_str2[i2]))
				i2++;
		}
		// Anything left on either side becomes one final diff up to end of line
		if (i1 != m_str1.GetLength() || i2 != m_str2.GetLength())
		{
			/*if (i1 > 0 && m_str1[i1-1] == '\r' && m_str1[i1] == '\n')
			{
				i1--;
				i2--;
			}
			if (i2 > 0 && m_str2[i2-1] == '\r' && m_str2[i2] == '\n')
			{
				i2--;
				i1--;
			}*/
			wdiff wdf(i1,  m_str1.GetLength()-1, i2, m_str2.GetLength()-1);
			m_wdiffs.Add(wdf);
		}
		return;
	}
	// Check whitespace before current words for difference, if appropriate
	if (m_whitespace==0 || m_whitespace==1)
	{
		// Compare all whitespace
		int i1 = (w1>0 ? m_words1[w1-1].end+1 : 0); // after end of word before w1
		int i2 = (w2>0 ? m_words2[w2-1].end+1 : 0); // after end of word before w2
		// si1/si2 remember where each gap starts (used for the "empty gap" test below)
		int si1 = i1;
		int si2 = i2;
		// Walk forward through both gaps while they are identical
		while (i1<m_words1[w1].start || i2<m_words2[w2].start)
		{
			if (i1==m_words1[w1].start || i2==m_words2[w2].start
				|| m_str1[i1] != m_str2[i2])
			{
				// Found a difference
				break;
			}
			// Not difference, keep looking
			++i1;
			++i2;
		}
		if (i1<m_words1[w1].start || i2<m_words2[w2].start)
		{
			// Found a difference
			// Now backtrack from next word to find end of difference
			int e1 = m_words1[w1].start-1;
			int e2 = m_words2[w2].start-1;
			// ee1/ee2 remember where each gap ends (last char before the word)
			int ee1 = e1;
			int ee2 = e2;
			while (e1>i1 && e2>i2)
			{
				if (m_str1[e1] != m_str2[e2])
				{
					// Found a difference
					break;
				}
				// Not difference, keep looking
				--e1;
				--e2;
			}
			// Add the difference we've found
			// (ee - si) == -1 means that gap contains no characters at all.
			// NOTE(review): '&&' binds tighter than '||', so the test below reads as
			// ws==0 || (ws==1 && gap1 empty) || gap2 empty. This code only runs when
			// m_whitespace is 0 or 1, so the result equals grouping both
			// "empty gap" tests under ws==1.
			if (m_whitespace == 0 || (m_whitespace == 1 && (ee1 - si1) == -1 || (ee2 - si2) == -1))
			{
				wdiff wdf(i1, e1, i2, e2);
				m_wdiffs.Add(wdf);
			}
		}
		
	}
	// Now check current words for difference
	if (!AreWordsSame(m_words1[w1], m_words2[w2]))
		goto startdiff;
	++w1;
	++w2;
	goto insame; // safe even if at the end of one line's words

// Just beginning a difference
startdiff:
	bw1 = w1;
	bw2 = w2;

// Currently in a difference
// Actually we don't have a label here, because we don't loop to here
// We always find the end of the difference and jump straight to it

	// findSync advances w1/w2 (also when it returns false)
	if (!findSync(&w1, &w2))
	{
		// No later pair of matching words: pair up the remaining words
		// one to one and emit a diff for every pair
		int i = 0;
		for (;;)
		{
			// Add a diff from bw1 & bw2 to end of both lines
			int s1 = m_words1[bw1+i].start;
			int e1;
			int s2 = m_words2[bw2+i].start;
			int e2;
			int pe1;
			int pe2;
			if (m_whitespace == 0)
			{
				// Grab all the trailing whitespace for our diff
				if (bw1 + i < m_words1.GetUpperBound() && bw2 + i < m_words2.GetUpperBound())
				{
					e1 = m_words1[bw1+i+1].start-1;
					e2 = m_words2[bw2+i+1].start-1;
					// Now backtrack over matching whitespace
					pe1 = m_words1[bw1+i].end;
					pe2 = m_words2[bw2+i].end;
					while (e1 > pe1 && e1 >= s1
						&& e2 > pe2 && e2 >= s2
						&& m_str1[e1] == m_str2[e2])
					{
						--e1;
						--e2;
					}
				}
				else
				{
					// Last pair: extend to the end of the last word on each side
					e1 = m_words1[m_words1.GetUpperBound()].end;
					e2 = m_words2[m_words2.GetUpperBound()].end;
				}
			}
			else
			{
				// ignore whitespace, so leave it out of diff
				if (bw1 + i < m_words1.GetUpperBound() && bw2 + i < m_words2.GetUpperBound())
				{
					e1 = m_words1[bw1+i].end;
					e2 = m_words2[bw2+i].end;
				}
				else
				{
					// Last pair: extend to the end of the last word on each side
					e1 = m_words1[m_words1.GetUpperBound()].end;
					e2 = m_words2[m_words2.GetUpperBound()].end;
				}
			}
			wdiff wdf(s1, e1, s2, e2);
			m_wdiffs.Add(wdf);

			// Stop once either side has reached its last word
			if (bw1 + i >= m_words1.GetUpperBound() || bw2 + i >= m_words2.GetUpperBound())
				break;

			i++;
		}
		// Now skip directly to end of last word in each line
		w1 = m_words1.GetSize();
		w2 = m_words2.GetSize();
		// go to process trailing spaces and quit
		goto insame;
	}
	else
	{
		// NB: To get into indiff, must be at least one different word
		// To reach here, must also be be a sync
		// So there is a word at the start of this diff, and a word after us
		// w1 is valid because it is the word after us
		// w1-1 >= bw1 is valid because it is the word at the start of this diff
		// NOTE(review): findSync may leave w1 (or w2) equal to bw1 (bw2) when only
		// the other side advanced; the (w1 ? ... : -1) guards below cover w1 == 0.

		// Add a diff from start to just before sync word
		int i = 0;
		for (;;)
		{
			int s1 = m_words1[bw1+i].start;
			int e1 = 0; // placeholder, set below
			int s2 = m_words2[bw2+i].start;
			int e2 = 0; // placeholder, set below
			int pe1;
			int pe2;
			if (m_whitespace == 0)
			{
				// Grab all the trailing whitespace for our diff
				if (bw1 + i < w1-1 && bw2 + i < w2-1)
				{
					e1 = m_words1[bw1+i+1].start-1;
					e2 = m_words2[bw2+i+1].start-1;
					// Now backtrack over matching whitespace
					pe1 = m_words1[bw1+i].end;
					pe2 = m_words2[bw2+i].end;
				}
				else
				{
					// Last pair before the sync words: extend up to the sync words
					e1 = m_words1[w1].start-1;
					e2 = m_words2[w2].start-1;
					// Now backtrack over matching whitespace
					pe1 = (w1 ? m_words1[w1-1].end : -1);
					pe2 = (w2 ? m_words2[w2-1].end : -1);
				}
				while (e1 > pe1 && e1 >= s1
					&& e2 > pe2 && e2 >= s2
					&& m_str1[e1] == m_str2[e2])
				{
					--e1;
					--e2;
				}
			}
			else
			{
				// ignore whitespace, so leave it out of diff
				if (bw1 + i < w1-1 && bw2 + i < w2-1)
				{
					e1 = m_words1[bw1+i].end;
					e2 = m_words2[bw2+i].end;
				}
				else
				{
					// Last pair: end at the word just before each sync word
					e1 = (w1 ? m_words1[w1-1].end : -1);
					e2 = (w2 ? m_words2[w2-1].end : -1);
				}
			}
			wdiff wdf(s1, e1, s2, e2);
			m_wdiffs.Add(wdf);

			// Stop once either side has reached the word before its sync word
			if (bw1+i >= w1-1 || bw2+i >= w2-1)
				break;
			
			i++;
		}
		// skip past sync words (which we already know match)
		++w1;
		++w2;
		// go process sync
		goto insame; // safe even if at the end of one line's words
	}
}

/**
 * @brief Find closest matching word
 *
 * @param w1 [in,out] index of the current word in m_words1
 * @param w2 [in,out] index of the current word in m_words2
 * @return true if a pair of matching words was found; *w1 / *w2 then index
 *  that pair. When false is returned both indices have been advanced as a
 *  side effect of the search.
 */
bool
stringdiffs::findSync(int *w1, int *w2) const
{
	const int total1 = m_words1.GetSize();
	const int total2 = m_words2.GetSize();

	// Advance *w1 until one of the remaining words of m_words2 matches it
	int match2 = -1;
	for (; *w1 < total1; ++(*w1))
	{
		match2 = FindNextMatchInWords2(m_words1[*w1], *w2);
		if (match2 >= 0)
			break;
	}

	// Advance *w2 until one of the remaining words of m_words1 matches it
	int match1 = -1;
	for (; *w2 < total2; ++(*w2))
	{
		match1 = FindNextMatchInWords1(m_words2[*w2], *w1);
		if (match1 >= 0)
			break;
	}

	// Neither search produced a candidate
	if (match1 == -1 && match2 == -1)
		return false;

	// Only one search produced a candidate: take it
	if (match1 == -1)
	{
		*w2 = match2;
		return true;
	}
	if (match2 == -1)
	{
		*w1 = match1;
		return true;
	}

	// Both searches produced a candidate: the longer candidate word wins,
	// equal lengths are decided by the shorter distance to the candidate
	const int span1 = m_words1[match1].end - m_words1[match1].start;
	const int span2 = m_words2[match2].end - m_words2[match2].start;
	bool preferFirst;
	if (span1 == span2)
	{
		const int reach1 = m_words1[match1].end - m_words1[*w1].start;
		const int reach2 = m_words2[match2].end - m_words2[*w2].start;
		preferFirst = (reach1 < reach2);
	}
	else
	{
		preferFirst = (span1 > span2);
	}

	if (preferFirst)
		*w1 = match1;
	else
		*w2 = match2;
	return true;
}

/**
 * @brief Search m_words2 from index bw2 onwards for a word equal to needword1
 *
 * @param needword1 [in] word taken from m_words1
 * @param bw2 [in] first index in m_words2 to look at
 * @return index of the first matching word, or -1 if there is none
 */
int 
stringdiffs::FindNextMatchInWords2(const word & needword1, int bw2) const
{
	const int total = m_words2.GetSize();
	for (int idx = bw2; idx < total; ++idx)
	{
		if (AreWordsSame(needword1, m_words2[idx]))
			return idx;
	}
	return -1;
}

/**
 * @brief Search m_words1 from index bw1 onwards for a word equal to needword2
 *
 * @param needword2 [in] word taken from m_words2
 * @param bw1 [in] first index in m_words1 to look at
 * @return index of the first matching word, or -1 if there is none
 */
int 
stringdiffs::FindNextMatchInWords1(const word & needword2, int bw1) const
{
	const int total = m_words1.GetSize();
	for (int idx = bw1; idx < total; ++idx)
	{
		if (AreWordsSame(m_words1[idx], needword2))
			return idx;
	}
	return -1;
}

/**
 * @brief Break line into constituent words
 */
void
stringdiffs::BuildWordsArray(const CString & str, wordarray * words)
{
	int i=0, begin=0;

	// state when we are looking for next word
inspace:
	if (i==str.GetLength())
		return;
	if (isSafeWhitespace(str[i])) 
	{
		++i;
		goto inspace;
	}
	begin = i;
	goto inword;

	// state when we are inside a word
inword:
	bool atspace=false;
	if (i==str.GetLength() || (atspace=isSafeWhitespace(str[i])) || isWordBreak(m_breakType, str[i]))
	{
		if (begin<i)
		{
			// just finished a word
			// e is first non-word character (space or at end)
			int e = i-1;
			word wd(begin, e, hash(str, begin, e));
			words->Add(wd);
		}
		if (i == str.GetLength())
		{
			return;
		}
		else if (atspace)
		{
			goto inspace;
		}
		else
		{
			// start a new word because we hit a non-whitespace word break (eg, a comma)
			// but, we have to put each word break character into its own word
			word wd(i, i, hash(str, i, i));
			words->Add(wd);
			++i;
			begin = i;
			goto inword;
		}
	}
	++i;
	goto inword; // safe even if we're at the end or no longer in a word
}

/**
 * @brief Populate m_pDiffs from m_wdiffs (combining adjacent diffs)
 *
 * Doing the combining of adjacent diffs here keeps some complexity out of BuildWordsArray.
 *
 * begin/end of a wdiff are inclusive offsets, so diff[i] and diff[i+1] are
 * contiguous when diff[i].end + 1 == diff[i+1].begin on both sides. Such a
 * pair is folded into diff[i+1] (which may in turn be folded into its own
 * successor); every diff that is not folded away is appended to m_pDiffs.
 */
void
stringdiffs::PopulateDiffs()
{
	const int count = m_wdiffs.GetSize();
	for (int i=0; i<count; ++i)
	{
		// combine it with next ?
		if (i+1<count
			&& m_wdiffs[i].end[0] + 1 == m_wdiffs[i+1].begin[0]
			&& m_wdiffs[i].end[1] + 1 == m_wdiffs[i+1].begin[1])
		{
			// diff[i] and diff[i+1] are contiguous
			// so combine them into diff[i+1] and ignore diff[i]
			m_wdiffs[i+1].begin[0] = m_wdiffs[i].begin[0];
			m_wdiffs[i+1].begin[1] = m_wdiffs[i].begin[1];
			continue;
		}

		// Should never have a pair where both are missing
		ASSERT(m_wdiffs[i].begin[0]>=0 || m_wdiffs[i].begin[1]>=0);

		// Store the diff[i] in the caller list (m_pDiffs)
		wdiff dr(m_wdiffs[i]);
		m_pDiffs->Add(dr);
	}
}

// diffutils hash

/* Rotate a value n bits to the left. */
#define UINT_BIT (sizeof (unsigned) * CHAR_BIT)
#define ROL(v, n) ((v) << (n) | (v) >> (UINT_BIT - (n)))
/* Given a hash value and a new character, return a new hash value. */
#define HASH(h, c) ((c) + ROL (h, 7))

/**
 * @brief Hash the characters str[begin] .. str[end]
 *
 * @param str [in] string containing the word
 * @param begin [in] offset of the first character of the word
 * @param end [in] offset of the last character of the word (inclusive)
 * @return hash value; when m_case_sensitive is false the characters are
 *  upper-cased first, so words differing only in case hash equally
 *
 * The hash is only a quick pre-filter: AreWordsSame still compares the
 * characters when two hashes are equal.
 */
int
stringdiffs::hash(const CString & str, int begin, int end) const
{
	UINT h = 0;
	// 'end' is inclusive, so the last character has to be hashed as well
	// (otherwise every one-character word would hash to 0)
	for (int i=begin; i<=end; ++i)
	{
		UINT ch = (UINT)str[i];
		if (!m_case_sensitive)
			ch = (UINT)_totupper(ch);
		h += HASH(h, ch);
	}

	return h;
}

/**
 * @brief Compare two words (by reference to original strings)
 *
 * @param word1 [in] word located in m_str1
 * @param word2 [in] word located in m_str2
 * @return true if hash, length and all characters (per caseMatch) agree
 */
bool
stringdiffs::AreWordsSame(const word & word1, const word & word2) const
{
	// cheap rejections first
	if (word1.hash != word2.hash || word1.length() != word2.length())
		return false;

	// count how many leading characters agree
	int matched = 0;
	while (matched < word1.length()
		&& caseMatch(m_str1[word1.start+matched], m_str2[word2.start+matched]))
	{
		++matched;
	}
	return matched >= word1.length();
}

/**
 * @brief Return true if characters match
 *
 * Compares exactly when m_case_sensitive is set, otherwise compares the
 * upper-cased characters.
 */
bool
stringdiffs::caseMatch(TCHAR ch1, TCHAR ch2) const
{
	return m_case_sensitive
		? (ch1==ch2)
		: (_totupper(ch1)==_totupper(ch2));
}

/**
 * @brief Return true if chars match
 *
 * @param ch1, ch2 [in] characters to compare
 * @param casitive [in] true for exact compare, false to compare upper-cased characters
 *
 * Caller must not call this for lead bytes
 */
static bool
matchchar(TCHAR ch1, TCHAR ch2, bool casitive)
{
	return casitive
		? (ch1==ch2)
		: (_totupper(ch1)==_totupper(ch2));
}


/**
 * @brief Does character introduce a multicharacter character?
 *
 * Always false in UNICODE builds; otherwise true when a multibyte code page
 * is active and ch is a DBCS lead byte.
 */
static inline bool IsLeadByte(TCHAR ch)
{
#ifndef UNICODE
	return _getmbcp() && IsDBCSLeadByte(ch);
#else
	return false;
#endif
}

/**
 * @brief Is it whitespace (excludes all lead & trail bytes)?
 *
 * True only when xisspace() accepts ch and ch is not a lead byte.
 */
static inline bool
isSafeWhitespace(TCHAR ch)
{
	if (!xisspace(ch))
		return false;
	return !IsLeadByte(ch);
}

/**
 * @brief Is it a non-whitespace wordbreak character (ie, punctuation)?
 *
 * @param breakType [in] 0 means break on whitespace only (this function then
 *  always returns false); non-zero means break also on punctuation
 * @param ch [in] character to classify
 * @return true if ch is a word break for the given breakType
 *
 * In _UNICODE builds:
 *  - U+0000..U+00FF: every non-alphanumeric character is a break
 *  - U+3000..U+30FF: ideographic full stop, ideographic comma and every
 *    character that GetStringTypeW classifies as C3_HIRAGANA are breaks
 *  - U+FF00..U+FFFF: fullwidth comma, full stop, semicolon and colon are breaks
 * In other builds only ',', ';', ':' and '.' are breaks.
 */
static bool
isWordBreak(int breakType, TCHAR ch)
{
	// breakType==0 means whitespace only
	if (!breakType) return false;
	// breakType==1 means break also on punctuation
#ifdef _UNICODE
	if ((ch & 0xff00) == 0)
		return !isalnum(ch);
	else if ((ch & 0xff00) == 0x3000)
	{
		// Ask the OS for the CTYPE3 class of the character; treat a failed
		// query as "no class bits set"
		WORD wCharType = 0;
		if (!GetStringTypeW(CT_CTYPE3, &ch, 1, &wCharType))
			wCharType = 0;
		// The class bits live in wCharType, not in the character code itself
		return
			ch==0x3002/* Ideographic Full Stop */ || 
			ch==0x3001/* Ideographic Comma */ ||
			(wCharType & C3_HIRAGANA) != 0;
	}
	else if ((ch & 0xff00) == 0xff00)
		return
			ch==0xff0c/* Fullwidth Comma */ || 
			ch==0xff0e/* Fullwidth Full Stop */ ||
			ch==0xff1b/* Fullwidth Semicolon */ ||
			ch==0xff1a/* Fullwidth Colon */;
	return false;
#else
	return ch==',' || ch==';' || ch==':' || ch=='.';
#endif
}


/**
 * @brief Return pointer to last character of specified string (handle MBCS)
 *
 * @param psz [in] start of the string
 * @param len [in] length of the string
 * @return psz itself for an empty string; the last element when no multibyte
 *  code page is active; otherwise the start of the last character found by
 *  walking the string with CharNext
 *
 * If the last byte is a broken multibyte (ie, a solo lead byte), this returns previous char
 */
static LPCTSTR
LastChar(LPCTSTR psz, int len)
{
	if (len == 0)
		return psz;

	LPCTSTR const tail = psz + len - 1;
	if (_getmbcp() == 0)
		return tail;

	// Walk character by character until the tail is reached or passed
	LPCTSTR cursor = psz;
	LPCTSTR before = psz;
	while (cursor < tail)
	{
		before = cursor;
		LPCTSTR next = CharNext(cursor);
		// CharNext did not move: step manually so the walk always progresses
		cursor = (next == before) ? before + 1 : next;
	}

	// Landing exactly on a non-lead tail means the tail is a whole character;
	// otherwise the last character was multibyte or broken multibyte
	if (cursor == tail && !IsLeadByte(*cursor))
		return cursor;
	return before;
}

/**
 * @brief advance current pointer over whitespace, until not whitespace or beyond end
 * @param pcurrent [in,out] current location (to be advanced)
 * @param end [in] last valid position (only go one beyond this)
 */
static void
AdvanceOverWhitespace(LPCTSTR * pcurrent, LPCTSTR end)
{
	LPCTSTR pos = *pcurrent;
	// single steps are DBCS safe because isSafeWhitespace rejects lead bytes
	for (; pos <= end && isSafeWhitespace(*pos); ++pos)
	{
	}
	*pcurrent = pos;
}

/**
 * @brief back current pointer over whitespace, until not whitespace or at start
 * @param pcurrent [in,out] current location (to be backed up)
 * @param start [in] first valid position (do not go before this)
 *
 * NB: Unlike AdvanceOverWhitespace, this will not go over the start
 * This because WinMerge doesn't need to, and also CharPrev cannot easily do so
 */
static void
RetreatOverWhitespace(LPCTSTR * pcurrent, LPCTSTR start)
{
	LPCTSTR pos = *pcurrent;
	// CharPrev steps are DBCS safe because isSafeWhitespace rejects lead bytes
	while (pos > start && isSafeWhitespace(*pos))
		pos = CharPrev(start, pos);
	*pcurrent = pos;
}

/**
 * @brief Compute begin1,begin2,end1,end2 to display byte difference between strings str1 & str2
 * @param str1, str2 [in] strings to compare (only read here)
 * @param casitive [in] true for case-sensitive, false for case-insensitive
 * @param xwhite [in] This governs whether we handle whitespace specially (see WHITESPACE_COMPARE_ALL, WHITESPACE_IGNORE_CHANGE, WHITESPACE_IGNORE_ALL)
 * @param begin [out] begin[0] / begin[1]: offset in str1 / str2 where the
 *  difference starts; begin[0] is -1 when no difference was found
 * @param end [out] end[0] / end[1]: offset in str1 / str2 where the
 *  difference ends (inclusive)
 *
 * The function skips the matching prefix with a forward scan, then the
 * matching suffix with a backward scan; what is left in between is reported.
 *
 * Assumes whitespace is never leadbyte or trailbyte!
 */
void
sd_ComputeByteDiff(CString & str1, CString & str2, 
		   bool casitive, int xwhite, 
		   int begin[2], int end[2])
{
	// Set to sane values
	// Also this way can distinguish if we set begin[0] to -1 for no diff in line
	begin[0] = end[0] = begin[1] = end[1] = 0;

	int len1 = str1.GetLength();
	int len2 = str2.GetLength();

	LPCTSTR pbeg1 = (LPCTSTR)str1;
	LPCTSTR pbeg2 = (LPCTSTR)str2;

	// One (or both) strings empty: nothing to scan
	if (len1 == 0 || len2 == 0)
	{
		if (len1 == len2)
		{
			// both empty: no difference
			begin[0] = -1;
			begin[1] = -1;
		}
		// otherwise begin stays 0 and the whole non-empty string is the difference
		end[0] = len1 - 1;
		end[1] = len2 - 1;
		return;
	}

	// cursors from front, which we advance to beginning of difference
	LPCTSTR py1 = pbeg1;
	LPCTSTR py2 = pbeg2;

	// pen1,pen2 point to the last valid character (broken multibyte lead chars don't count)
	LPCTSTR pen1 = LastChar(py1, len1);
	LPCTSTR pen2 = LastChar(py2, len2);

	if (xwhite != WHITESPACE_COMPARE_ALL)
	{
		// Ignore leading and trailing whitespace
		// by advancing py1 and py2
		// and retreating pen1 and pen2
		while (py1 < pen1 && isSafeWhitespace(*py1))
			++py1; // DBCS safe because of isSafeWhitespace above
		while (py2 < pen2 && isSafeWhitespace(*py2))
			++py2; // DBCS safe because of isSafeWhitespace above
		// NOTE(review): len1 and len2 are non-zero here, and pbeg1[len1] /
		// pbeg2[len2] index one past the last character (presumably the
		// terminators), so the second half of this condition looks like it can
		// never be true -- confirm whether [len-1] was intended.
		if ((pen1 < pbeg1 + len1 - 1 || pen2 < pbeg2 + len2 -1)
			&& (!len1 || !len2 || pbeg1[len1] != pbeg2[len2]))
		{
			// mismatched broken multibyte ends
		}
		else
		{
			while (pen1 > py1 && isSafeWhitespace(*pen1))
				pen1 = CharPrev(py1, pen1);
			while (pen2 > py2 && isSafeWhitespace(*pen2))
				pen2 = CharPrev(py2, pen2);
		}
	}

	// Advance over matching beginnings of lines
	// Advance py1 & py2 from beginning until find difference or end
	while (1)
	{
		// Potential difference extends from py1 to pen1 and py2 to pen2

		// Check if either side finished
		if (py1 > pen1 && py2 > pen2)
		{
			// both sides consumed completely: the strings match
			// (begin[] is overwritten after the loop; the backward scan
			// below sets the -1 markers again in this situation)
			begin[0] = end[0] = begin[1] = end[1] = -1;
			break;
		}
		if (py1 > pen1 || py2 > pen2)
		{
			// only one side consumed: the rest of the other side differs
			break;
		}

		// handle all the whitespace logic (due to WinMerge whitespace settings)
		if (xwhite && py1 < pen1 && isSafeWhitespace(*py1))
		{
			if (xwhite==WHITESPACE_IGNORE_CHANGE && !isSafeWhitespace(*py2))
			{
				// py1 is white but py2 is not
				// in WHITESPACE_IGNORE_CHANGE mode,
				// this doesn't qualify as skippable whitespace
				break; // done with forward search
			}
			// gobble up all whitespace in current area
			AdvanceOverWhitespace(&py1, pen1); // will go beyond end
			AdvanceOverWhitespace(&py2, pen2); // will go beyond end
			continue;

		}
		if (xwhite && py2 < pen2 && isSafeWhitespace(*py2))
		{
			if (xwhite==WHITESPACE_IGNORE_CHANGE && !isSafeWhitespace(*py1))
			{
				// py2 is white but py1 is not
				// in WHITESPACE_IGNORE_CHANGE mode,
				// this doesn't qualify as skippable whitespace
				break; // done with forward search
			}
			// gobble up all whitespace in current area
			AdvanceOverWhitespace(&py1, pen1); // will go beyond end
			AdvanceOverWhitespace(&py2, pen2); // will go beyond end
			continue;
		}

		// Now do real character match
		if (IsLeadByte(*py1))
		{
			if (!IsLeadByte(*py2))
				break; // done with forward search
			// DBCS (we assume if a lead byte, then character is 2-byte)
			if (!(py1[0] == py2[0] && py1[1] == py2[1]))
				break; // done with forward search
			py1 += 2; // DBCS specific
			py2 += 2; // DBCS specific
		}
		else
		{
			if (IsLeadByte(*py2))
				break; // done with forward search
			if (!matchchar(py1[0], py2[0], casitive))
				break; // done with forward search
			++py1; // DBCS safe b/c we checked above
			++py2; // DBCS safe b/c we checked above
		}
	}

	// Potential difference extends from py1 to pen1 and py2 to pen2

	// Store results of advance into return variables (begin[0] & begin[1])
	// -1 in a begin variable means no visible diff area
	begin[0] = py1 - pbeg1;
	begin[1] = py2 - pbeg2;

	// cursors from back, which we retreat to end of difference
	LPCTSTR pz1 = pen1;
	LPCTSTR pz2 = pen2;

	// Retreat over matching ends of lines
	// Retreat pz1 & pz2 from end until find difference or beginning
	while (1)
	{
		// Check if either side finished
		if (pz1 < py1 && pz2 < py2)
		{
			// both sides consumed completely: no difference remains
			// (end[] is overwritten after the loop, begin[] keeps the -1)
			begin[0] = end[0] = begin[1] = end[1] = -1;
			break;
		}
		if (pz1 < py1 || pz2 < py2)
		{
			break;
		}

		// handle all the whitespace logic (due to WinMerge whitespace settings)
		if (xwhite && pz1 > py1 && isSafeWhitespace(*pz1))
		{
			// NOTE(review): the literal 1 is presumably WHITESPACE_IGNORE_CHANGE,
			// which the forward scan above uses by name -- confirm
			if (xwhite==1 && !isSafeWhitespace(*pz2))
				break; // done with reverse search
			// gobble up all whitespace in current area
			while (pz1 > py1 && isSafeWhitespace(*pz1))
				pz1 = CharPrev(py1, pz1);
			while (pz2 > py2 && isSafeWhitespace(*pz2))
				pz2 = CharPrev(py2, pz2);
			continue;

		}
		if (xwhite && pz2 > py2 && isSafeWhitespace(*pz2))
		{
			// pz2 is white but pz1 is not (the case above did not apply)
			if (xwhite==1)
				break; // done with reverse search
			while (pz2 > py2 && isSafeWhitespace(*pz2))
				pz2 = CharPrev(py2, pz2);
			continue;
		}

		// Now do real character match
		if (IsLeadByte(*pz1))
		{
			if (!IsLeadByte(*pz2))
				break; // done with reverse search
			// DBCS (we assume if a lead byte, then character is 2-byte)
			if (!(pz1[0] == pz2[0] && pz1[1] == pz2[1]))
				break; // done with reverse search
		}
		else
		{
			if (IsLeadByte(*pz2))
				break; // done with reverse search
			if (!matchchar(pz1[0], pz2[0], casitive))
				break; // done with reverse search
		}
		// Step back one character; at py the cursor is simply decremented so
		// that it ends up before py and the "finished" checks above can fire
		pz1 = (pz1 > py1) ? CharPrev(py1, pz1) : pz1 - 1;
		pz2 = (pz2 > py2) ? CharPrev(py2, pz2) : pz2 - 1;
	}

/*	if (*pz1 == '\r' && *(pz1+1) == '\n')
	{
		pz1++;
		pz2++;
	}
	else if (*pz2 == '\r' && *(pz2+1) == '\n')
	{
		pz2++;
		pz1++;
	}
	if (*(pbeg1-1) == '\r' && *pbeg1 == '\n')
	{
		pbeg1--;
		pbeg2--;
	}
	else if (*(pbeg2-1) == '\r' && *pbeg2 == '\n')
	{
		pbeg2--;
		pbeg1--;
	}*/

	// Store results of retreat into return variables (end[0] & end[1])
	end[0] = pz1 - pbeg1;
	end[1] = pz2 - pbeg2;

	// Check if difference region was empty
	if (begin[0] == end[0] + 1 && begin[1] == end[1] + 1)
		begin[0] = -1; // no diff
}

/**
 * @brief Merge two two-way diff lists into one three-way diff list.
 *
 * diff3 algorithm. It is almost the same as GNU diff3's algorithm
 *
 * @param diff3 [out] the merged blocks are appended here; begin[]/end[]
 *  indices 0, 1 and 2 are filled for every block
 * @param diff10 [in] diff blocks between file1 and file0
 *  (index 0 refers to file1, index 1 to file0)
 * @param diff12 [in] diff blocks between file1 and file2
 *  (index 0 refers to file1, index 1 to file2)
 * @return number of blocks appended to diff3
 */
static int make3wayDiff(wdiffarray &diff3, wdiffarray &diff10, wdiffarray &diff12)
{
	int diff10count = diff10.GetSize();
	int diff12count = diff12.GetSize();

	// next unprocessed block of each input list, and number of merged blocks so far
	int diff10i = 0;
	int diff12i = 0;
	int diff3i = 0;

	// look-ahead cursors used while collecting one group of overlapping blocks
	int diff10itmp;
	int diff12itmp;

	// which input list the last / first block of the current group comes from
	bool lastDiffBlockIsDiff12;
	bool firstDiffBlockIsDiff12;

	wdiff dr3, dr10, dr12, dr10first, dr10last, dr12first, dr12last;
	wdiffarray diff3tmp;

	// position just after the previous merged block in file0 / file1 / file2;
	// used to translate file1 positions when one input has no block in the group
	int linelast0 = 0;
	int linelast1 = 0;
	int linelast2 = 0;

	for (;;)
	{
		// both input lists consumed
		if (diff10i >= diff10count && diff12i >= diff12count)
			break;

		/* 
		 * merge overlapped diff blocks
		 * diff10 is diff blocks between file1 and file0.
		 * diff12 is diff blocks between file1 and file2.
		 *
		 *                      diff12
		 *                 diff10            diff3
		 *                 |~~~|             |~~~|
		 * firstDiffBlock  |   |             |   |
		 *                 |   | |~~~|       |   |
		 *                 |___| |   |       |   |
		 *                       |   |   ->  |   |
		 *                 |~~~| |___|       |   |
		 * lastDiffBlock   |   |             |   |
		 *                 |___|             |___|
		 */

		// Decide which list supplies the first block of this group
		if (diff10i >= diff10count && diff12i < diff12count)
		{
			// only diff12 has blocks left
			dr12first = diff12.GetAt(diff12i);
			dr12last = dr12first;
			firstDiffBlockIsDiff12 = true;
		}
		else if (diff10i < diff10count && diff12i >= diff12count)
		{
			// only diff10 has blocks left
			dr10first = diff10.GetAt(diff10i);
			dr10last = dr10first;
			firstDiffBlockIsDiff12 = false;
		}
		else
		{
			dr10first = diff10.GetAt(diff10i);	
			dr12first = diff12.GetAt(diff12i);	
			dr10last = dr10first;
			dr12last = dr12first;

			// the block that starts earlier in file1 comes first (diff12 on a tie)
			if (dr12first.begin[0] <= dr10first.begin[0])
				firstDiffBlockIsDiff12 = true;
			else
				firstDiffBlockIsDiff12 = false;
		}
		lastDiffBlockIsDiff12 = firstDiffBlockIsDiff12;

		// Collect following blocks of both lists that overlap or touch in file1
		diff10itmp = diff10i;
		diff12itmp = diff12i;
		for (;;)
		{
			if (diff10itmp >= diff10count || diff12itmp >= diff12count)
				break;

			dr10 = diff10.GetAt(diff10itmp);
			dr12 = diff12.GetAt(diff12itmp);

			// both blocks end at the same place in file1: the group ends here
			if (dr10.end[0] == dr12.end[0])
			{
				diff10itmp++;
				lastDiffBlockIsDiff12 = true;

				dr10last = dr10;
				dr12last = dr12;
				break;
			}

			// a gap in file1 between the blocks: the group ends before the later one
			if (lastDiffBlockIsDiff12)
			{
				if (dr12.end[0] + 1 < dr10.begin[0])
					break;
			}
			else
			{
				if (dr10.end[0] + 1 < dr12.begin[0])
					break;
			}

			// consume the block that ends first; the other one stays "last"
			if (dr12.end[0] > dr10.end[0])
			{
				diff10itmp++;
				lastDiffBlockIsDiff12 = true;
			}
			else
			{
				diff12itmp++;
				lastDiffBlockIsDiff12 = false;
			}

			dr10last = dr10;
			dr12last = dr12;
		}

		// the block that is currently "last" belongs to the group as well
		if (lastDiffBlockIsDiff12)
			diff12itmp++;
		else
			diff10itmp++;

		// Compute the start of the merged block in all three files
		if (firstDiffBlockIsDiff12)
		{
			dr3.begin[1] = dr12first.begin[0];
			dr3.begin[2] = dr12first.begin[1];
			if (diff10itmp == diff10i)
				// no diff10 block in this group: offset from the previous merged block
				dr3.begin[0] = dr3.begin[1] - linelast1 + linelast0;
			else
				dr3.begin[0] = dr3.begin[1] - dr10first.begin[0] + dr10first.begin[1];
		}
		else
		{
			dr3.begin[0] = dr10first.begin[1];
			dr3.begin[1] = dr10first.begin[0];
			if (diff12itmp == diff12i)
				// no diff12 block in this group: offset from the previous merged block
				dr3.begin[2] = dr3.begin[1] - linelast1 + linelast2;
			else
				dr3.begin[2] = dr3.begin[1] - dr12first.begin[0] + dr12first.begin[1];
		}

		// Compute the end of the merged block in all three files
		if (lastDiffBlockIsDiff12)
		{
			dr3.end[1] = dr12last.end[0];
			dr3.end[2] = dr12last.end[1];
			if (diff10itmp == diff10i)
				dr3.end[0] = dr3.end[1] - linelast1 + linelast0;
			else
				dr3.end[0] = dr3.end[1] - dr10last.end[0] + dr10last.end[1];
		}
		else
		{
			dr3.end[0] = dr10last.end[1];
			dr3.end[1] = dr10last.end[0];
			if (diff12itmp == diff12i)
				dr3.end[2] = dr3.end[1] - linelast1 + linelast2;
			else
				dr3.end[2] = dr3.end[1] - dr12last.end[0] + dr12last.end[1];
		}

		// remember where this merged block ends in each file
		linelast0 = dr3.end[0] + 1;
		linelast1 = dr3.end[1] + 1;
		linelast2 = dr3.end[2] + 1;

		diff3tmp.Add(dr3);

		TRACE(_T("left=%d,%d middle=%d,%d right=%d,%d\n"),
			dr3.begin[0], dr3.end[0], dr3.begin[1], dr3.end[1], dr3.begin[2], dr3.end[2]);

		diff3i++;
		diff10i = diff10itmp;
		diff12i = diff12itmp;
	}

	// Copy the merged blocks to the caller's list; a block whose end reaches
	// into the following block is shortened to stop just before it
	for (int i = 0; i < diff3i; i++)
	{
		dr3 = diff3tmp.GetAt(i);
		if (i < diff3i - 1)
		{
			wdiff dr3next = diff3tmp.GetAt(i + 1);
			for (int j = 0; j < 3; j++)
			{
				if (dr3.end[j] >= dr3next.begin[j])
					dr3.end[j] = dr3next.begin[j] - 1;
			}
		}
		diff3.Add(dr3);
	}
	return diff3i;
}

/**
 * @brief adjust the range of the specified word diffs down to byte level.
 * @param pDiffs [in,out] word diffs; begin/end at index 0 (str1) and
 *  index 1 (str2) of every entry are narrowed in place
 * @param str1, str2 [in] line to be compared
 * @param casitive [in] true for case-sensitive, false for case-insensitive
 * @param xwhite [in] This governs whether we handle whitespace specially (see WHITESPACE_COMPARE_ALL, WHITESPACE_IGNORE_CHANGE, WHITESPACE_IGNORE_ALL)
 */
static void wordLevelToByteLevel(wdiffarray * pDiffs, const CString& str1, const CString& str2, bool casitive, int xwhite)
{
	const int total = pDiffs->GetSize();
	for (int idx = 0; idx < total; idx++)
	{
		wdiff &diff = (*pDiffs)[idx];

		// Cut the text covered by the word diff out of both lines
		CString piece1 = str1.Mid(diff.begin[0], diff.end[0] - diff.begin[0] + 1);
		CString piece2 = str2.Mid(diff.begin[1], diff.end[1] - diff.begin[1] + 1);

		// Offsets returned here are relative to the start of each piece
		int relBegin[3], relEnd[3];
		sd_ComputeByteDiff(piece1, piece2, casitive, xwhite, relBegin, relEnd);

		for (int side = 0; side < 2; side++)
		{
			const int base = diff.begin[side];
			if (relBegin[side] == -1)
			{
				// no visible diff on this side: make the range empty
				diff.end[side] = base - 1;
			}
			else
			{
				// translate the relative offsets back into line offsets
				diff.end[side] = base + relEnd[side];
				diff.begin[side] = base + relBegin[side];
			}
		}
	}
}
