// Scintilla source code edit control
/** @file UniConversion.h
 ** Functions to handle UTF-8 and UTF-16 strings.
 **/
// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.

#ifndef UNICONVERSION_H
#define UNICONVERSION_H

#ifdef SCI_NAMESPACE
namespace Scintilla {
#endif

// Upper bound, in bytes, used by this module for a single UTF-8 encoded character.
const int UTF8MaxBytes = 4;

// Code point U+FFFD (REPLACEMENT CHARACTER).
// NOTE(review): presumably substituted for undecodable input - confirm at the use sites.
const int unicodeReplacementChar = 0xFFFD;

// Length and conversion helpers between UTF-8 (char), UTF-16 (wchar_t) and
// UTF-32 (unsigned int) buffers. Only the declarations are visible here; the
// definitions live in another translation unit.
// These prototypes use size_t and std::string, and this header has no #include
// lines of its own, so the including file must provide those names first.
// NOTE(review): the meaning of each length argument (bytes vs. code units,
// input size vs. output capacity) and of each return value is not visible
// here - confirm against the implementation before relying on it.
size_t UTF8Length(const wchar_t *uptr, size_t tlen);
void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len);
unsigned int UTF8CharLength(unsigned char ch);
size_t UTF16Length(const char *s, size_t len);
size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen);
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf);
std::string FixInvalidUTF8(const std::string &text);

// 256-entry table indexed by byte value, defined in another translation unit.
// NOTE(review): the element type differs between the two branches below
// (const unsigned char when SCINTILLA_EXPORT is defined, plain int otherwise).
// Whichever branch is active must match the actual definition - verify.
#ifdef SCINTILLA_EXPORT
extern __declspec(dllexport) const unsigned char UTF8BytesOfLead[256];
#else
extern int UTF8BytesOfLead[256];
#endif
// NOTE(review): presumably fills UTF8BytesOfLead and must run before the table
// is read - confirm in the implementation.
void UTF8BytesOfLeadInitialise();

#ifdef ADD_BY_JOJO // UTF8IsLeadByte
// Reports whether the byte value ch is 0xC0 or above, i.e. outside both the
// ASCII range and the trail byte range 0x80..0xBF.
// ch must be a byte value in 0..0xFF; this is asserted.
inline bool UTF8IsLeadByte(int ch) {
	assert(ch >= 0 && ch <= 0xFF);
	const int lastTrailByte = 0xBF;
	return ch > lastTrailByte;
}
#endif
// Reports whether ch lies in the UTF-8 trail (continuation) byte range 0x80..0xBF.
// When NIHONGO is defined, ch is additionally asserted to be a byte value.
inline bool UTF8IsTrailByte(int ch) {
#ifdef NIHONGO
	assert(0 <= ch && ch <= 0xFF);
#endif
	// Anything below 0x80 can never be a trail byte.
	if (ch < 0x80)
		return false;
	return ch < 0xc0;
}

// Reports whether ch is below 0x80, the range of single-byte ASCII characters.
// When NIHONGO is defined, ch is additionally asserted to be a byte value.
inline bool UTF8IsAscii(int ch) {
#ifdef NIHONGO
	assert(0 <= ch && ch <= 0xFF);
#endif
	const int firstNonAscii = 0x80;
	return !(ch >= firstNonAscii);
}

// Bit masks: UTF8MaskWidth selects the low three bits, UTF8MaskInvalid is bit 3.
// NOTE(review): presumably applied to the value returned by UTF8Classify, with
// the width bits holding the byte length and the invalid bit flagging a
// malformed sequence - confirm in the implementation.
enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };
// Classifies the bytes starting at us, where len is taken to be the number of
// bytes available (TODO confirm). Defined in another translation unit; exported
// with C linkage and __stdcall when SCINTILLA_EXPORT is defined.
#ifdef SCINTILLA_EXPORT
extern "C" __declspec(dllexport) int __stdcall UTF8Classify(const unsigned char *us, int len);
#else
int UTF8Classify(const unsigned char *us, int len);
#endif

// Similar to UTF8Classify, but reports a length of 1 for invalid bytes
// instead of setting the invalid flag.
// Defined in another translation unit; exported with C linkage and __stdcall
// when SCINTILLA_EXPORT is defined.
// NOTE(review): len is presumably the number of bytes available at us - confirm.
#ifdef SCINTILLA_EXPORT
extern "C" __declspec(dllexport) int __stdcall UTF8DrawBytes(const unsigned char *us, int len);
#else
int UTF8DrawBytes(const unsigned char *us, int len);
#endif

// U+2028 LINE SEPARATOR is encoded as \xe2\x80\xa8
// U+2029 PARAGRAPH SEPARATOR is encoded as \xe2\x80\xa9
const int UTF8SeparatorLength = 3;
// Reports whether the bytes at us spell one of the two separators above.
// Reads us[0], then us[1] and us[2] only while the preceding bytes match.
inline bool UTF8IsSeparator(const unsigned char *us) {
	// Both separators share the two byte prefix e2 80.
	if (us[0] != 0xe2)
		return false;
	if (us[1] != 0x80)
		return false;
	const unsigned char finalByte = us[2];
	return (finalByte == 0xa8) || (finalByte == 0xa9);
}

// U+0085 NEXT LINE (NEL) is encoded as \xc2\x85
const int UTF8NELLength = 2;
// Reports whether the bytes at us spell NEL.
// Reads us[0], and us[1] only when the first byte matches.
inline bool UTF8IsNEL(const unsigned char *us) {
	if (us[0] != 0xc2)
		return false;
	return us[1] == 0x85;
}

// UTF-16 surrogate code unit ranges (lead 0xD800..0xDBFF, trail 0xDC00..0xDFFF)
// and the first code point above the 16-bit range.
enum {
	SURROGATE_LEAD_FIRST = 0xD800,
	SURROGATE_LEAD_LAST = 0xDBFF,
	SURROGATE_TRAIL_FIRST = 0xDC00,
	SURROGATE_TRAIL_LAST = 0xDFFF,
	SUPPLEMENTAL_PLANE_FIRST = 0x10000
};

// Number of UTF-16 code units in the character that starts with uch:
// 2 when uch is a lead surrogate (SURROGATE_LEAD_FIRST..SURROGATE_LEAD_LAST),
// otherwise 1.
inline unsigned int UTF16CharLength(wchar_t uch) {
	if (uch < SURROGATE_LEAD_FIRST)
		return 1;
	if (uch > SURROGATE_LEAD_LAST)
		return 1;
	return 2;
}

#ifdef SCI_NAMESPACE
}
#endif

#endif
