#include "stdafx.h"

#include "JCodeUtil.h"

namespace NTextFinder {
namespace NativeLib {
namespace JCodeUtil {

    // Raw octet type used while sniffing encodings.
    typedef unsigned char byte;

    // Bytes that make up the escape sequences recognised by GetEncoding.
    // Every sequence starts with ESC, is followed by one or more
    // "intermediate" bytes and ends with a "final" byte.

    // Sequence introducer.
    const byte bESC = 0x1B;     // ESC

    // Intermediate bytes.
    const byte bDollar = 0x24;  // '$'
    const byte bAnd = 0x26;     // '&'
    const byte bOP = 0x28;      // '('

    // Final bytes.
    const byte bAT = 0x40;      // '@'
    const byte bB = 0x42;       // 'B'
    const byte bD = 0x44;       // 'D'
    const byte bI = 0x49;       // 'I'
    const byte bJ = 0x4A;       // 'J'

    // Inspects buf[begin, end) for evidence of the text encoding in use.
    //
    // The function either returns a definite answer or, when nothing
    // conclusive is found, updates the per-encoding byte tallies in `state`
    // and returns ENC_NONE so that the caller can decide from the tallies.
    //
    // Parameters:
    //   state   - running counters (binary, ucs2, sjis, euc, utf8). They are
    //             only ever incremented here, never reset, so values passed
    //             in by the caller take part in the decisions below.
    //   buf     - the bytes to examine.
    //   begin   - index of the first byte to examine.
    //   end     - one past the last byte to examine.
    //   end_buf - one past the last readable byte of buf. Multi-byte
    //             look-ahead may read bytes in [end, end_buf).
    //             NOTE(review): assumes begin <= end <= end_buf - confirm
    //             against the callers.
    //
    // Returns:
    //   ENC_UNICODE - binary-looking bytes were seen together with the
    //                 "7-bit byte followed by NUL" pattern.
    //   ENC_UTF8    - binary-looking bytes were seen without that pattern
    //                 (fallback used for binary data).
    //   ENC_ASCII   - an ESC byte is followed by a byte >= 0x80.
    //   ENC_JIS     - one of the known JIS escape sequences was found.
    //   ENC_NONE    - nothing conclusive; state.sjis / state.euc /
    //                 state.utf8 have been increased by the number of bytes
    //                 that form plausible sequences in each encoding.
    Encoding GetEncoding( SCodeState& state, const unsigned char* buf, int begin, int end, int end_buf )
    {
        // Pass 1: count bytes that do not normally occur in text.
        for (int i = begin; i < end; i++) {
            if (buf[i] <= 0x06 || buf[i] == 0x7F || buf[i] == 0xFF) {
                state.binary++;
                // A NUL right after a 7-bit byte smells like raw Unicode (UCS-2).
                if (0 < i && i < end - 1 && buf[i - 1] <= 0x7F && buf[i] == 0x00) {
                    state.ucs2++;
                }
            }
        }

        if (state.binary > 0) {
            if (state.ucs2 > 0) {
                // UCS-2 (Unicode)
                return ENC_UNICODE;
            } else {
                // Binary data: there is no dedicated result for it, so fall
                // back to ENC_UTF8.
                return ENC_UTF8;
            }
        }

        // Pass 2: look for escape sequences.
        for (int i = begin; i < end - 1; i++) {
            const byte b1 = buf[i];
            const byte b2 = buf[i + 1];

            if (b1 != bESC) { continue; }

            if (b2 >= 0x80) {
                // ESC followed by an 8-bit byte: not Japanese, treat as ASCII.
                return ENC_ASCII;
            } else if (i + 2 < end_buf) {
                const byte b3 = buf[i + 2];
                if (b2 == bDollar && b3 == bAT) {
                    // ESC $ @ : JIS_0208 1978 --> JIS
                    return ENC_JIS;
                } else if (b2 == bDollar && b3 == bB) {
                    // ESC $ B : JIS_0208 1983 --> JIS
                    return ENC_JIS;
                } else if (i + 5 < end_buf && b2 == bAnd && b3 == bAT && buf[i + 3] == bESC && buf[i + 4] == bDollar && buf[i + 5] == bB) {
                    // ESC & @ ESC $ B : JIS_0208 1990 --> JIS
                    return ENC_JIS;
                } else if (i + 3 < end_buf && b2 == bDollar && b3 == bOP && buf[i + 3] == bD) {
                    // ESC $ ( D : JIS_0212 --> JIS
                    return ENC_JIS;
                } else if (b2 == bOP && (b3 == bB || b3 == bJ)) {
                    // ESC ( B / ESC ( J : JIS_ASC --> JIS
                    return ENC_JIS;
                } else if (b2 == bOP && b3 == bI) {
                    // ESC ( I : JIS_KANA --> JIS
                    return ENC_JIS;
                }
            }
        }

        // Pass 3: tally bytes that form plausible Shift_JIS double-byte
        // characters (lead 0x81-0x9F / 0xE0-0xFC, trail 0x40-0x7E / 0x80-0xFC).
        for (int i = begin; i < end - 1; i++) {
            const byte b1 = buf[i];
            const byte b2 = buf[i + 1];
            if (((0x81 <= b1 && b1 <= 0xFC && (b1 <= 0x9F || 0xE0 <= b1))) &&
                ((0x40 <= b2 && b2 <= 0x7E) || (0x80 <= b2 && b2 <= 0xFC))) {
                state.sjis += 2;
                i++;    // skip the trail byte
            }
        }

        // Pass 4: tally bytes that form plausible EUC sequences: a pair of
        // 0xA1-0xFE bytes, 0x8E + 0xA1-0xDF, or 0x8F + two 0xA1-0xFE bytes.
        for (int i = begin; i < end - 1; i++) {
            const byte b1 = buf[i];
            const byte b2 = buf[i + 1];
            if (((0xA1 <= b1 && b1 <= 0xFE) && (0xA1 <= b2 && b2 <= 0xFE)) ||
                (b1 == 0x8E && (0xA1 <= b2 && b2 <= 0xDF))) {
                state.euc += 2;
                i++;    // skip the second byte
            } else if (i + 2 < end_buf) {
                // The third byte may lie past `end` but must be inside the buffer.
                const byte b3 = buf[i + 2];
                if (b1 == 0x8F && (0xA1 <= b2 && b2 <= 0xFE) && (0xA1 <= b3 && b3 <= 0xFE)) {
                    state.euc += 3;
                    i += 2;    // skip the second and third bytes
                }
            }
        }

        // Pass 5: tally bytes that form plausible 2- and 3-byte UTF-8 sequences.
        for (int i = begin; i < end - 1; i++) {
            const byte b1 = buf[i];
            const byte b2 = buf[i + 1];
            if ((0xC0 <= b1 && b1 <= 0xDF) && (0x80 <= b2 && b2 <= 0xBF)) {
                state.utf8 += 2;
                i++;    // skip the continuation byte
            } else if (i + 2 < end_buf) {
                // Only buf[i + 2] is read, so the same bound as the EUC pass
                // applies; a stricter bound would leave 3-byte sequences at
                // the very end of the buffer uncounted.
                const byte b3 = buf[i + 2];
                if ((0xE0 <= b1 && b1 <= 0xEF) && (0x80 <= b2 && b2 <= 0xBF) && (0x80 <= b3 && b3 <= 0xBF)) {
                    state.utf8 += 3;
                    i += 2;    // skip both continuation bytes
                }
            }
        }

        // Nothing conclusive: the caller decides from the tallies in `state`.
        return ENC_NONE;
    }

}
}
}
