/*
 * Copyright (c) 2003, Influenza. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
import java.io.*;

/**
 * Byte Pair Encoding (BPE) block compressor.
 *
 * <p>The input is processed in blocks. Within a block the most frequent pair
 * of adjacent bytes is repeatedly replaced by a byte value that does not occur
 * in the block, and the pair is recorded in a per-block dictionary.
 *
 * <p>Layout of one encoded block as produced by {@link #write(OutputStream)}:
 * <pre>
 * +0   size, bits 7-0     size = dictionary length + block data length
 * +1   size, bits 15-8
 * +2.. byte pair dictionary, a sequence of runs covering entries 0..255.
 *      Each run starts with a count byte c:
 *        0-127    c + 1 entries follow. An entry whose first byte equals
 *                 its own entry number is a literal (one byte); otherwise
 *                 that byte is the left byte of a pair and the next byte
 *                 is the right byte.
 *        128-255  skip (c - 127) consecutive literal entries; unless that
 *                 reaches entry 256, exactly one entry (pair or literal)
 *                 follows.
 * +??  block data
 * </pre>
 */
public class BPE {
    /**
     * Replacement threshold. When a pair still occurs at least this many
     * times after every unused byte value has been assigned,
     * {@link #encode()} reports it so that the caller can shrink the block
     * and free another byte value.
     */
    static final int PAIR_COUNT_THRESHOLD = 5;

    /** Size of the read buffer, i.e. the maximum uncompressed block size. */
    private static final int BLOCK_SIZE = 8000;

    /**
     * Largest value a dictionary run count byte may take; 128 and above are
     * reserved for the "skip literals" marker.
     */
    private static final int MAX_RUN_COUNT = 127;

    /**
     * Command line entry point:
     * {@code java BPE [-s <skip bytes>] <src> <dest>}.
     *
     * <p>The first {@code skip bytes} bytes of {@code src} are copied to
     * {@code dest} unchanged; the remainder is written as encoded blocks.
     * The usage text is printed when the arguments are incomplete or invalid.
     *
     * @param args command line arguments
     * @throws Exception if reading or writing fails
     */
    public static void main( String[] args ) throws Exception {
        int skipBytes = 0;
        String src = null;
        String dest = null;
        boolean valid = true;
        for( int i = 0; i < args.length && valid; i++ ) {
            String arg = args[i];
            // startsWith (not charAt(0)) so an empty argument cannot throw.
            if( arg.startsWith( "-" ) ) {
                if( arg.equals( "-s" ) && i + 1 < args.length ) {
                    try {
                        skipBytes = Integer.parseInt( args[++i] );
                    } catch( NumberFormatException e ) {
                        valid = false; // not a number: fall through to usage
                    }
                } else {
                    valid = false; // unknown option or missing -s value
                }
            } else if( src == null ) {
                src = arg;
            } else if( dest == null ) {
                dest = arg;
            }
        }
        // Both file names are required; without this check a missing <dest>
        // would reach new FileOutputStream(null).
        if( !valid || src == null || dest == null ) {
            System.out.println(
                "BPE encoder v1.0 (c)2004 Influenza\n"
              + "usage:\n"
              + "  java BPE [-s <skip bytes>] <src> <dest>\n"
            );
            return;
        }

        InputStream in = new BufferedInputStream( new FileInputStream( src ) );
        try {
            OutputStream out = new BufferedOutputStream( new FileOutputStream( dest ) );
            try {
                compress( in, out, skipBytes );
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /**
     * Copies {@code skipBytes} bytes verbatim, then encodes the rest of
     * {@code in} block by block and writes the blocks to {@code out}.
     */
    private static void compress( InputStream in, OutputStream out, int skipBytes )
            throws Exception {
        byte[] buf = new byte[BLOCK_SIZE];
        BPE b1 = new BPE( buf.length );

        System.out.println( "plain block: " + skipBytes + " byte(s)" );
        int remaining = skipBytes;
        while( remaining > 0 ) {
            // Never request more than the buffer holds, and stop at end of
            // input instead of passing -1 on to write().
            int len = in.read( buf, 0, Math.min( remaining, buf.length ) );
            if( len < 0 ) {
                break;
            }
            out.write( buf, 0, len );
            remaining -= len;
        }

        int offset = 0; // bytes carried over from the previous block
        int blockNum = 1;
        while( true ) {
            int srcSize = in.read( buf, offset, buf.length - offset );
            if( srcSize == -1 ) {
                srcSize = 0;
            }
            srcSize += offset;
            if( srcSize == 0 ) {
                break;
            }

            System.out.print( "Block" + blockNum + ": " );
            b1.load( buf, srcSize );
            // While frequent pairs remain but no byte value is free, shrink
            // the block to `mark` bytes, which frees one byte value.
            while( b1.encode() ) {
                b1.load( buf, b1.mark );
            }
            b1.encodeDict();
            b1.printStatistics();
            b1.write( out );

            // Move the bytes that were cut off the block to the front of
            // the buffer; they start the next block.
            int consumed = b1.srcSize;
            offset = srcSize - consumed;
            System.arraycopy( buf, consumed, buf, 0, offset );
            blockNum++;
        }
    }

    /**
     * Occurrence count of each byte pair.
     * Index = left + (right * 256).
     */
    int[] appear = new int[256 * 256];

    /**
     * Occurrence count of each single byte value in the loaded block;
     * zero means the value is free to be assigned to a pair.
     */
    int[] used = new int[256];

    /**
     * Byte pair dictionary. {@code dict[i] == i} marks a literal; any other
     * value is a pair stored as left + (right * 256).
     */
    int[] dict = new int[256];

    /**
     * Serialized dictionary produced by {@link #encodeDict()}; the first
     * {@code dictSize} bytes are valid.
     */
    byte[] dictBuf = new byte[600];
    int dictSize;

    /** Working copy of the block; the first {@code length} bytes are valid. */
    byte[] work;
    int length;
    /**
     * Index of the last position at which a byte value occurs for the first
     * time. A block cut to this length uses one byte value fewer.
     */
    int mark;
    /** Number of source bytes passed to the most recent {@link #load}. */
    int srcSize;

    /**
     * @param maxBlockSize capacity of the working buffer, i.e. the largest
     *                     {@code len} that may be passed to {@link #load}
     */
    public BPE( int maxBlockSize ) {
        work = new byte[maxBlockSize];
    }

    /**
     * Copies the data to the working buffer, resets the dictionary and
     * counts single bytes and byte pairs.
     *
     * @param buf data to compress
     * @param len number of valid bytes in {@code buf}
     */
    public void load( byte[] buf, int len ) {
        System.arraycopy( buf, 0, work, 0, len );
        srcSize = len;
        length = len;

        java.util.Arrays.fill( appear, 0 );
        for( int i = 0; i < 256; i++ ) {
            used[i] = 0;
            dict[i] = i;
        }

        // Count single bytes and byte pairs.
        int left = -1;
        int old  = -1;
        mark = -1;
        for( int i = 0; i < len; i++ ) {
            int cur = work[i] & 0xFF;

            // Single byte count.
            if( used[cur]++ == 0 ) {
                mark = i; // last position where a new byte value appears
            }

            // Pair count. Inside a run of one repeated byte every second
            // pair is skipped so that overlapping pairs are not counted.
            if( i > 0 && (left != cur || old != left) ) {
                appear[left + (cur << 8)]++;
            } else {
                left = -1; // makes `old` -1 below so the next pair counts
            }

            old = left;
            left = cur;
        }
    }

    /**
     * Compresses the working buffer by assigning the most frequent pairs to
     * the unused byte values, in ascending order of byte value.
     *
     * @return {@code true} when every byte value has been visited and a pair
     *         still occurs at least {@link #PAIR_COUNT_THRESHOLD} times;
     *         {@code false} otherwise, including when the best remaining
     *         pair occurs two times or fewer
     */
    public boolean encode() {
        for( int i = 0; i < 256; i++ ) {
            if( used[i] > 0 ) {
                continue;
            }

            int max = mostAppearPair();
            if( appear[max] <= 2 ) {
                return false; // replacing further would not pay off
            }

            replace( max & 0xFF, max >> 8, i );
            dict[i] = max;
        }

        return (appear[mostAppearPair()] >= PAIR_COUNT_THRESHOLD);
    }

    /**
     * Finds the most frequent byte pair.
     *
     * @return index into {@link #appear} (left + right * 256) of the pair
     *         with the highest count; the lowest index wins on ties
     */
    public int mostAppearPair() {
        int max = 0;
        for( int i = 0; i < appear.length; i++ ) {
            if( appear[i] > appear[max] ) {
                max = i;
            }
        }
        return max;
    }

    /**
     * Writes the block: a 16-bit little-endian size, the serialized
     * dictionary and the block data.
     *
     * @param out destination stream
     * @return dictionary length + data length (the value stored in the
     *         size field)
     * @throws Exception if writing fails
     */
    public int write( OutputStream out ) throws Exception {
        int total = dictSize + length;

        out.write( total & 0xFF );
        out.write( total >> 8 ); // write(int) emits only the low 8 bits
        out.write( dictBuf, 0, dictSize );
        out.write( work, 0, length );

        return total;
    }

    /**
     * Replaces every occurrence of the pair [left][right] in the working
     * buffer with [assign], compacting the buffer in place and adjusting
     * the counts of the neighbouring pairs.
     *
     * @param left   left byte of the pair to replace
     * @param right  right byte of the pair to replace
     * @param assign byte value assigned to the pair
     */
    private void replace( int left, int right, int assign ) {
        int pos = 0; // write position; never ahead of the read position i
        for( int i = 0; i < length; i++ ) {
            if( i + 1 < length
             && work[i] == (byte)left && work[i + 1] == (byte)right ) {
                // The pair formed with the preceding output byte changes
                // from (l, left) to (l, assign).
                if( pos > 0 ) {
                    int l = work[pos - 1] & 0xFF;
                    appear[l | (left << 8)]--;
                    appear[l | (assign << 8)]++;
                }
                // The pair formed with the following input byte changes
                // from (right, r) to (assign, r).
                if( i + 2 < length ) {
                    int r = work[i + 2] & 0xFF;
                    appear[right | (r << 8)]--;
                    appear[assign | (r << 8)]++;
                }

                work[pos++] = (byte)assign;
                i++; // consume the right byte as well
            } else {
                work[pos++] = work[i];
            }
        }
        appear[left | (right << 8)] = 0;
        length = pos;
    }

    /**
     * Serializes {@link #dict} into {@link #dictBuf} using the run format
     * described in the class comment and sets {@link #dictSize}.
     */
    private void encodeDict() {
        int pos = 0;

        for( int i = 0; i < 256; ) {
            int len = 0;
            if( dict[i] == i ) {
                // Run of literals: emit a skip marker (128..255).
                while( i < 256 && dict[i] == i && len < 128 ) {
                    len++;
                    i++;
                }
                dictBuf[pos++] = (byte)(len + 127);
                if( i == 256 ) {
                    break;
                }
                len = 0; // exactly one entry follows a skip marker
            } else {
                // Run of pairs, absorbing a single literal when a pair
                // follows it directly. The count byte must stay within
                // 0..127: 128 would be read back as "skip one literal".
                int j = i + 1;
                while( (j < 256 && dict[j] != j && len < MAX_RUN_COUNT)
                    || (j < 254 && dict[j + 1] != j + 1 && len < 125) ) {
                    len++;
                    j++;
                }
                dictBuf[pos++] = (byte)len;
            }

            // Emit len + 1 entries: one byte for a literal, two for a pair.
            for( int j = 0; j <= len; j++ ) {
                dictBuf[pos++] = (byte)(dict[i] & 0xFF);
                if( dict[i] != i ) {
                    dictBuf[pos++] = (byte)(dict[i] >> 8);
                }
                i++;
            }
        }
        dictSize = pos;
    }

    /**
     * Prints the result of the compression:
     * source size (byte values used) -> encoded size (byte values used
     * after encoding), the size ratio in percent, and the highest count
     * among the pairs that were left unreplaced.
     */
    public void printStatistics() {
        int max = mostAppearPair();

        int usedNum = 0;
        int emptyNum = 0;
        for( int i = 0; i < 256; i++ ) {
            if( used[i] > 0 ) {
                usedNum++;
            }
            if( used[i] == 0 && dict[i] == i ) {
                emptyNum++;
            }
        }

        int total = 2 + dictSize + length; // 2 = size field

        System.out.println( srcSize + "(" + usedNum + ")" + " -> "
                          + total + "(" + (256 - emptyNum) + ") "
                          + (100f * total / srcSize) + "% "
                          + "max num of left pair:" + appear[max] );
    }

}
