/*
 * entity converter
 *
 * Copyright(c) 2009 olyutorskii
 * $Id: EntityConverter.java 638 2009-08-07 18:01:32Z olyutorskii $
 */

package jp.sourceforge.jindolf.parser;

import java.util.ListIterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts a sub-range of a {@link DecodedContent} while resolving the
 * four kinds of XHTML entity references used by Jinrou BBS.
 * The handled entity references are {@code &gt; &lt; &quot; &amp;}.
 * Instances keep per-conversion working state in fields,
 * so this class is not designed for multi-threaded use.
 */
public class EntityConverter{

    /** Replacement table: each row is {entity reference, replacement}. */
    private static final String[][] XCHG_TABLE = {
        {"&gt;",   ">"},
        {"&lt;",   "<"},
        {"&quot;", "\""},
        {"&amp;",  "&"},
    };

    /**
     * Alternation holding one capturing group per row of the table,
     * in row order (group N corresponds to row N-1).
     */
    private static final Pattern XCHG_PATTERN;

    /** Per-row length difference: entity length minus replacement length. */
    private static final int[] XCHG_GAP = new int[XCHG_TABLE.length];

    static{
        StringBuilder regex = new StringBuilder();
        for(int idx = 0; idx < XCHG_TABLE.length; idx++){
            String xchgFrom = XCHG_TABLE[idx][0];
            String xchgTo   = XCHG_TABLE[idx][1];

            if(regex.length() > 0){
                regex.append('|');
            }
            regex.append('(').append(Pattern.quote(xchgFrom)).append(')');

            XCHG_GAP[idx] = xchgFrom.length() - xchgTo.length();
        }
        XCHG_PATTERN = Pattern.compile(regex.toString());
    }

    /** Reusable matcher; re-targeted at the source text on every conversion. */
    private final Matcher matcher = XCHG_PATTERN.matcher("");

    // Working state, valid only while convert() is running.
    private DecodedContent from = null;
    private DecodedContent to = null;
    private ListIterator<DecodeErrorInfo> errorIterator;
    /**
     * Offset handed to {@code createGappedClone} for copied decode errors.
     * Starts at the extraction start position and grows by the length
     * difference of every entity reference replaced so far.
     */
    private int gap;

    /**
     * Constructor.
     */
    public EntityConverter(){
        super();
    }

    /**
     * Copies a range of the source text to the output, carrying over the
     * decode error information that falls inside that range.
     * @param fromStart start position of the copy in the source (inclusive)
     * @param fromEnd end position of the copy in the source (exclusive)
     */
    private void simpleCopy(int fromStart, int fromEnd){
        this.to.append(this.from.getRawContent(), fromStart, fromEnd);

        if(this.errorIterator == null){
            return;
        }

        while(this.errorIterator.hasNext()){
            DecodeErrorInfo info = this.errorIterator.next();
            int pos = info.getCharPosition();
            if(pos < fromStart){
                // Lies before this range: skip it.
                continue;
            }
            if(fromEnd <= pos){
                // Lies beyond this range: step back so that the next
                // call examines this entry again, then stop.
                if(this.errorIterator.hasPrevious()){
                    this.errorIterator.previous();
                }
                break;
            }
            DecodeErrorInfo newInfo = info.createGappedClone(this.gap);
            this.to.addPlainDecodeError(newInfo);
        }
    }

    /**
     * Converts entity references within the given range.
     * The working state of this converter is cleared when the call ends,
     * whether it returns normally or throws.
     * @param content source document
     * @param startPos start position (inclusive)
     * @param endPos end position (exclusive)
     * @return the extracted and converted document
     * @throws IndexOutOfBoundsException the specified positions are invalid
     */
    public DecodedContent convert(DecodedContent content,
                                   int startPos, int endPos)
            throws IndexOutOfBoundsException{
        if(startPos > endPos || startPos < 0){
            throw new IndexOutOfBoundsException(
                    "invalid range: startPos=" + startPos
                    + ", endPos=" + endPos);
        }
        int contentLength = content.length();
        if(contentLength < endPos){
            throw new IndexOutOfBoundsException(
                    "endPos=" + endPos
                    + " exceeds content length " + contentLength);
        }

        try{
            this.matcher.reset(content.getRawContent());
            this.matcher.region(startPos, endPos);
            this.gap = startPos;
            this.from = content;
            this.to = new DecodedContent();
            if(content.hasDecodeError()){
                this.errorIterator =
                        content.getDecodeErrors().listIterator();
            }else{
                this.errorIterator = null;
            }

            int lastPos = startPos;
            while(this.matcher.find()){
                // Find which alternative matched; its group number
                // identifies the row of the replacement table.
                int group;
                int matchStart = -1;
                for(group = 1; group <= XCHG_TABLE.length; group++){
                    matchStart = this.matcher.start(group);
                    if(matchStart >= 0){
                        break;
                    }
                }
                int matchEnd = this.matcher.end(group);

                // Copy the plain text preceding the entity reference.
                simpleCopy(lastPos, matchStart);

                // The replacement is shorter than the entity reference,
                // so errors copied from here on need a larger offset.
                this.gap += XCHG_GAP[group - 1];

                this.to.append(XCHG_TABLE[group - 1][1]);

                lastPos = matchEnd;
            }
            simpleCopy(lastPos, endPos);

            DecodedContent result = this.to;
            return result;
        }finally{
            // Always drop references to caller data, even on failure.
            this.matcher.reset("");
            this.from = null;
            this.to = null;
            this.errorIterator = null;
        }
    }

}
