package jp.ac.ritsumei.is.infobio;
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;

/**
 * Performs input and output of glycan structure data.
 * @author m
 * @version 20081003
 */
public class GlycanTools
{
    /** A line containing this marker ends one entry of a multi-entry KCF file. */
    private static final String KCF_ENTRY_DELIMITER = "///";

    // States of the readKCF() line parser, in the order the sections must appear.
    private static final int STATE_ENTRY       = 0;   // waiting for the ENTRY line
    private static final int STATE_NODE_HEADER = 1;   // waiting for the "NODE <count>" line
    private static final int STATE_NODE_DATA   = 2;   // reading NODE rows
    private static final int STATE_EDGE_HEADER = 3;   // waiting for the "EDGE <count>" line
    private static final int STATE_EDGE_DATA   = 4;   // reading EDGE rows

    /** What the parser was still waiting for in each non-final state (used in error messages). */
    private static final String[] STATE_EXPECTATION =
    {
        "ENTRY line", "NODE count line", "remaining NODE rows", "EDGE count line"
    };

    // KCF line patterns (compiled once instead of once per input line).
    private static final Pattern KCF_ENTRY       = Pattern.compile("^ENTRY\\s+(.+?)\\s+Glycan\\s*$");
    private static final Pattern KCF_NODE_HEADER = Pattern.compile("^NODE\\s+(\\d+?)\\s*$");
    private static final Pattern KCF_NODE_ROW    = Pattern.compile("^\\s*(\\d+?)\\s+(.+?)\\s+(.+?)\\s+(.+?)\\s*$");
    private static final Pattern KCF_EDGE_HEADER = Pattern.compile("^EDGE\\s+(\\d+?)\\s*$");
    private static final Pattern KCF_EDGE_ROW    = Pattern.compile("^\\s*(\\d+?)\\s+(.+?)\\s+(.+?)\\s*$");

    // The three accepted spellings of one end of a KCF edge.
    private static final Pattern EDGE_END_WITH_ANOMER   = Pattern.compile("^(\\d+?):([abr])(\\d+?)$"); // node:anomer+position
    private static final Pattern EDGE_END_WITH_POSITION = Pattern.compile("^(\\d+?):(\\d+?)$");        // node:position
    private static final Pattern EDGE_END_NODE_ONLY     = Pattern.compile("^(\\d+?)$");                // node only

    // Linucs file patterns.
    private static final Pattern LINUCS_HEADER = Pattern.compile(">.*");
    private static final Pattern BLANK_LINE    = Pattern.compile("^\\s*$");

    // "Normal format" patterns.
    private static final Pattern CONTAINS_DASH  = Pattern.compile("^.*-.*$");
    private static final Pattern TERMINAL_SUGAR = Pattern.compile("^.*(dhex|fuc|rha|pen|xyl).*$");

    /**
     * Reads every structure from a multi-entry KCF file.
     * Lines are buffered until a line containing "///" is seen; the buffered text is then
     * parsed with {@link #readKCF(BufferedReader)}. An entry that fails to parse is reported
     * on System.err and skipped, so one broken entry does not abort the whole file.
     * Text after the last "///" line is not parsed.
     * @param br BufferedReader over the KCF file to read
     * @return the structures that were parsed successfully, in file order
     * @throws Exception if reading from br fails
     */
    public List<Glycan> readKCFFile(BufferedReader br) throws Exception
    {
        List<Glycan> glycans = new ArrayList<Glycan>();
        StringBuilder entry = new StringBuilder();

        String line;
        while ((line = br.readLine()) != null)
        {
            if (!line.contains(KCF_ENTRY_DELIMITER))
            {
                entry.append(line).append('\n');
                continue;
            }

            try
            {
                glycans.add(readKCF(new BufferedReader(new StringReader(entry.toString()))));
            }
            catch (Exception e)
            {
                // Deliberate best effort: a malformed entry is reported and ignored.
                System.err.println("KCF file reading error: " + e);
            }
            entry.setLength(0);                // start collecting the next entry
        }
        return glycans;
    }

    /**
     * Reads one structure in KCF format.
     * The input must contain, in this order: an ENTRY line, a "NODE n" line followed by
     * n node rows, and an "EDGE m" line followed by exactly m edge rows. Lines that do not
     * match are ignored up to the EDGE rows; after the EDGE count line every line must be
     * an edge row. The result is built from a Linucs string whose root is node 1.
     * @param br BufferedReader over the text of a single KCF entry
     * @return the structure described by the entry
     * @throws Exception if the entry is incomplete, an edge row is malformed, the number of
     *                   edge rows differs from the declared count, or an edge refers to a
     *                   node number outside 1..n
     */
    public Glycan readKCF(BufferedReader br) throws Exception
    {
        String name = "";
        String[] node = null;                  // node labels, allocated by the NODE count line
        String[][] edge = null;                // the two end tokens of each edge, allocated by the EDGE count line
        int state = STATE_ENTRY;
        int filled = 0;                        // rows stored so far in the section being read

        String line;
        while ((line = br.readLine()) != null)
        {
            Matcher mt;
            switch (state)
            {
                case STATE_ENTRY:
                    mt = KCF_ENTRY.matcher(line);
                    if (mt.matches())
                    {
                        name = mt.group(1);
                        state = STATE_NODE_HEADER;
                    }
                    break;

                case STATE_NODE_HEADER:
                    mt = KCF_NODE_HEADER.matcher(line);
                    if (mt.matches())
                    {
                        int count = Integer.parseInt(mt.group(1));
                        if (count == 0)        // there would be no root node to start the output from
                            throw new Exception("KCF NODE error: " + line + " ENTRY = " + name);
                        node = new String[count];
                        state = STATE_NODE_DATA;
                    }
                    break;

                case STATE_NODE_DATA:
                    mt = KCF_NODE_ROW.matcher(line);
                    if (mt.matches())
                    {
                        // Rows are stored in file order; the row's own number (group 1) is not used.
                        node[filled++] = mt.group(2);
                        if (filled == node.length)
                        {
                            filled = 0;
                            state = STATE_EDGE_HEADER;
                        }
                    }
                    break;

                case STATE_EDGE_HEADER:
                    mt = KCF_EDGE_HEADER.matcher(line);
                    if (mt.matches())
                    {
                        edge = new String[Integer.parseInt(mt.group(1))][2];
                        state = STATE_EDGE_DATA;
                    }
                    break;

                default:                       // STATE_EDGE_DATA
                    mt = KCF_EDGE_ROW.matcher(line);
                    if (!mt.matches())
                        throw new Exception("KCF EDGE error: " + line + " ENTRY = " + name);
                    if (filled >= edge.length)
                        throw new Exception("KCF EDGE error: more EDGE rows than the declared "
                                          + edge.length + ": " + line + " ENTRY = " + name);
                    edge[filled][0] = mt.group(2);
                    edge[filled][1] = mt.group(3);
                    filled++;
                    break;
            }
        }

        if (state != STATE_EDGE_DATA)          // input ended before the EDGE section was reached
            throw new Exception("KCF format error: input ended while waiting for the "
                              + STATE_EXPECTATION[state] + ", ENTRY = " + name);
        if (filled != edge.length)
            throw new Exception("KCF EDGE error: " + edge.length + " EDGE rows declared but "
                              + filled + " found, ENTRY = " + name);

        // Build the adjacency matrix and, at the same time, prefix node labels with their anomer.
        String[][] adjacency = new String[node.length][node.length];
        for (int k = 0; k < edge.length; k++)
        {
            EdgeEnd first  = parseEdgeEnd(edge[k][0], node.length);
            EdgeEnd second = parseEdgeEnd(edge[k][1], node.length);

            if (first.anomer != null)
                node[first.index] = first.anomer + "-" + node[first.index];
            if (second.anomer != null)
                node[second.index] = second.anomer + "-" + node[second.index];

            // Linkage text is "(position of second end + position of first end)".
            String linkage = "(" + second.position + "+" + first.position + ")";
            adjacency[first.index][second.index] = linkage;
            adjacency[second.index][first.index] = linkage;
        }

        return new Glycan("[]" + graph(0, node, adjacency));
    }

    /** One parsed end of a KCF edge. */
    private static final class EdgeEnd
    {
        final int index;                       // zero-based index into the node array
        final String anomer;                   // "a", "b" or "r"; null when the token has none
        final String position;                 // linkage position; "0" when the token has none

        EdgeEnd(int index, String anomer, String position)
        {
            this.index = index;
            this.anomer = anomer;
            this.position = position;
        }
    }

    /**
     * Parses one end of a KCF edge: "node:anomer+position" (e.g. "2:b1"),
     * "node:position" (e.g. "1:4") or a bare "node".
     * @param token the edge-end text
     * @param nodeCount number of nodes in the entry; the node number must be in 1..nodeCount
     * @throws Exception if the token has none of the accepted forms or its node number is out of range
     */
    private static EdgeEnd parseEdgeEnd(String token, int nodeCount) throws Exception
    {
        String anomer = null;
        String position = "0";                 // unknown position is written as 0

        Matcher mt = EDGE_END_WITH_ANOMER.matcher(token);
        if (mt.matches())
        {
            anomer = mt.group(2);
            position = mt.group(3);
        }
        else
        {
            mt = EDGE_END_WITH_POSITION.matcher(token);
            if (mt.matches())
                position = mt.group(2);
            else
            {
                mt = EDGE_END_NODE_ONLY.matcher(token);
                if (!mt.matches())
                    throw new Exception("ADJACENCY ERROR: " + token);
            }
        }

        int index = Integer.parseInt(mt.group(1)) - 1;   // KCF node numbers are 1-based
        if (index < 0 || index >= nodeCount)
            throw new Exception("ADJACENCY ERROR: " + token);

        return new EdgeEnd(index, anomer, position);
    }

    /**
     * Walks the graph recursively from the given node and returns it as Linucs text
     * ("[label]{[linkage]child...}"). Used at the end of readKCF().
     * Every edge that is followed is removed from the adjacency matrix, so the matrix is
     * consumed by the call and no edge is emitted twice.
     * @param num index of the node currently being visited
     * @param node node labels
     * @param adjacency square adjacency matrix holding the linkage text of each edge, or null
     */
    private String graph(int num, String[] node, String[][] adjacency)
    {
        StringBuilder out = new StringBuilder();
        out.append('[').append(node[num]).append("]{");

        for (int i = 0; i < adjacency.length; i++)
        {
            String linkage = adjacency[num][i];
            if (linkage == null)
                continue;

            adjacency[num][i] = null;          // remove the edge in both directions
            adjacency[i][num] = null;
            out.append('[').append(linkage).append(']').append(graph(i, node, adjacency));
        }

        return out.append('}').toString();
    }

    /**
     * Reads a file written in Linucs format.
     * The file is a sequence of pairs: a header line starting with ">" followed by one
     * line holding the structure. Blank lines between pairs are allowed. A header that is
     * the last line of the input, with no structure line after it, is ignored.
     * @param br BufferedReader over the URL or file to read
     * @return an Iterator over the structures read, in file order
     * @throws Exception if a non-blank line appears where a header line is expected
     */
    public Iterator<Glycan> readLinucs(BufferedReader br) throws Exception
    {
        List<Glycan> glycans = new ArrayList<Glycan>();
        boolean expectStructure = false;       // true right after a header line

        String line;
        while ((line = br.readLine()) != null)
        {
            if (expectStructure)
            {
                glycans.add(new Glycan(line));
                expectStructure = false;
            }
            else if (LINUCS_HEADER.matcher(line).matches())
                expectStructure = true;
            else if (!BLANK_LINE.matcher(line).matches())
                throw new Exception("Linucs file raed error: " + line);
        }

        return glycans.iterator();
    }


    /**
     * Builds a Glycan from text such as "G(-G(-G)-G)-G".
     * The text is lower-cased first. "a-rest" gives node "a" with one child parsed from
     * "rest"; "a(-x)-y" gives node "a" with two children parsed from "x" and "y"; text
     * without "-" becomes a node without children.
     * @param str the tree structure in the notation above
     * @return the root of the tree that was built
     * @throws Exception if str, or the text of any child, is null or empty
     */
    public Glycan readNormalFormat(String str) throws Exception
    {
        if (str == null || str.length() == 0)
            throw new Exception("null");

        // Fixed locale: with the default locale, e.g. Turkish would turn "I" into a dotless i.
        str = str.toLowerCase(Locale.ENGLISH);
        Glycan gc = new Glycan("[][]{}");

        if (!CONTAINS_DASH.matcher(str).matches())       // no "-" at all: this is a leaf
            gc.setNode(str);

        int depth = 0;                         // current nesting depth of "(-" ... ")-"
        int branchStart = 0;                   // index just after the outermost "(-"

        // Two-character tokens are examined, so the scan stops one character before the end.
        for (int i = 0; i + 2 <= str.length(); i++)
        {
            if (str.startsWith("(-", i))
            {
                if (depth == 0)
                {
                    gc.setNode(str.substring(0, i));
                    branchStart = i + 2;
                }
                depth++;
                i++;                           // skip the "-" of "(-"
            }
            else if (str.startsWith(")-", i))
            {
                depth--;
                if (depth == 0)                // the outermost branch is closed: two children
                {
                    List<Glycan> children = new ArrayList<Glycan>();
                    children.add(readNormalFormat(str.substring(branchStart, i)));
                    children.add(readNormalFormat(str.substring(i + 2)));
                    gc.setChildren(children);
                    return gc;
                }
            }
            else if (str.charAt(i) == '-' && depth == 0) // plain "node-rest": a single child
            {
                gc.setNode(str.substring(0, i));
                List<Glycan> children = new ArrayList<Glycan>();
                children.add(readNormalFormat(str.substring(i + 1)));
                gc.setChildren(children);
                return gc;
            }
        }

        return gc;
    }

    /**
     * Returns the tree rooted at gc in the notation read by readNormalFormat().
     * With two children, the child whose text has fewer "-"-separated parts is written
     * inside the parentheses. On a tie, the first child goes inside if its text contains
     * dhex, fuc, rha, pen or xyl; otherwise the second child does.
     * @param gc root of the tree to write
     * @return the tree as text
     * @throws Exception if any node has more than two children
     */
    String toNormalFormat(Glycan gc) throws Exception
    {
        List<Glycan> children = gc.getChildren();

        switch (children.size())
        {
            case 0:                                       // leaf
                return gc.getNode();

            case 1:
                return gc.getNode() + "-" + toNormalFormat(children.get(0));

            case 2:
            {
                String first  = toNormalFormat(children.get(0));
                String second = toNormalFormat(children.get(1));
                int firstParts  = first.split("-").length;
                int secondParts = second.split("-").length;

                boolean firstInside = firstParts < secondParts
                        || (firstParts == secondParts && TERMINAL_SUGAR.matcher(first).matches());

                return firstInside
                        ? gc.getNode() + "(-" + first + ")-" + second
                        : gc.getNode() + "(-" + second + ")-" + first;
            }

            default:
                throw new Exception("toNormalFormat does NOT support "+ children.size() +" children");
        }
    }


    /**
     * Returns the text for the node gc as a single row of characters.
     * The row is "node-(edge)+" when gc has an edge and just "node" otherwise.
     * Only gc itself is written; its children are not visited.
     * NOTE(review): judging by the method name this is the start of a CCSD writer - confirm.
     * @param gc the node to write
     * @return an array holding exactly one row
     */
    public char[][] writeCCSD(Glycan gc) throws Exception
    {
        String text = gc.hasEdge()
                ? gc.getNode() + "-(" + gc.getEdge() + ")+"
                : gc.getNode();

        return new char[][] { text.toCharArray() };
    }
}
