package jp.ac.ritsumei.is.infobio;
import java.util.*;
import java.util.regex.*;

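/**
 * Handles a composition: the list of named components (for example
 * monosaccharides, a ceramide and adduct ions) that make up a structure.
 * A component that occurs several times is stored several times.
 * @author m
 * @version 20081218
 */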
public class Composition extends ArrayList<String>
{
    /**
     * vfȂNX܂D
     */
    public Composition() throws Exception
    {
        super();
    }

    /**
     * Creates a composition holding the same components, in the same order,
     * as an existing composition. The new instance has its own element list,
     * so adding to or removing from one does not affect the other.
     *
     * @param cp composition whose components are copied
     * @throws Exception declared as part of the existing signature
     */
    public Composition(Composition cp) throws Exception
    {
        super(cp); // the ArrayList copy constructor duplicates the element list
    }

    /**
     * Creates a composition by parsing a textual list of components.
     * <p>
     * The text is lower-cased and split at every character that is not a word
     * character, ':', '-' or '*'; empty tokens are ignored. A token of the form
     * {@code name*N} adds {@code name} N times. A ceramide written with the
     * letters in the wrong order ({@code cd}, {@code hd}, {@code ct},
     * {@code ht}) is corrected to {@code dc}, {@code dh}, {@code tc},
     * {@code th}; this also applies to the name part of {@code name*N}.
     *
     * @param str list of components, e.g. "Hex, dHex, th38:0"
     * @throws Exception declared as part of the existing signature; a repeat
     *         count that does not fit in an int surfaces as a NumberFormatException
     */
    public Composition(String str) throws Exception
    {
        // Lower-case with a fixed locale so the result does not depend on the JVM default
        // (under a Turkish default locale 'I' becomes a dotless i, which the split below
        // would treat as a separator).
        String[] tokens = str.toLowerCase(Locale.ENGLISH).split("[^\\w:\\-*]");

        // Ceramide typed with the letters swapped: cd, hd, ct, ht.
        Pattern swappedCeramide = Pattern.compile("^([ch])([dt])([0-9]+:[0-9])$");
        // "name*N": a component repeated N times.
        Pattern repeated = Pattern.compile("([\\w:\\-]+)\\*(\\d+)");

        for (String token : tokens)
        {
            if (token.equals(""))
            {
                continue; // adjacent separators (", ") produce empty tokens
            }

            String name = token;
            int times = 1;

            Matcher repeatMatch = repeated.matcher(token);
            if (repeatMatch.matches())
            {
                name = repeatMatch.group(1);
                times = Integer.parseInt(repeatMatch.group(2));
            }

            // Correct the name after stripping "*N", so "cd18:0*2" is fixed just like "cd18:0".
            Matcher swapMatch = swappedCeramide.matcher(name);
            if (swapMatch.matches())
            {
                name = swapMatch.group(2) + swapMatch.group(1) + swapMatch.group(3);
            }

            for (int i = 0; i < times; i++)
            {
                this.add(name);
            }
        }
    }

    /**
     * Creates a composition from a glycan: the node of the given glycan and
     * the node of every glycan reachable through its children are added as
     * components.
     *
     * @param gc glycan whose nodes become the components
     * @throws Exception declared as part of the existing signature
     */
    public Composition(Glycan gc) throws Exception
    {
        this.search(gc); // depth-first walk that appends each visited node to this list
    }

    /**
     * Appends the components of a glycan to this composition: the node of the
     * given glycan and the node of every glycan reachable through its children.
     *
     * @param gc glycan whose nodes are appended
     * @throws Exception declared as part of the existing signature
     */
    public void add(Glycan gc) throws Exception
    {
        this.search(gc); // depth-first walk that appends each visited node to this list
    }

    /**
     * Walks a glycan depth-first and appends the node of every visited glycan
     * to this composition. A glycan's own node is appended before the nodes of
     * its children.
     *
     * @param pointer glycan at which the walk currently stands
     */
    private void search(Glycan pointer)
    {
        // Record the current node first (pre-order).
        super.add(pointer.getNode());

        // Then descend into each child in the order getChildren() yields them.
        for (Glycan child : pointer.getChildren())
        {
            search(child);
        }
    }

    /**
     * Returns the value that {@code MassCalc} computes for this composition
     * (documented by the original author as the theoretical <I>m/z</I>).
     *
     * @param monoisotopic passed to the MassCalc constructor; presumably selects
     *        monoisotopic versus average mass (the original doc refers to
     *        MassCalc.MONO_MASS) - confirm against MassCalc
     * @param adduct adduct ion passed to the MassCalc constructor (the original
     *        doc refers to MassCalc.Na_ION)
     * @return result of {@code MassCalc.getMass} for this composition
     * @throws Exception propagated from MassCalc
     */
    public double getMass(boolean monoisotopic, String adduct) throws Exception
    {
        return new MassCalc(monoisotopic, adduct).getMass(this);
    }

    /**
     * Reports whether at least one component is written as a ceramide, i.e.
     * 'd' or 't', then 'c' or 'h', then digits, ':' and one digit
     * (for example "dc18:0" or "th38:0").
     *
     * @return true if such a component exists, false otherwise
     * @throws Exception declared as part of the existing signature
     */
    public boolean hasCeramide() throws Exception
    {
        Pattern ceramide = Pattern.compile("^([dt])([ch][0-9]+:[0-9])$");

        boolean found = false;
        Iterator<String> components = this.iterator();
        while (!found && components.hasNext())
        {
            // Stop at the first component that matches.
            found = ceramide.matcher(components.next()).matches();
        }

        return found;
    }

    /**
     * Returns the single ceramide component of this composition.
     * <p>
     * A component counts as a ceramide when it is 'd' or 't', then 'C', 'c'
     * or 'h', then digits, ':' and one digit (for example "th38:0").
     * NOTE(review): unlike hasCeramide(), the pattern here also accepts an
     * upper-case 'C' - confirm whether that is intended.
     *
     * @return the ceramide component
     * @throws Exception if the composition contains no ceramide or more than one
     */
    public String getCeramide() throws Exception
    {
        Pattern ceramidePattern = Pattern.compile("^([dt])([Cch][0-9]+:[0-9])$");

        int count = 0;          // number of ceramide components seen
        String ceramide = null; // the last one seen

        for (String temp : this)
        {
            if (ceramidePattern.matcher(temp).matches())
            {
                ceramide = temp;
                count++;
            }
        }

        if (count != 1)
        {
            // Report how many were found so the caller can tell "none" from "ambiguous".
            throw new Exception("Exception at Composition.getCeramide(): expected exactly one ceramide but found " + count);
        }

        return ceramide;
    }

    /**
     * Returns the number of hydroxy groups counted for this composition.
     * <p>
     * Each component contributes as follows:
     * <ul>
     * <li>pen, xyl, ara, dhex, fuc, rha: 3</li>
     * <li>hex, glc, gal, man, hexnac, glcnac, galnac, glca, hexa: 4</li>
     * <li>neuac, kdn: 6; neugc: 7</li>
     * <li>a sugar followed by "me" loses 1, or the single digit written
     *     before "me" (e.g. "neuac2me" loses 2); acetylation is not parsed</li>
     * <li>a ceramide adds 3 for 'd' or 4 for 't', plus 1 for an 'h' fatty acid</li>
     * <li>"me", "ac" and the adducts -h, h, na, li, k, h2o, -h2o add nothing</li>
     * </ul>
     * Sugars, ceramides and stand-alone "me"/"ac" each count as one bound unit.
     * If there is at least one bound unit, the total is reduced by
     * (units - 1), and 1 is added when the composition has no ceramide.
     * <p>
     * A sugar with a digit but no "me" suffix (e.g. "hex2") is treated as
     * unmodified; previously this case raised a NullPointerException.
     *
     * @return the hydroxy count
     * @throws Exception if a component matches none of the forms above
     */
    public int countHydroxy() throws Exception
    {
        // Sugar name, optional digits, optional "me" suffix (group 3 is null when absent).
        Pattern sugar = Pattern.compile("^(pen|xyl|ara|dhex|fuc|rha|hex|glc|gal|man|hexnac|glcnac|galnac|glca|hexa|neuac|kdn|neugc)([0-9]*)(me)*$");
        Pattern ceramide = Pattern.compile("^([dt])([ch])([0-9]+):([0-9])$");

        int count = 0; // running hydroxy count
        int bound = 0; // number of bound units (sugars, ceramides, stand-alone me/ac)

        for (String str : this)
        {
            Matcher sugarMatch = sugar.matcher(str);
            Matcher ceramideMatch = ceramide.matcher(str);

            if (sugarMatch.matches())
            {
                String name = sugarMatch.group(1);
                if (name.matches("^(pen|xyl|ara|dhex|fuc|rha)$"))
                {
                    count += 3;
                }
                else if (name.matches("^(hex|glc|gal|man|hexnac|glcnac|galnac|glca|hexa)$"))
                {
                    count += 4;
                }
                else if (name.matches("^(neuac|kdn)$"))
                {
                    count += 6;
                }
                else if (name.equals("neugc"))
                {
                    count += 7;
                }

                String digits = sugarMatch.group(2);
                String methyl = sugarMatch.group(3);
                if (methyl != null)
                {
                    // An explicit single digit gives the number of methyl groups; otherwise one.
                    count -= digits.matches("^\\d$") ? Integer.parseInt(digits) : 1;
                }

                bound++;
            }
            else if (ceramideMatch.matches())
            {
                if (ceramideMatch.group(1).equals("d"))
                {
                    count += 3;
                }
                else if (ceramideMatch.group(1).equals("t"))
                {
                    count += 4;
                }

                if (ceramideMatch.group(2).equals("h"))
                {
                    count++;
                }

                bound++;
            }
            else if (str.matches("^(me|ac)$"))
            {
                bound++; // a stand-alone modification adds no hydroxy group but is a bound unit
            }
            else if (!str.matches("^(-h|h|na|li|k|h2o|-h2o)$"))
            {
                // Not a sugar, ceramide, modification or adduct.
                throw new Exception("Unknown Composition in countHydroxy(): " + str);
            }
        }

        if (bound != 0)
        {
            count = count - bound + 1;

            if (!this.hasCeramide())
            {
                count++;
            }
        }

        return count;
    }

    /**
     * Returns the components and their counts as text.
     * <p>
     * Each distinct component is written once, converted with
     * {@link #toFixSignage(String)} and followed by {@code *N} when it occurs
     * N &gt; 1 times; entries are separated by ", ". Entries are ordered by
     * getSortScore, and components with equal scores are ordered
     * alphabetically so that the result does not depend on hash iteration
     * order.
     * <p>
     * If an exception occurs while building the text, a message is printed to
     * standard output and whatever was built so far is returned.
     *
     * @return textual form, e.g. "th38:0, Hex*3, Fuc"
     */
    @Override
    public String toString()
    {
        StringBuilder str = new StringBuilder();

        try
        {
            // A TreeSet removes duplicates and yields the names alphabetically; because
            // Collections.sort is stable, equal scores keep that alphabetical order.
            List<String> li = new ArrayList<String>(new TreeSet<String>(this));

            Collections.sort(li, new Comparator<String>()
            {
                public int compare(String str1, String str2)
                {
                    int score1 = getSortScore(str1);
                    int score2 = getSortScore(str2);
                    // Explicit comparison instead of subtraction, which can overflow.
                    return score1 < score2 ? -1 : (score1 > score2 ? 1 : 0);
                }
            });

            Iterator<String> it = li.iterator();
            while (it.hasNext())
            {
                String comp = it.next();
                int count = Collections.frequency(this, comp); // occurrences in this composition

                str.append(Composition.toFixSignage(comp));
                if (count != 1)
                {
                    str.append('*').append(count);
                }

                if (it.hasNext())
                {
                    str.append(", ");
                }
            }
        }
        catch (Exception e)
        {
            System.out.println("Exception at Composition.toString()");
        }

        return str.toString();
    }

    /**
     * Returns the ordering score that toString() uses for one component;
     * lower scores are written first.
     * <p>
     * Scores step by 10 per category. Methylated or acetylated sugars
     * additionally add the number of modifications returned by
     * countModification, so that e.g. "glcme" precedes "glc2me".
     * <p>
     * The pentose categories now read {@code xyl|ara}. They previously read
     * {@code xyl|rha}, where {@code rha} could never match because
     * {@code fuc|rha} is tested earlier, and {@code ara} fell through to the
     * "other" score.
     *
     * @param str component name
     * @return ordering score (0 to 240, plus the modification count where applicable)
     */
    private int getSortScore(String str)
    {
        // Ceramides (e.g. "dc18:0") and names ending in "pa" or "ab" come first.
        if (str.matches("^[dt][ch][0-9]+:[0-9]$") || str.matches("^.+pa$") || str.matches("^.+ab$"))
        {
            return 0;
        }

        // Unmodified components in output order; entry i scores 10 * (i + 1).
        String[] plain = {
            "^[dt][0-9]+:[0-9]$",    //  10: e.g. "d17:0"
            "^[Cch][0-9]+:[0-9]$",   //  20: e.g. "c18:0"
            "^(pc|ins|hexa)$",       //  30
            "^(gal|glc)$",           //  40
            "^(man)$",               //  50
            "^(hex)$",               //  60
            "^(fuc|rha)$",           //  70
            "^(dhex)$",              //  80
            "^(xyl|ara)$",           //  90
            "^(pen)$",               // 100
            "^(galnac|glcnac)$",     // 110
            "^(hexnac)$"             // 120
        };
        for (int i = 0; i < plain.length; i++)
        {
            if (str.matches(plain[i]))
            {
                return 10 * (i + 1);
            }
        }

        // Methylated/acetylated sugars in the same order; entry i scores 130 + 10 * i
        // plus the number of modifications.
        String[] modified = {
            "^(gal|glc)\\d*(me|ac)$",        // 130
            "^man\\d*(me|ac)$",              // 140
            "^hex\\d*(me|ac)$",              // 150
            "^(fuc|rha)\\d*(me|ac)$",        // 160
            "^dhex\\d*(me|ac)$",             // 170
            "^(xyl|ara)\\d*(me|ac)$",        // 180
            "^pen\\d*(me|ac)$",              // 190
            "^(galnac|glcnac)\\d*(me|ac)$",  // 200
            "^hexnac\\d*(me|ac)$"            // 210
        };
        for (int i = 0; i < modified.length; i++)
        {
            if (str.matches(modified[i]))
            {
                return 130 + 10 * i + countModification(str);
            }
        }

        if (str.matches("^(p|c|pea|aep|kdn|neuac|neugc)$"))
        {
            return 220;
        }
        if (str.matches("^(p|c|pea|aep|kdn|neuac|neugc)\\d*(me|ac)$"))
        {
            return 230;
        }

        return 240; // anything else goes last
    }

    /**
     * Returns the number of methyl/acetyl modifications written in a
     * component name of the form {@code <name>[digits](me|ac)}.
     *
     * @param str component name, e.g. "glc2me"
     * @return the number given by the digits, 1 when no digits are written,
     *         or 0 (after printing a message to standard error) when the name
     *         does not have this form
     */
    private int countModification(String str)
    {
        Matcher mt = Pattern.compile("^\\D+(\\d*)(me|ac)$").matcher(str);

        if (!mt.matches())
        {
            // No modification suffix: report it and count nothing.
            System.err.println("Monosacchride does not contain modification:" + str);
            return 0;
        }

        String digits = mt.group(1);
        return digits.equals("") ? 1 : Integer.parseInt(digits);
    }

    /**
     * Converts a lower-case component name to its display spelling, e.g.
     * "glc" to "Glc" and "hexnacpa" to "HexNAcPA".
     * <p>
     * The names s, p, c, -h, h, na, li and k are converted only when they are
     * the whole string. Everything else is handled by replacing known
     * lower-case fragments one after another; text that matches no fragment
     * (such as "th38:0") is returned unchanged.
     *
     * @param before lower-case component name
     * @return display spelling of the name
     */
    public static String toFixSignage(String before)
    {
        // Whole-string names. None of the fragments below occurs inside them, and a string
        // touched by a fragment replacement contains an upper-case letter, so testing these
        // first gives the same result as testing them between the replacements.
        String[][] wholeNames = {
            {"s", "S"}, {"p", "P"}, {"c", "C"},
            {"-h", "-H"}, {"h", "H"}, {"na", "Na"}, {"li", "Li"}, {"k", "K"}
        };
        for (String[] pair : wholeNames)
        {
            if (before.equals(pair[0]))
            {
                return pair[1];
            }
        }

        // Fragment replacements, applied in this order. A longer fragment precedes any
        // shorter fragment it contains (nac/ac, lfuc/fuc, hexa/hex, glca/glc).
        String[][] fragments = {
            {"nac", "NAc"}, {"ac", "Ac"},
            {"me", "Me"}, {"et", "Et"}, {"tms", "TMS"}, {"h2o", "H2O"}, {"pa", "PA"}, {"ab", "AB"},
            {"pen", "Pen"}, {"xyl", "Xyl"}, {"ara", "Ara"},
            {"lfuc", "LFuc"}, {"fuc", "Fuc"}, {"rha", "Rha"},
            {"hexa", "HexA"}, {"hex", "Hex"},
            {"glca", "GlcA"}, {"glc", "Glc"}, {"gal", "Gal"}, {"man", "Man"},
            {"pc", "PC"},
            {"ins", "Ins"}, {"aep", "AEP"}, {"pea", "PEA"}, {"kdn", "KDN"}, {"neu", "Neu"}, {"gc", "Gc"}
        };

        String fixed = before;
        for (String[] pair : fragments)
        {
            fixed = fixed.replace(pair[0], pair[1]);
        }

        return fixed;
    }

    /**
     * Tests whether another composition holds the same components the same
     * number of times, regardless of their order.
     * <p>
     * Both compositions are copied and sorted, and the textual forms of the
     * copies are compared; neither original is modified.
     *
     * @param cp composition to compare with; null is never equal
     * @return true if both compositions have the same textual form
     */
    public boolean equals(Composition cp)
    {
        if (cp == null)
        {
            return false; // decided up front instead of via a caught NullPointerException
        }
        if (cp == this)
        {
            return true;
        }

        try
        {
            Composition cp1 = new Composition(this); // work on copies so that
            Composition cp2 = new Composition(cp);   // neither list is reordered
            Collections.sort(cp1);
            Collections.sort(cp2);
            return cp1.toString().equals(cp2.toString());
        }
        catch (Exception e)
        {
            System.out.println("Exception at Composition.equals()");
            return false; // treat a failed comparison as "not equal"
        }
    }

    /**
     * Overrides {@code Object.equals} so that hash-based collections and
     * {@code List.contains}/{@code indexOf} compare two compositions with the
     * same order-insensitive rule as {@link #equals(Composition)}, which is
     * also the rule hashCode() follows. Without this override those callers
     * reach the inherited, order-sensitive list comparison.
     *
     * @param obj object to compare with
     * @return the result of {@link #equals(Composition)} when obj is a
     *         Composition, otherwise the inherited list comparison
     */
    @Override
    public boolean equals(Object obj)
    {
        if (obj instanceof Composition)
        {
            return this.equals((Composition) obj);
        }
        return super.equals(obj);
    }

    /**
     * Returns a hash value that does not depend on the order of the
     * components: the hash of the textual form of a sorted copy.
     * <p>
     * The hash of the String is used because calling hashCode() on the copy
     * would recurse into this method.
     *
     * @return hash of the sorted copy's textual form, or 0 if an exception
     *         occurs (a message is then printed to standard output)
     */
    public int hashCode()
    {
        int hash = 0; // value returned when an exception occurs

        try
        {
            Composition sorted = new Composition(this); // copy, so this list keeps its order
            Collections.sort(sorted);
            String text = sorted.toString();
            hash = text.hashCode();
        }
        catch (Exception e)
        {
            System.out.println("Exception at Composition.hashCode()");
        }

        return hash;
    }

    /**
     * Replaces specific sugar names inside every component with their generic
     * class name, in place: glc, man, gal become hex (so glcnac and galnac
     * become hexnac); lfuc, fuc, rha become dhex; xyl, ara become pen.
     */
    public void toHexose()
    {
        Pattern hexoses = Pattern.compile("glc|man|gal");
        Pattern deoxyhexoses = Pattern.compile("lfuc|fuc|rha");
        Pattern pentoses = Pattern.compile("xyl|ara");

        for (int i = 0; i < this.size(); i++)
        {
            String name = this.get(i);
            name = hexoses.matcher(name).replaceAll("hex");
            name = deoxyhexoses.matcher(name).replaceAll("dhex");
            name = pentoses.matcher(name).replaceAll("pen");
            this.set(i, name); // store the converted name at the same position
        }
    }

    /*
     * Manual check of toHexose(): builds one composition from a glycan and one
     * from text, converts both and prints them to standard output.
     */
    private static void testToHexose() throws Exception
    {
        Composition fromGlycan = new Composition(new Glycan("[][th39:0]{[][fuc]{}[][hex]{}}"));
        Composition fromText = new Composition("Hex, Hex*2, th38:0, c18:0, d17:0, Fuc");
        Composition[] samples = {fromGlycan, fromText};

        for (Composition sample : samples)
        {
            sample.toHexose();
        }
        for (Composition sample : samples)
        {
            System.out.println(sample);
        }
    }

    /**
     * Tests whether the composition contains a component, ignoring the case
     * of the argument: the argument is lower-cased and then looked up among
     * the stored components.
     *
     * @param str component name in any case, e.g. "Hex"
     * @return true if the lower-cased name is present
     */
    public boolean contains(String str)
    {
        // Fixed locale: a default locale such as Turkish would lower-case 'I' to a dotless i.
        return super.contains(str.toLowerCase(Locale.ENGLISH));
    }

    /**
     * Removes the first occurrence of a component, ignoring the case of the
     * argument: the argument is lower-cased and then removed from the stored
     * components.
     *
     * @param str component name in any case, e.g. "Hex"
     * @return true if a component was removed
     */
    public boolean remove(String str)
    {
        // Fixed locale: a default locale such as Turkish would lower-case 'I' to a dotless i.
        return super.remove(str.toLowerCase(Locale.ENGLISH));
    }

    /**
     * Tests this composition against the Globo-series reference
     * "glc, gal, gal, galnac".
     *
     * @return result of isSeries for that reference composition
     * @throws Exception propagated from the Composition constructor or isSeries
     */
    public boolean isGloboSeries() throws Exception
    {
        Composition reference = new Composition("glc, gal, gal, galnac");
        return isSeries(reference);
    }

    /**
     * Tests this composition against the Lacto-series reference
     * "glc, gal, glcnac, gal".
     *
     * @return result of isSeries for that reference composition
     * @throws Exception propagated from the Composition constructor or isSeries
     */
    public boolean isLactoSeries() throws Exception
    {
        Composition reference = new Composition("glc, gal, glcnac, gal");
        return isSeries(reference);
    }

    /**
     * Tests this composition against the Ganglio-series reference
     * "glc, gal, galnac, gal".
     *
     * @return result of isSeries for that reference composition
     * @throws Exception propagated from the Composition constructor or isSeries
     */
    public boolean isGanglioSeries() throws Exception
    {
        Composition reference = new Composition("glc, gal, galnac, gal");
        return isSeries(reference);
    }

    /**
     * Tests this composition against the Lactoganglio-series reference
     * "glc, gal, galnac, glcnac".
     *
     * @return result of isSeries for that reference composition
     * @throws Exception propagated from the Composition constructor or isSeries
     */
    public boolean isLactoganglioSeries() throws Exception
    {
        Composition reference = new Composition("glc, gal, galnac, glcnac");
        return isSeries(reference);
    }

    /**
     * Tests this composition against the Gala-series reference "gal, gal".
     *
     * @return result of isSeries for that reference composition
     * @throws Exception propagated from the Composition constructor or isSeries
     */
    public boolean isGalaSeries() throws Exception
    {
        Composition reference = new Composition("gal, gal");
        return isSeries(reference);
    }

    /**
     * Tests this composition against the Muco-series reference
     * "glc, gal, gal, gal".
     *
     * @return result of isSeries for that reference composition
     * @throws Exception propagated from the Composition constructor or isSeries
     */
    public boolean isMucoSeries() throws Exception
    {
        Composition reference = new Composition("glc, gal, gal, gal");
        return isSeries(reference);
    }

    /**
     * Tests this composition against the Arthro-series reference
     * "glc, man, glcnac, galnac".
     *
     * @return result of isSeries for that reference composition
     * @throws Exception propagated from the Composition constructor or isSeries
     */
    public boolean isArthroSeries() throws Exception
    {
        Composition reference = new Composition("glc, man, glcnac, galnac");
        return isSeries(reference);
    }

    /**
     * Tests this composition against the Mollu-series reference
     * "glc, man, man, glcnac".
     *
     * @return result of isSeries for that reference composition
     * @throws Exception propagated from the Composition constructor or isSeries
     */
    public boolean isMolluSeries() throws Exception
    {
        Composition reference = new Composition("glc, man, man, glcnac");
        return isSeries(reference);
    }

    /**
     * Tests whether this composition contains every component of a reference
     * series, counting multiplicity.
     * <p>
     * Each reference component is removed from a working copy of this
     * composition. If the specific name (e.g. "glc") is absent, the name
     * produced by toHexose() (e.g. "hex") is tried instead. The test fails as
     * soon as neither is available. This composition itself is not modified.
     *
     * @param series reference components of the series
     * @return true if every reference component could be matched
     * @throws Exception propagated from the Composition constructors
     */
    private boolean isSeries(Composition series) throws Exception
    {
        Composition remaining = new Composition(this); // working copy; matched components are removed

        for (String required : series)
        {
            if (remaining.contains(required))
            {
                remaining.remove(required); // exact name available
                continue;
            }

            // Fall back to the generic class name, e.g. "glc" -> "hex".
            Composition generic = new Composition(required);
            generic.toHexose();
            String genericName = generic.toString();

            if (!remaining.contains(genericName))
            {
                return false; // neither the specific nor the generic name is left
            }
            remaining.remove(genericName);
        }

        return true;
    }

    /**
     * Manual check of the series tests: prints whether the composition
     * "Glc, Glc, Hex, Hex, GlcNAc, -H2O" is reported as Globo series.
     */
    private static void testSeries() throws Exception
    {
        boolean globo = new Composition("Glc, Glc, Hex, Hex, GlcNAc, -H2O").isGloboSeries();
        System.out.println(globo);
    }

    /**
     * Manual check of countHydroxy(): prints the composition "NeuAc2Me"
     * followed by its hydroxy count.
     */
    private static void testCountHydroxy() throws Exception
    {
        Composition sample = new Composition("NeuAc2Me");
        String label = sample.toString();
        int hydroxy = sample.countHydroxy();
        System.out.println(label + " : " + hydroxy);
    }
}
