﻿using System;
using System.Collections.Generic;
using System.Text;
using ChaKi.Entity.Kwic;
using System.IO;
using ChaKi.Entity.Corpora;
using ChaKi.Service.Database;
using NHibernate;
using ChaKi.Entity.Corpora.Annotations;
using ChaKi.Service.Search;
using ChaKi.Service.Common;
using ChaKi.Entity.Readers;
using ChaKi.Service.Readers;

namespace ChaKi.Service.Export
{
    /// <summary>
    /// Export service that writes sentences in a CaboCha-style text layout:
    /// optional "#! DOC" / "#! SENTENCETAG" header lines, one "* ..." line per
    /// Bunsetsu, one line per lexeme, "#! SEGMENT_S" / "#! GROUP_S" / "#! LINK_S"
    /// annotation lines, and a closing "EOS" line.
    /// </summary>
    public class ExportServiceCabocha : ExportServiceBase
    {
        // Bunsetsu segments of the sentence currently being exported, ordered by StartChar,
        // with the last one removed (see LoadBunsetsuSegments).
        private IList<Segment> m_Segs;
        // Links whose FromSentence is the sentence currently being exported.
        private IList<Link> m_Links;
        // Seqid of the last document seen in the DOC tag table; -1 until the first one.
        private int m_CurrentDocId;
        // Per-lexeme line writer selected from the reader definition's LineFormat.
        private Action<Lexeme> m_LexemeWriter;

        /// <summary>
        /// Creates an exporter that uses the reader definition registered as "Mecab|Cabocha".
        /// </summary>
        /// <param name="wr">Writer that receives the exported text.</param>
        public ExportServiceCabocha(TextWriter wr)
        {
            ReaderDef def = CorpusSourceReaderFactory.Instance.ReaderDefs.Find("Mecab|Cabocha");
            Initialize(wr, def);
        }

        /// <summary>
        /// Creates an exporter that uses the given reader definition.
        /// </summary>
        /// <param name="wr">Writer that receives the exported text.</param>
        /// <param name="def">Reader definition whose LineFormat selects the lexeme line format.</param>
        public ExportServiceCabocha(TextWriter wr, ReaderDef def)
        {
            Initialize(wr, def);
        }

        /// <summary>
        /// Shared constructor logic: stores the writer and definition and picks the lexeme writer.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="def"/> is null.</exception>
        /// <exception cref="NotImplementedException">The definition's LineFormat is neither
        /// "TabSeparatedLine" nor "MecabLine".</exception>
        private void Initialize(TextWriter wr, ReaderDef def)
        {
            if (def == null)
            {
                // Fail with a clear message instead of a NullReferenceException on def.LineFormat.
                throw new ArgumentNullException("def");
            }

            m_TextWriter = wr;
            m_Def = def;
            m_CurrentDocId = -1;

            if (m_Def.LineFormat == "TabSeparatedLine")
            {
                m_LexemeWriter = this.WriteChasenLexeme;
            }
            else if (m_Def.LineFormat == "MecabLine")
            {
                m_LexemeWriter = this.WriteMecabLexeme;
            }
            else
            {
                throw new NotImplementedException(string.Format("Export format '{0}' is not supported yet.", m_Def.LineFormat));
            }
        }

        /// <summary>
        /// Exports the sentence referenced by a KWIC item.
        /// </summary>
        /// <param name="ki">KWIC item supplying the sentence ID and the corpus.</param>
        /// <exception cref="InvalidOperationException">The text writer is null.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="ki"/> is null.</exception>
        /// <exception cref="Exception">No sentence with the item's ID was found.</exception>
        public override void ExportItem(KwicItem ki)
        {
            if (m_TextWriter == null) throw new InvalidOperationException("TextWriter is null.");
            if (ki == null) throw new ArgumentNullException("ki");

            IQuery q = m_Session.CreateQuery(string.Format("from Sentence where ID={0}", ki.SenID));
            Sentence sen = q.UniqueResult<Sentence>();
            if (sen == null)
            {
                throw new Exception(string.Format("Sentence not found. Corpus={0}, senID={1}", ki.Crps.Name, ki.SenID));
            }

            // The sentence was fetched by ki.SenID, so sen.ID identifies the same row
            // in the segment and link queries issued by the shared writer.
            WriteCabochaSentence(ki.Crps, sen);
        }

        /// <summary>
        /// Exports the given sentence of the given corpus.
        /// </summary>
        /// <param name="crps">Corpus whose name is used to build the tag lookup keys.</param>
        /// <param name="sen">Sentence to export.</param>
        /// <exception cref="InvalidOperationException">The text writer is null.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="crps"/> or
        /// <paramref name="sen"/> is null.</exception>
        public override void ExportItem(Corpus crps, Sentence sen)
        {
            if (m_TextWriter == null) throw new InvalidOperationException("TextWriter is null.");
            if (crps == null) throw new ArgumentNullException("crps");
            if (sen == null) throw new ArgumentNullException("sen");

            WriteCabochaSentence(crps, sen);
        }

        /// <summary>
        /// Writes the Tag text of every entry in the document tag table.
        /// </summary>
        /// <exception cref="InvalidOperationException">The text writer is null.</exception>
        protected override void ExportDocumentList()
        {
            if (m_TextWriter == null) throw new InvalidOperationException("TextWriter is null.");

            foreach (KeyValuePair<string, SeqIDTagPair> pair in m_DocumentTags)
            {
                m_TextWriter.Write(pair.Value.Tag);
            }
        }

        /// <summary>
        /// Writes the Tag text of every entry in the sentence tag table.
        /// </summary>
        /// <exception cref="InvalidOperationException">The text writer is null.</exception>
        protected override void ExportSentenceTagList()
        {
            if (m_TextWriter == null) throw new InvalidOperationException("TextWriter is null.");

            foreach (KeyValuePair<string, SeqIDTagPair> pair in m_SentenceTags)
            {
                m_TextWriter.Write(pair.Value.Tag);
            }
        }

        /// <summary>
        /// Common body of both ExportItem overloads: loads the sentence's Bunsetsu segments
        /// and links, then writes the header tags, the Bunsetsu/lexeme lines, the extra
        /// annotations and the terminating "EOS" line.
        /// </summary>
        private void WriteCabochaSentence(Corpus crps, Sentence sen)
        {
            m_Segs = LoadBunsetsuSegments(sen);
            m_Links = m_Session.CreateQuery(string.Format("from Link l where l.FromSentence.ID={0}", sen.ID)).List<Link>();

            WriteHeaderTags(crps, sen);

            Segment currentBunsetsu = null;
            foreach (Word w in sen.Words)
            {
                Segment b = w.Bunsetsu;
                if (currentBunsetsu != b)
                {
                    // A new Bunsetsu starts at this word: emit its "* ..." line first.
                    WriteCabochaBunsetsu(b);
                    currentBunsetsu = b;
                }
                if (w.Lex != null)
                {
                    m_LexemeWriter(w.Lex);
                }
            }

            WriteAnnotations(sen);
            m_TextWriter.WriteLine("EOS");
        }

        /// <summary>
        /// Queries the 'Bunsetsu' segments of the sentence ordered by StartChar and drops the
        /// last one.
        /// </summary>
        private IList<Segment> LoadBunsetsuSegments(Sentence sen)
        {
            IList<Segment> segs = m_Session.CreateQuery(string.Format("from Segment s where s.Sentence.ID={0} and s.Tag.Name='Bunsetsu' order by s.StartChar", sen.ID)).List<Segment>();
            if (segs.Count > 0)
            {
                // NOTE(review): the final segment is presumably a dummy/root Bunsetsu that must
                // not be numbered; links pointing at it then resolve to index -1 — confirm.
                segs.RemoveAt(segs.Count - 1);
            }
            return segs;
        }

        /// <summary>
        /// Writes the "#! DOC" line (only when the document Seqid changes) and one
        /// "#! SENTENCETAG" line per sentence attribute found in the sentence tag table.
        /// </summary>
        private void WriteHeaderTags(Corpus crps, Sentence sen)
        {
            SeqIDTagPair pair;

            // Output the DOC tag. Lookup key is "<corpus name>:<document ID>".
            string cdoc = string.Format("{0}:{1}", crps.Name, sen.ParentDoc.ID);
            if (m_DocumentTags.TryGetValue(cdoc, out pair))
            {
                if (pair.Seqid != m_CurrentDocId)
                {
                    m_TextWriter.WriteLine("#! DOC {0}", pair.Seqid);
                }
                m_CurrentDocId = pair.Seqid;
            }

            // Output the SENTENCETAG lines. Lookup key is "<corpus name>:<attribute ID>".
            foreach (var a in sen.Attributes)
            {
                string csa = string.Format("{0}:{1}", crps.Name, a.ID);
                if (m_SentenceTags.TryGetValue(csa, out pair))
                {
                    m_TextWriter.WriteLine("#! SENTENCETAG {0}", pair.Seqid);
                }
            }
        }

        /// <summary>
        /// Writes one Bunsetsu line: "* {index} {head index}{link tag} 0/0 0".
        /// </summary>
        /// <param name="seg">Bunsetsu segment to describe.</param>
        /// <exception cref="Exception">The segment or link list has not been loaded.</exception>
        private void WriteCabochaBunsetsu(Segment seg)
        {
            if (m_Segs == null || m_Links == null)
            {
                throw new Exception(string.Format("Attempt to write Bunsetsu but no Segment found: SegID={0}", seg.ID));
            }

            // Look the segment up once. A segment missing from the list is written as 0:
            // the case where the sentence has only one (default) Bunsetsu.
            int segno = m_Segs.IndexOf(seg);
            m_TextWriter.Write("* {0}", segno < 0 ? 0 : segno);

            // Find the first link starting at this segment; its target is the dependency head.
            Segment toSeg = null;
            Link link = null;
            foreach (Link l in m_Links)
            {
                if (l.From == seg)
                {
                    link = l;
                    toSeg = l.To;
                    break;
                }
            }

            if (toSeg == null || link == null)
            {
                m_TextWriter.Write(" -1D");
            }
            else
            {
                // IndexOf yields -1 when the head is not in m_Segs (e.g. the removed last segment).
                m_TextWriter.Write(" {0}{1}", m_Segs.IndexOf(toSeg), link.Tag.Name);
            }
            m_TextWriter.WriteLine(" 0/0 0");
        }

        /// <summary>
        /// Writes the annotations retrieved through Util for this sentence: grouped segments
        /// with their groups, miscellaneous segments, and miscellaneous links between segments
        /// that were written by this method.
        /// </summary>
        /// <param name="sen">Sentence whose annotations are written; character offsets are
        /// emitted relative to its StartChar.</param>
        private void WriteAnnotations(Sentence sen)
        {
            IList<Group> grps = Util.RetrieveWordGroups(m_Session, sen);

            int senOffset = sen.StartChar;
            int segno = 0; // Segment number local to this method (within the sentence)
            Dictionary<Segment, int> seg_index = new Dictionary<Segment, int>();

            StringBuilder sb = new StringBuilder();
            foreach (Group g in grps)
            {
                // Collect the numbers of this group's segments, each followed by a space.
                sb.Length = 0;
                foreach (Segment s in g.Tags)
                {
                    m_TextWriter.WriteLine("#! SEGMENT_S {0} {1} {2} \"{3}\"", s.Tag.Name, s.StartChar - senOffset, s.EndChar - senOffset,
                        s.GetNormalizedCommentString());
                    sb.AppendFormat("{0} ", segno);
                    seg_index[s] = segno;
                    segno++;
                }
                m_TextWriter.WriteLine("#! GROUP_S {0} {1} \"{2}\"", g.Tag.Name, sb.ToString(), g.GetNormalizedCommentString());
            }

            // Output the Segments that are neither part of a Group nor expressed by Cabocha.
            IList<Segment> segs = Util.RetrieveMiscSegments(m_Session, sen);
            foreach (Segment s in segs)
            {
                m_TextWriter.WriteLine("#! SEGMENT_S {0} {1} {2} \"{3}\"", s.Tag.Name, s.StartChar - senOffset, s.EndChar - senOffset,
                    s.GetNormalizedCommentString());
                seg_index[s] = segno;
                segno++;
            }

            // Output the Links that are not expressed by Cabocha. A link is written only when
            // both of its end segments were numbered above.
            IList<Link> links = Util.RetrieveMiscLinks(m_Session, sen);
            foreach (Link l in links)
            {
                int from_idx, to_idx;
                if (seg_index.TryGetValue(l.From, out from_idx)
                 && seg_index.TryGetValue(l.To, out to_idx))
                {
                    m_TextWriter.WriteLine("#! LINK_S {0} {1} {2} \"{3}\"", l.Tag.Name, from_idx, to_idx,
                        l.GetNormalizedCommentString());
                }
            }
        }
    }
}
