using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace SlothLib.NLP
{
	/// <summary>
	/// Holds the result of a MeCab morphological analysis: the raw output text,
	/// the morphemes parsed from it, and the raw text split per "EOS" marker.
	/// </summary>
	/// <remarks>
	/// <para>
	/// Each output line is expected to look like
	/// <c>surface&lt;TAB&gt;pos,pos-sub1,pos-sub2,pos-sub3,conjugation pattern,conjugation form,base form,reading,pronunciation</c>.
	/// Lines with fewer trailing fields are handled by a secondary pattern.
	/// </para>
	/// <para>
	/// Lines that appear after the last "EOS" line are still parsed into morphemes,
	/// but they are not recorded as a sequence (a sequence is only closed by "EOS").
	/// </para>
	/// <newpara>[2007-04-22][ohshima] created</newpara>
	/// <newpara>[2007-05-12][ohshima] revised</newpara>
	/// </remarks>
	public class MeCabResult : IMorphologicalAnalyzerResult
	{
		#region private static fields (regular expressions and filters)

		/// <summary>
		/// Matches a fully populated output line.
		/// Groups: 1 = surface, 2 = the four part-of-speech fields, 3 = conjugation pattern,
		/// 4 = conjugation form, 5 = base form, 6 = ",reading" (wrapper), 7 = reading,
		/// 8 = pronunciation.
		/// </summary>
		private static readonly Regex regexResultLine = new Regex(@"^(.*)\t(.*,.*,.*,.*),(.*),(.*),(.*)(,(.*)),(.*)$", RegexOptions.Compiled | RegexOptions.Singleline);

		/// <summary>
		/// Fallback for lines in which trailing fields are missing (the original comment
		/// attributes this to unknown words). Same groups 1-5 as <see cref="regexResultLine"/>;
		/// group 6/7 (reading) is optional and there is no pronunciation group.
		/// </summary>
		private static readonly Regex regexResultLine2 = new Regex(@"^(.*)\t(.*,.*,.*,.*),(.*),(.*),(.*)(,(.*))?$", RegexOptions.Compiled | RegexOptions.Singleline);

		/// <summary>
		/// Separators used to split the raw MeCab output into lines.
		/// </summary>
		private static readonly string[] lineSeparators = new string[] { "\r\n", "\n" };

		// Filters backing GetOriginalArray(), GetPOSArray() and GetRawArray().
		private static readonly RemainOriginalFilter remainOriginalFilter = new RemainOriginalFilter();
		private static readonly RemainPosFilter remainPosFilter = new RemainPosFilter();
		private static readonly RemainRawFilter remainRawFilter = new RemainRawFilter();

		#endregion

		/// <summary>
		/// Morphemes parsed from the raw output, in order of appearance.
		/// </summary>
		private readonly List<MeCabMorpheme> morphemeList;

		/// <summary>
		/// Raw output of MeCab exactly as passed to the constructor.
		/// </summary>
		private readonly string rawResult;

		/// <summary>
		/// Raw output split per "EOS": each entry holds the successfully parsed lines
		/// (each terminated by "\r\n") that preceded one "EOS" line.
		/// </summary>
		private readonly List<string> sequenceRawResult;

		/// <summary>
		/// Parses the raw MeCab output into morphemes and per-"EOS" sequences.
		/// </summary>
		/// <param name="rawResult">Raw text returned by MeCab.</param>
		/// <exception cref="ArgumentNullException"><paramref name="rawResult"/> is null.</exception>
		public MeCabResult(string rawResult)
		{
			if (rawResult == null)
			{
				throw new ArgumentNullException("rawResult");
			}

			this.rawResult = rawResult;
			this.morphemeList = new List<MeCabMorpheme>();
			this.sequenceRawResult = new List<string>();

			// Accumulates the parsed lines of the sequence currently being read.
			StringBuilder sequence = new StringBuilder();

			// Split into lines; empty lines are dropped.
			string[] lines = rawResult.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries);

			foreach (string line in lines)
			{
				// "EOS" closes the current sequence. Neither pattern can match it
				// (both require a tab), so it is checked before running any regex.
				if (line == "EOS")
				{
					this.sequenceRawResult.Add(sequence.ToString());
					sequence = new StringBuilder();
					continue;
				}

				Match fullMatch = regexResultLine.Match(line);
				Match successMatch;
				string pronunciation;

				if (fullMatch.Success)
				{
					successMatch = fullMatch;
					pronunciation = fullMatch.Groups[8].Value;
				}
				else
				{
					// The secondary format carries no pronunciation field.
					pronunciation = "*";
					successMatch = regexResultLine2.Match(line);
					if (!successMatch.Success)
					{
						// Unparseable line: skip it. A line containing a tab looked like
						// a morpheme line, so report it for debugging.
						if (line.Contains("\t"))
						{
							System.Diagnostics.Debug.WriteLine("MeCabResult: failed to parse MeCab output line: " + line);
						}
						continue;
					}
				}

				string raw = successMatch.Groups[1].Value;
				string pos = successMatch.Groups[2].Value;
				string conjugationPattern = successMatch.Groups[3].Value;
				string conjugationForm = successMatch.Groups[4].Value;

				// For the secondary format the surface form is used as the base form
				// (the original comment says the base form is "*" in that case).
				string original = fullMatch.Success ? successMatch.Groups[5].Value : raw;

				// A missing or empty reading is normalized to "*".
				string reading = successMatch.Groups[7].Value;
				if (string.IsNullOrEmpty(reading))
				{
					reading = "*";
				}

				sequence.Append(line);
				sequence.Append("\r\n");

				this.morphemeList.Add(new MeCabMorpheme(raw, pos, conjugationPattern, conjugationForm, original, reading, pronunciation));
			}
		}

		/// <summary>
		/// Raw text output by MeCab, as given to the constructor.
		/// </summary>
		public string RawResult
		{
			get { return this.rawResult; }
		}

		/// <summary>
		/// Returns the number of sequences, i.e. the number of "EOS" lines encountered.
		/// </summary>
		/// <returns>Number of "EOS"-terminated sequences.</returns>
		public int GetSequenceCount()
		{
			return this.sequenceRawResult.Count;
		}

		/// <summary>
		/// Splits the result per "EOS" and returns one result object per sequence.
		/// </summary>
		/// <remarks>
		/// Each sequence is re-parsed from its stored raw lines. Those lines do not
		/// include the "EOS" marker, so the returned objects report a sequence count of 0.
		/// </remarks>
		/// <returns>Array of MeCabResult, one per "EOS"-terminated sequence.</returns>
		public MeCabResult[] GetSequenceResults()
		{
			MeCabResult[] result = new MeCabResult[this.sequenceRawResult.Count];
			for (int i = 0; i < result.Length; i++)
			{
				result[i] = new MeCabResult(this.sequenceRawResult[i]);
			}
			return result;
		}

		/// <summary>
		/// Indexer over the parsed morphemes.
		/// </summary>
		/// <param name="index">Zero-based position of the morpheme.</param>
		/// <returns>The morpheme at position <paramref name="index"/>.</returns>
		public MeCabMorpheme this[int index]
		{
			get { return this.morphemeList[index]; }
		}

		/// <summary>
		/// The parsed morphemes as an array (a new array is created on every access).
		/// </summary>
		public MeCabMorpheme[] Morphemes
		{
			get { return this.morphemeList.ToArray(); }
		}

		#region IMorphologicalAnalyzerResult members

		/// <summary>
		/// The parsed morphemes exposed through the analyzer-independent interface.
		/// </summary>
		IMorpheme[] IMorphologicalAnalyzerResult.Morphemes
		{
			get { return this.morphemeList.ToArray(); }
		}

		/// <summary>
		/// Returns the Raw (surface) value of each morpheme as an array.
		/// </summary>
		/// <returns>Array of Raw values.</returns>
		public string[] GetRawArray()
		{
			return remainRawFilter.DoFilter(this.Morphemes);
		}

		/// <summary>
		/// Returns the Original (base form) value of each morpheme as an array.
		/// </summary>
		/// <returns>Array of Original values.</returns>
		public string[] GetOriginalArray()
		{
			return remainOriginalFilter.DoFilter(this.Morphemes);
		}

		/// <summary>
		/// Returns the POS (part of speech) value of each morpheme as an array.
		/// </summary>
		/// <returns>Array of POS values.</returns>
		public string[] GetPOSArray()
		{
			return remainPosFilter.DoFilter(this.Morphemes);
		}

		#endregion
	}
}
