﻿using System;
using System.Collections.Specialized;
using System.Globalization;
using System.Text.RegularExpressions;

namespace HgCo.WindowsLive.SkyDrive.Support
{
    /// <summary>
    /// Provides methods for parsing HTML documents.
    /// </summary>
    /// <remarks>
    /// Parsing is regular-expression based. Only attributes without a value, or whose
    /// value is enclosed in double or single quotes, are recognised; a tag containing
    /// an unquoted attribute value is not matched at all.
    /// </remarks>
    internal static class HtmlDocumentHelper
    {
        // Optional attribute list of a tag, exposed as the "Attributes" group.
        //  - The two quote styles are wrapped in ONE group after the '=' sign. Without that
        //    wrapper the alternation splits the expression into `= "..."` OR a bare `'...'`,
        //    so single-quoted attributes would never match.
        //  - The attribute name is an atomic group, (?>...), so a malformed tag cannot make
        //    the nested quantifier backtrack exponentially. The set of matched tags is
        //    unchanged because an attribute name is always followed by a non-name character.
        private const string AttributeListPattern =
            "(\\s+(?<Attributes>(\\s*(?>[\\w\\-]+)(\\s*=\\s*((\"[^\"]*\")|('[^']*')))?)*))?";

        // Any opening (or self-closing) tag.
        private const string TagPattern =
            "<\\s*(?<Name>\\w+)" + AttributeListPattern + "\\s*/?\\s*>";

        // A meta tag; matched case-insensitively (see MetaTagRegex).
        private const string MetaTagPattern =
            "<\\s*(?<Name>meta)" + AttributeListPattern + "\\s*/?\\s*>";

        // A single attribute. "Value" holds the text between the quotes (quotes excluded)
        // and is empty when the attribute has no value.
        private const string AttributePattern =
            "(?<Name>[\\w\\-]+)(\\s*=\\s*((\"(?<Value>[^\"]*)\")|('(?<Value>[^']*)')))?";

        // The patterns never change, so the Regex instances are built once and shared.
        private static readonly Regex TagRegex = new Regex(TagPattern);

        private static readonly Regex MetaTagRegex =
            new Regex(MetaTagPattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        private static readonly Regex AttributeRegex = new Regex(AttributePattern);

        // "URL=" is matched case-insensitively because pages commonly write it as "url=".
        private static readonly Regex MetaRefreshUrlRegex =
            new Regex("URL=(?<URL>[^\"]+)", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        private static readonly Regex NumericCharacterReferenceRegex =
            new Regex(@"&#(?<Value>\d+);");

        private static readonly Regex JavascriptEscapeRegex =
            new Regex(@"\\(?<Value>(x[0-9A-Fa-f]{2})|(\d{2}))");

        // Highest valid Unicode code point.
        private const int MaxUnicodeCodePoint = 0x10FFFF;

        /// <summary>
        /// Gets the first tag whose <c>name</c> attribute equals the specified name.
        /// </summary>
        /// <param name="htmlDocument">The HTML document to parse.</param>
        /// <param name="tagName">The value of the <c>name</c> attribute to look for (case-insensitive).</param>
        /// <returns>
        /// The complete text of the matching tag, or <c>null</c> if no tag matches or
        /// either argument is <c>null</c> or empty.
        /// </returns>
        public static string GetTagByName(string htmlDocument, string tagName)
        {
            if (String.IsNullOrEmpty(htmlDocument) || String.IsNullOrEmpty(tagName))
            {
                return null;
            }

            foreach (Match matchTag in TagRegex.Matches(htmlDocument))
            {
                string attributes = matchTag.Groups["Attributes"].Value;
                string name = GetTagAttributeValueByName(attributes, "name");
                if (tagName.Equals(name, StringComparison.OrdinalIgnoreCase))
                {
                    return matchTag.Value;
                }
            }

            return null;
        }

        /// <summary>
        /// Gets the meta tag's refresh URI.
        /// </summary>
        /// <param name="htmlDocument">The HTML document.</param>
        /// <returns>
        /// The URI created by <c>UriHelper.GetUri</c> from the <c>URL=</c> part of the first
        /// <c>http-equiv="refresh"</c> meta tag that has one, or <c>null</c> if the document
        /// is <c>null</c> or empty or contains no such tag.
        /// </returns>
        public static Uri GetMetaTagRefreshUri(string htmlDocument)
        {
            if (String.IsNullOrEmpty(htmlDocument))
            {
                return null;
            }

            foreach (Match matchMetaTag in MetaTagRegex.Matches(htmlDocument))
            {
                NameValueCollection attributes = ParseAttributes(matchMetaTag.Groups["Attributes"].Value);
                if (!"refresh".Equals(attributes["http-equiv"], StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // A refresh tag may lack a content attribute, or carry only a delay with no
                // URL; neither provides a target, so keep looking.
                string content = attributes["content"];
                if (String.IsNullOrEmpty(content))
                {
                    continue;
                }

                Match matchUrl = MetaRefreshUrlRegex.Match(content);
                if (matchUrl.Success)
                {
                    return UriHelper.GetUri(matchUrl.Groups["URL"].Value);
                }
            }

            return null;
        }

        /// <summary>
        /// Gets the value of a tag's attribute.
        /// </summary>
        /// <param name="tagAttributes">The attribute list of a tag, e.g. <c>id="a" name='b'</c>.</param>
        /// <param name="tagAttributeName">The name of the tag attribute to get (case-insensitive).</param>
        /// <returns>
        /// The attribute's value without the surrounding quotes; an empty string if the
        /// attribute is present without a value; <c>null</c> if the attribute is not found
        /// or either argument is <c>null</c> or empty.
        /// </returns>
        public static string GetTagAttributeValueByName(string tagAttributes, string tagAttributeName)
        {
            if (String.IsNullOrEmpty(tagAttributes) || String.IsNullOrEmpty(tagAttributeName))
            {
                return null;
            }

            foreach (Match matchTagAttribute in AttributeRegex.Matches(tagAttributes))
            {
                string name = matchTagAttribute.Groups["Name"].Value;
                if (tagAttributeName.Equals(name, StringComparison.OrdinalIgnoreCase))
                {
                    return matchTagAttribute.Groups["Value"].Value;
                }
            }

            return null;
        }

        /// <summary>
        /// Gets a tag's attributes.
        /// </summary>
        /// <param name="tag">The tag.</param>
        /// <returns>
        /// The collection of attributes, keyed by lower-cased attribute name. The collection
        /// is empty if <paramref name="tag"/> is <c>null</c>, empty, or not a recognised tag.
        /// </returns>
        public static NameValueCollection GetTagAttributes(string tag)
        {
            if (String.IsNullOrEmpty(tag))
            {
                return new NameValueCollection();
            }

            // When the tag does not match, the "Attributes" group is empty and so is the result.
            Match matchTag = TagRegex.Match(tag);
            return ParseAttributes(matchTag.Groups["Attributes"].Value);
        }

        /// <summary>
        /// Decodes a unicode string by replacing decimal numeric character references
        /// (<c>&amp;#NNN;</c>) with the corresponding unicode characters.
        /// </summary>
        /// <param name="text">The string to decode.</param>
        /// <returns>
        /// The decoded string. Code points above U+FFFF are emitted as a surrogate pair;
        /// references that do not denote a valid code point are left unchanged. A <c>null</c>
        /// or empty input is returned as is.
        /// </returns>
        public static string DecodeUnicodeString(string text)
        {
            if (String.IsNullOrEmpty(text))
            {
                return text;
            }

            return NumericCharacterReferenceRegex.Replace(text, new MatchEvaluator(DecodeNumericCharacterReference));
        }

        /// <summary>
        /// Decodes a javascript string by replacing the escaped strings (\x00) 
        /// to the appropriate ASCII char.
        /// </summary>
        /// <remarks>
        /// NOTE(review): a backslash followed by two digits is decoded as a DECIMAL character
        /// code, which is what the original implementation attempted. JavaScript itself
        /// defines digit escapes as octal - confirm which form the data source emits.
        /// </remarks>
        /// <param name="text">The string to decode.</param>
        /// <returns>The decoded string. A <c>null</c> or empty input is returned as is.</returns>
        public static string DecodeJavascriptString(string text)
        {
            if (String.IsNullOrEmpty(text))
            {
                return text;
            }

            return JavascriptEscapeRegex.Replace(text, new MatchEvaluator(DecodeJavascriptEscape));
        }

        /// <summary>
        /// Parses an attribute list into a collection keyed by lower-cased attribute name.
        /// </summary>
        /// <param name="attributeList">The attribute list of a tag; must not be <c>null</c>.</param>
        /// <returns>The collection of attributes; empty if the list contains none.</returns>
        private static NameValueCollection ParseAttributes(string attributeList)
        {
            NameValueCollection tagAttributes = new NameValueCollection();
            foreach (Match matchTagAttribute in AttributeRegex.Matches(attributeList))
            {
                string name = matchTagAttribute.Groups["Name"].Value.ToLowerInvariant();
                string value = matchTagAttribute.Groups["Value"].Value;
                tagAttributes.Add(name, value);
            }

            return tagAttributes;
        }

        /// <summary>
        /// Converts one matched numeric character reference to the text it denotes.
        /// </summary>
        /// <param name="match">A match of the numeric character reference pattern.</param>
        /// <returns>The decoded text, or the original reference if it cannot be decoded.</returns>
        private static string DecodeNumericCharacterReference(Match match)
        {
            // TryParse with NumberStyles.None rejects values too large for Int32 and the
            // non-ASCII digits that \d also matches, instead of throwing.
            int codePoint;
            if (!Int32.TryParse(match.Groups["Value"].Value, NumberStyles.None, CultureInfo.InvariantCulture, out codePoint))
            {
                return match.Value;
            }

            if (codePoint <= Char.MaxValue)
            {
                // Includes lone surrogate values, which are passed through as a single
                // UTF-16 code unit so that a pair written as two references still combines.
                return ((char)codePoint).ToString();
            }

            if (codePoint <= MaxUnicodeCodePoint)
            {
                // A plain cast to char would silently truncate these code points.
                return Char.ConvertFromUtf32(codePoint);
            }

            return match.Value;
        }

        /// <summary>
        /// Converts one matched javascript escape sequence to the character it denotes.
        /// </summary>
        /// <param name="match">A match of the javascript escape pattern.</param>
        /// <returns>The decoded character, or the original escape if it cannot be decoded.</returns>
        private static string DecodeJavascriptEscape(Match match)
        {
            // The group must be read by NAME: .NET numbers unnamed groups before named ones,
            // so Groups[1] is only the hexadecimal alternative and is empty for digit escapes.
            string jsValue = match.Groups["Value"].Value;

            int charCode;
            bool parsed;
            if (jsValue[0] == 'x')
            {
                parsed = Int32.TryParse(jsValue.Substring(1), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out charCode);
            }
            else
            {
                parsed = Int32.TryParse(jsValue, NumberStyles.None, CultureInfo.InvariantCulture, out charCode);
            }

            return parsed ? ((char)charCode).ToString() : match.Value;
        }

    }
}
