# -*- coding: utf8 -*-
"""BBS

Root of bulletin board tree
"""

from __future__ import generators
import os
import marshal
import re
import xml.dom.minidom as minidom

from utils import *
import config
import http
from category import Category
from board import Board


class BBS(object):
    """BBS

    Root of the bulletin board tree.

    The board tree is kept as a list of Category objects built from an
    XML cache file located under config.CACHEDIR.  Call update() to
    download a fresh board table and rebuild both the cache file and
    the in-memory list.
    """
    def __init__(self):
        self._title = unicode("２ちゃんねる","utf8")
        # Location of the XML cache written by update()
        self._filepath = os.path.join(config.CACHEDIR,"boardtree.xml")
        # Where the HTML board table is downloaded from
        self._board_tree_url_host = "www.ff.iij4u.or.jp"
        self._board_tree_url_path = "/~ch2/bbstable.html"
        # Empty list when no cache file exists yet
        self._categories = self._build_categories()

    def title(self):
        """Return name of the BBS"""
        return self._title

    def categories(self):
        """Return iterator of categories"""
        return iter(self._categories)

    def update(self):
        """Update BBS

        This method downloads the board tree, converts it to XML, saves
        the XML to the cache file (creating its directory if needed) and
        rebuilds the internal list of categories from that file.
        """
        # Download
        raw_data = http.get(self._board_tree_url_host,
                            self._board_tree_url_path)
        # Convert
        xml = _bbstable2xml(j(raw_data["content"]))
        # Save
        dirname = os.path.dirname(self._filepath)
        # An empty dirname means "current directory"; makedirs("") would fail
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)
        # Close the handle explicitly so the data is flushed to disk before
        # _build_categories() reads the file back, and so the descriptor is
        # released even if the write fails.
        cache = open(self._filepath,"w")
        try:
            cache.write(xml.encode("utf8"))
        finally:
            cache.close()
        # Build
        self._categories = self._build_categories()

    def _build_categories(self):
        """Return a list of categories

        This method loads the cache file, creates one Category per
        "category" element, appends a Board for every "board" element
        inside it, and returns the list.  If the cache file is not
        found, an empty list is returned.
        """
        if not os.path.exists(self._filepath):
            return []
        dom = minidom.parse(self._filepath)
        try:
            categories = []
            for ce in dom.getElementsByTagName("category"):
                c = Category(ce.getAttribute("title"))
                for be in ce.getElementsByTagName("board"):
                    # host and path are converted to byte strings; the
                    # title is passed on as returned by minidom
                    c.append(Board(str(be.getAttribute("host")),
                                   str(be.getAttribute("path")),
                                   be.getAttribute("title")))
                categories.append(c)
            return categories
        finally:
            # Only plain strings were taken from the document, so it can
            # be released right away
            dom.unlink()


def _bbstable2xml(html):
    """Convert the board table HTML into an XML document string

    The given string must be Unicode.  Every category node produced by
    _parse_categories() is joined with a newline and wrapped in a single
    "bbs" root element.
    """
    assert isinstance(html, unicode)
    category_nodes = list(_parse_categories(j(html)))
    body = u"\n".join(category_nodes)
    return """<bbs title="2ch">%s</bbs>""" % body

# Matches an "&" that does not already begin one of the entity or character
# references XML understands; such an "&" would make the document malformed.
_bare_ampersand_re = re.compile(
    r"&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)")

def _xml_attr(text):
    """Return text made safe for a double-quoted XML attribute value

    Bare ampersands, angle brackets and double quotes are replaced by
    entity references.  References that are already valid in XML
    (e.g. "&amp;" or "&#12354;") are left untouched so they are not
    escaped twice.
    """
    text = _bare_ampersand_re.sub("&amp;", text)
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    return text.replace('"', "&quot;")

# NOTE(review): the trailing lookahead only matches a category that is
# followed by another opening bracket, so the final category of the page
# is never matched -- confirm this is intended.
_category_re = re.compile(j("【<B>(?P<title>.+?)</B>】.+?(?=【)","utf8"),
                          re.I|re.S)
def _parse_categories(html):
    """Iterate over all category nodes

    A category node is a unicode string holding one "category" XML
    element whose content is the "board" elements found inside that
    category.  Categories without any board, and the category titled
    "運営", are skipped.  The title is escaped so the result stays
    well-formed XML.
    """
    excluded_title = unicode("運営","utf8")
    for m in _category_re.finditer(html):
        title = m.group("title")
        # Boards are searched only inside the text matched for this category
        boards = list(_parse_boards(html, m.start(), m.end()))
        if boards and title != excluded_title:
            yield u"""<category title="%s">%s</category>"""%\
                  (_xml_attr(title), u"".join(boards))

_board_re = re.compile(j("""<A[ ]HREF=http://(?P<host>.+?)/(?P<path>.+?)/>
                            (?P<title>.+?)
                            </A>"""),
                       re.I|re.X)
def _parse_boards(html, start, end):
    """Parse HTML between start and end and yield every board element

    Only matches lying between index start and index end of html are
    considered.  Each yielded item is a unicode string holding one
    "board" XML element with "title", "host" and "path" attributes.
    The attribute values are escaped so the result stays well-formed
    XML.
    """
    for m in _board_re.finditer(html, start, end):
        yield u"""<board title="%s" host="%s" path="%s"/>""" %\
          (_xml_attr(m.group("title")),
           _xml_attr(m.group("host")),
           _xml_attr(m.group("path")))

