"""Utilities"""

import os
import htmlentitydefs
import re
import marshal

import config


def j(string, encoding="japanese.ms932", error="replace"):
    """Decode a byte string into unicode, passing other values through.

    This behaves like unicode(string, encoding, error), except that the
    encoding defaults to "japanese.ms932" and the error handling scheme
    defaults to "replace".  A value that is not a str instance is returned
    unchanged.
    """
    if not isinstance(string, str):
        # Nothing to decode (for example the value is already unicode).
        return string
    decoded = unicode(string, encoding, error)
    return decoded

def u(string):
    """Decode a UTF-8 encoded byte string and return the unicode result."""
    decoded = unicode(string, "utf8")
    return decoded


def extract_html_entities(text):
    """Return the given text with its HTML entity references replaced."""
    # The pattern is deliberately looser than the exact grammar of an HTML
    # entity reference.  Every candidate is handed to _replace_entref(),
    # which consults the entity dictionary, so an undefined &foo; string
    # is left as it is.
    # This function is written by 162.
    candidate = re.compile(u"&#?[A-Za-z0-9]+;")
    return candidate.sub(_replace_entref, text)

# Table mapping each "&name;" reference to the unicode character it stands
# for, derived from the standard htmlentitydefs.entitydefs table.
_entity_dict = {}
for k, v in htmlentitydefs.entitydefs.items():
    if not (v.startswith("&#") and v.endswith(";")):
        # The value is the character itself, as a Latin-1 byte string.
        v = unicode(v, "iso-8859-1")
    else:
        # The value is spelled as a decimal character reference, "&#NNN;".
        v = unichr(int(v[2:-1]))
    _entity_dict[u"&%s;" % k] = v

def _replace_entref(match):
    """Return the replacement text for one HTML entity reference match.

    Decimal (&#65;) and hexadecimal (&#x41;) character references are
    converted to the character they denote, and named references are looked
    up in _entity_dict.  A reference that cannot be resolved -- an undefined
    name, malformed digits, or a code point that unichr() rejects -- is
    returned unchanged, so one bad reference never aborts the substitution.
    """
    key = match.group()
    if key[1] != u"#":
        # Named reference: fall back to the original text when unknown.
        return _entity_dict.get(key, key)

    digits = key[2:-1]
    try:
        if digits[:1] in (u"x", u"X"):
            # Hexadecimal form; the "x" marker is case-insensitive.
            code = int(digits[1:], 16)
        elif digits.isdigit():
            code = int(digits)
        else:
            return key
        return unichr(code)
    except (ValueError, OverflowError):
        # Malformed hex digits, or a code point outside the range that
        # unichr() supports on this interpreter build.
        return key



class MarshalDict:
    """Dictionary-like object whose content is saved in a file.

    The whole dictionary is serialised with marshal and rewritten to the
    file every time an item is assigned.
    """

    def __init__(self, filename):
        """Load the dictionary stored in filename, if there is one.

        A missing or empty file gives an empty dictionary.

        Raises TypeError if the file holds marshal data that is not a
        dictionary.
        """
        self._filename = filename
        self._dict = {}
        if os.path.exists(self._filename):
            # marshal is a binary format: text mode may translate line
            # endings on platforms that distinguish text from binary files
            # and corrupt the data.
            # NOTE(review): a file previously written in text mode on such
            # a platform may need to be regenerated.
            stream = open(self._filename, "rb")
            try:
                data = stream.read()
            finally:
                stream.close()
            # An empty file is not valid marshal data, so treat it as an
            # empty dictionary instead of trying to parse it.
            if data:
                self._dict = marshal.loads(data)
        if not isinstance(self._dict, dict):
            raise TypeError("not dictionary")

    def __getitem__(self, key):
        """Return the value stored under key; raise KeyError if absent."""
        return self._dict[key]

    def __setitem__(self, key, value):
        """Store value under key and immediately write the file."""
        self._dict[key] = value
        self._save()

    def __contains__(self, key):
        """Return whether key is present, so that "key in obj" works."""
        return key in self._dict

    def get(self, key, default=None):
        """Return the value stored under key, or default if absent."""
        return self._dict.get(key, default)

    def _save(self):
        """Write the current content of the dictionary to the file."""
        stream = open(self._filename, "wb")
        try:
            marshal.dump(self._dict, stream)
        finally:
            # Close explicitly so the data is flushed even when the
            # interpreter does not reclaim the file object right away.
            stream.close()

    def has_key(self, key):
        """Return whether key is present in the dictionary."""
        return key in self._dict

    def keys(self):
        """Return the keys of the dictionary."""
        return self._dict.keys()

    def items(self):
        """Return the (key, value) pairs of the dictionary."""
        return self._dict.items()

    def values(self):
        """Return the values of the dictionary."""
        return self._dict.values()

