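/** @file
 *  Out-of-class member definitions for gdestraier::model::index_type:
 *  the default constructor, the URI/title replace-rule setters,
 *  document encoding detection and the str2utf8() conversion helpers.
 */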
#if defined(HAVE_CONFIG_H)
#  include "../../config.h"
#endif

#include <string>
#include <cstring>
#include "hyperestraier/hyperestraier.hpp"
#include "index.hpp"

namespace gdestraier {
  namespace model {

    /**
     * Default constructor.
     *
     * Only assigns the initial values listed in the member initializer list;
     * the constructor body is empty. Members not listed here are left to their
     * own default construction.
     */
    index_type::index_type() :
      database_location_(index_type::LOCAL_FILESYSTEM),
      active_(false),
      filetype_(0),                        // NOTE(review): meaning of 0 is defined in index.hpp - confirm
      document_encoding_(0),               // 0 = no fixed encoding; fathom_encoding() then auto-detects
      filesystem_encoding_(0),             // NOTE(review): presumably "not set", like document_encoding_ - confirm
      use_Ngram_for_all_languages_(false),
      include_title_to_body_(true),
      include_uri_to_body_(true),
      is_enable_quick_build_(false),
      depth_(5),                           // NOTE(review): unit/meaning of the depth limit is not visible here
      max_documents_(400),
      use_snippet_(true)
    {
    }


    /**
     * Sets the URI replace rule.
     *
     * Stores the pattern source and the replacement text in uri_replace_rule_,
     * then tries to compile the pattern by assigning it to compiled_.
     * is_valid_regex_ ends up true only when the pattern is non-empty and that
     * assignment did not throw; otherwise it is left false.
     *
     * @param regex  Pattern source. NULL is treated as an empty pattern.
     * @param to     Replacement text. NULL is treated as an empty string.
     */
    void
    index_type::set_uri_replace(char const* regex, char const* to)
    {
      // Dereferencing a NULL pattern below (and handing NULL to the string
      // members) would be undefined behaviour, so normalise NULL to "".
      if (regex == 0) regex = "";
      if (to == 0)    to = "";

      uri_replace_rule_.is_valid_regex_ = false;
      uri_replace_rule_.regex_ = regex;
      uri_replace_rule_.to_ = to;

      // An empty pattern means "no rule": nothing to compile, flag stays false.
      if (*regex) {
        try {
          uri_replace_rule_.compiled_ = regex;
          uri_replace_rule_.is_valid_regex_ = true;
        } catch (...) {
          // Deliberately swallowed: a pattern that fails to compile is not an
          // error for the caller, it is reported through is_valid_regex_ == false.
        }
      }
    }


    /**
     * Sets the title replace rule.
     *
     * Stores the pattern source and the replacement text in title_replace_rule_,
     * then tries to compile the pattern by assigning it to compiled_.
     * is_valid_regex_ ends up true only when the pattern is non-empty and that
     * assignment did not throw; otherwise it is left false.
     *
     * @param regex  Pattern source. NULL is treated as an empty pattern.
     * @param to     Replacement text. NULL is treated as an empty string.
     */
    void
    index_type::set_title_replace(char const* regex, char const* to)
    {
      // Dereferencing a NULL pattern below (and handing NULL to the string
      // members) would be undefined behaviour, so normalise NULL to "".
      if (regex == 0) regex = "";
      if (to == 0)    to = "";

      title_replace_rule_.is_valid_regex_ = false;
      title_replace_rule_.regex_ = regex;
      title_replace_rule_.to_ = to;

      // An empty pattern means "no rule": nothing to compile, flag stays false.
      if (*regex) {
        try {
          title_replace_rule_.compiled_ = regex;
          title_replace_rule_.is_valid_regex_ = true;
        } catch (...) {
          // Deliberately swallowed: a pattern that fails to compile is not an
          // error for the caller, it is reported through is_valid_regex_ == false.
        }
      }
    }


    /**
     * Determines which encoding to assume for the byte range [first, last).
     *
     * If a document encoding has been configured (document_encoding_ is
     * non-zero) it is returned as is. Otherwise the encoding name is guessed by
     * ::est_enc_name(), using the index language as a hint, and looked up with
     * encoding::find(). When that lookup yields nothing, the result of
     * encoding::find("US-ASCII") is returned instead.
     *
     * @param first  Start of the text to inspect.
     * @param last   One past the end of the text to inspect.
     * @return The configured, detected or fallback encoding.
     */
    encoding const*
    index_type::fathom_encoding(char const* first, char const* last) const
    {
      // An explicitly configured encoding always wins over detection.
      if (document_encoding_ != 0) return document_encoding_;

      // Let the library auto-detect. Languages other than ja/zh/ko are
      // assumed to be English.
      int const language_hint =
        (language_ == "ja") ? ESTLANGJA :
        (language_ == "zh") ? ESTLANGZH :
        (language_ == "ko") ? ESTLANGKO :
                              ESTLANGEN;

      encoding const* const detected =
        encoding::find(::est_enc_name(first, last - first, language_hint));
      if (detected) return detected;

      // Detection produced a name we do not know: fall back to US-ASCII.
      return encoding::find("US-ASCII");
    }



    /**
     * Converts the contents of a std::string to UTF-8.
     *
     * Forwards the string's character range to the (first, last) overload.
     *
     * @param str  Text to convert.
     * @return Whatever the (first, last) overload returns for that range.
     */
    std::string index_type::str2utf8(std::string const& str) const
    {
      char const* const begin = str.c_str();
      return str2utf8(begin, begin + str.size());
    }


    /**
     * Converts a NUL-terminated C string to UTF-8.
     *
     * Measures the string with std::strlen() and forwards the resulting range
     * to the (first, last) overload.
     *
     * @param str  NUL-terminated text to convert. NULL yields an empty string.
     * @return The converted text, or an empty string when str is NULL.
     */
    std::string index_type::str2utf8(char const* str) const
    {
      // std::strlen(NULL) is undefined behaviour; there is nothing to convert.
      if (str == 0) return std::string();

      return str2utf8(str, str + std::strlen(str));
    }


    std::string index_type::str2utf8(char const* first, char const* last) const
    {
      return fathom_encoding(first, last)->to_utf8(first, last);
    }

  }
}


