/* -*- c++ -*- */
#ifndef AKAXISO2_XML_ENCODING_NAME_H__
#define AKAXISO2_XML_ENCODING_NAME_H__

/**
 * @file akaxiso2/transcoders/encoding_name.h
 * @brief encoding name-related functions.
 */

#include <akaxiso2/configuration.h>
#include <string>

namespace aka2 {

  /**
   * @brief encoding-specific byte sequence, such as a byte-order mark (BOM).
   *
   * Plain aggregate: a fixed-size buffer of at most 8 bytes plus the number
   * of bytes in that buffer that make up the mark.
   */
  struct char_mark {
    /** @brief the bytes of the mark; the buffer holds at most 8 bytes. */
    char mark_[8];
    /** @brief length of the byte sequence stored in mark_, in bytes. */
    size_t length_;
  };

  /*
   * Predefined char_mark constants, one per Unicode encoding named by the
   * identifier (UTF-7, UTF-8, and UCS-2 / UCS-4 in big- and little-endian
   * byte order).  They are only declared here; the definitions, and thus
   * the actual byte values, live in another translation unit.
   * NOTE(review): presumably each holds the BOM for its encoding and these
   * are the objects get_unicode_bom() points to -- confirm in the
   * implementation.
   */
  extern const struct char_mark utf7_bom;
  extern const struct char_mark utf8_bom;
  
  extern const struct char_mark ucs2be_bom;
  extern const struct char_mark ucs4be_bom;
  extern const struct char_mark ucs2le_bom;
  extern const struct char_mark ucs4le_bom;

  /**
   * @brief guess the system default encoding.
   *
   * Called from aka::initialize() when no user-defined default encoding
   * has been given.
   * Takes no arguments and returns nothing.
   * NOTE(review): presumably the guessed name is stored and later reported
   * by get_default_encoding() -- confirm in the implementation.
   */
  void guess_default_encoding();

  /**
   * @brief default encoding used in the akaxiso2 library.
   * @return default encoding name, as a const reference to a std::string.
   *
   * If the POSIX locale API is available, the LC_CTYPE value is obtained
   * and the part after the period ('.') is used as the encoding name.
   * Example:
   * 'ja_JP.eucJP' is the typical value on a Japanese system, so a typical
   * return value for Linux systems in a Japanese environment is 'eucJP'.
   * For Win32, the encoding name is looked up in the registry by using the
   * code page returned by GetACP().
   * The Japanese version of Windows should return 'shift_jis' (Code Page 932).
   *
   * NOTE(review): the original text named the POSIX call as locale();
   * presumably setlocale() is meant -- confirm in the implementation.
   * NOTE(review): the lifetime of the referenced string is not visible from
   * this header -- verify before holding the reference long-term.
   */
  const std::string &get_default_encoding();

  /**
   * @brief get the char_mark holding the byte-order mark (BOM) of an encoding.
   * @param encoding encoding name to look up.
   * @return pointer to a char_mark that holds the BOM if the encoding is one
   *         of the supported Unicode encodings.
   *
   * Supported Unicode names are:
   *  UTF-32, UCS-4, UTF-16, UCS-2, UTF-8, UTF-7.
   *
   * NOTE(review): the return value for an encoding that is not in the list
   * above is not documented here -- presumably a null pointer; confirm in
   * the implementation before dereferencing unconditionally.
   */
  const struct char_mark *get_unicode_bom(const std::string &encoding);

  /**
   * @brief check whether the encoding uses 2-byte character sequences.
   * @param encoding encoding name to test.
   * @return true for UTF-16, UTF-16BE, UTF-16LE, UCS-2.
   *
   * This function also checks UTF-16/UCS-2 alias names,
   * and returns true if the encoding name is UTF-16 or one of its aliases.
   */
  bool is_ucs2(const std::string &encoding);

  /**
   * @brief check whether the encoding is UTF-8.
   * @param encoding encoding name to test.
   * @return true if the encoding is UTF-8.
   *
   * This function also checks UTF-8 alias names,
   * and returns true if the encoding name is UTF-8 or one of its aliases.
   */
  bool is_utf8(const std::string &encoding);

  /**
   * @brief check whether the given encoding is not a 1-byte sequence.
   * @param encoding encoding name to test.
   * @return true if the encoding is not a 1-byte sequence.
   *
   * NOTE(review): which encoding names count as "not a 1-byte sequence" is
   * decided by the implementation and is not visible from this header --
   * presumably the multi-byte-unit Unicode forms (UTF-16 / UCS-2 / UCS-4);
   * confirm.
   */
  bool is_binary_encoding(const std::string &encoding);

  /**
   * @brief build the encoding key for an encoding name.
   * @param encoding encoding name to derive the key from.
   * @return encoding key string (returned by value).
   *
   * The encoding key is used for the algorithmic search of alias names and
   * is represented as:
   *  - a sequence of upper-case letters,
   *  - in which '-' and '_' are ignored.
   *
   * NOTE(review): e.g. "utf-8" would presumably yield "UTF8"; how digits and
   * other characters are treated is not visible from this header -- confirm
   * in the implementation.
   */
  std::string create_encoding_key(const std::string &encoding);

}
#endif

