# Copyright (C) 2005  Network Applied Communication Laboratory Co., Ltd.
#
# This file is part of Rast.
# See the file COPYING for redistribution information.
#

require File.join(File.dirname(__FILE__), "read-buckets-to-file")

# Rast filter for Sun XML Writer documents (MIME type
# "application/vnd.sun.xml.writer", extension "sxw").
#
# The document is read through the external "unzip" command: paragraph
# text from content.xml is passed down the filter chain as UTF-8 plain
# text, and the title, author and user-defined fields found in meta.xml
# are registered as properties on the filter.
class ApplicationVndSunXmlWriter
  SUPPORTED_VERSION = 1
  MIME_TYPE = "application/vnd.sun.xml.writer"
  EXTENSIONS = ["sxw"]

  # The five entities predefined by XML, keyed by entity name.
  UNESCAPES = {
    "amp" => "&",
    "lt" => "<",
    "gt" => ">",
    "apos" => "'",
    "quot" => "\"",
  }
  UNESCAPE_REGEXP = Regexp.new("&(" + UNESCAPES.keys.join("|") + ");")
  TEXT_REGEXP = %r|<text:p.*?>(.*?)</text:p>|
  TITLE_REGEXP = %r|<dc:title>(.*?)</dc:title>|
  AUTHOR_REGEXP = %r|<dc:creator>(.*?)</dc:creator>|
  META_REGEXP = %r|<meta:user-defined meta:name="([^\"]*)">(.*?)</meta:user-defined>|
  # Inline elements that stand for whitespace inside a paragraph.
  WHITESPACE_TAG_REGEXP = %r{<text:(?:s|tab-stop|line-break)\b[^>]*>}
  # Any other tag left inside a captured paragraph.
  TAG_REGEXP = %r{<[^>]*>}

  include ReadBucketsToFile

  private

  # Extracts text and metadata from the archive stored at +path+.
  # Presumably called by ReadBucketsToFile once the incoming buckets have
  # been written to a file (that module is not visible here).
  #
  # filter::    filter object that receives the text and the properties
  # mime_type:: MIME type of the input (not used by this implementation)
  # path::      file system path of the .sxw archive
  def process_file(filter, mime_type, path)
    # The command runs through the shell, so the path must be fully escaped.
    unzip_command = ["unzip", "-p", escape_shell(path)].join(" ")
    IO.popen(unzip_command + " content.xml", "r") do |f|
      process_content_xml(filter, f)
    end
    IO.popen(unzip_command + " meta.xml", "r") do |f|
      process_meta_xml(filter, f)
    end
  end

  # Reads content.xml from +f+ and passes one bucket per <text:p> match,
  # followed by an EOS bucket, to +filter+ as "text/plain; charset=UTF-8".
  def process_content_xml(filter, f)
    next_brigade = Rast::Brigade.new
    f.read.scan(TEXT_REGEXP) do |text, |
      # Markup must be removed before entities are decoded; otherwise an
      # escaped "&lt;b&gt;" in the text would be mistaken for a tag.
      s = unescape_xml(strip_inline_markup(text))
      next_brigade.insert_tail(Rast::TransientBucket.new(s + "\n"))
    end
    next_brigade.insert_tail(Rast::EOSBucket.new)
    filter.pass(next_brigade, "text/plain; charset=UTF-8")
  end

  # Reads meta.xml from +f+ and sets the "title" and "author" properties
  # plus one property per <meta:user-defined> element (named after its
  # meta:name attribute) on +filter+.
  def process_meta_xml(filter, f)
    s = f.read
    db_encoding = filter.db_encoding

    s.scan(TITLE_REGEXP) do |title, |
      set_utf8_property(filter, "title", title, db_encoding)
    end

    s.scan(AUTHOR_REGEXP) do |author, |
      set_utf8_property(filter, "author", author, db_encoding)
    end

    s.scan(META_REGEXP) do |name, value|
      set_utf8_property(filter, name, value, db_encoding)
    end
  end

  # Decodes the XML entities in +raw_value+ (UTF-8 text taken from
  # meta.xml), converts the result to +db_encoding+ and stores it on
  # +filter+ under +name+. Entities are decoded while the text is still
  # UTF-8 so that the entity pattern is never applied to re-encoded bytes.
  def set_utf8_property(filter, name, raw_value, db_encoding)
    value = Rast::EncodingConverter.convert_encoding("UTF-8", db_encoding,
                                                     unescape_xml(raw_value))
    filter.set_property(name, value)
  end

  # Removes the tags embedded in a captured paragraph. Whitespace elements
  # become a single space so that adjacent words are not fused; every
  # other tag is dropped and its text content is kept.
  def strip_inline_markup(fragment)
    return fragment.gsub(WHITESPACE_TAG_REGEXP, " ").gsub(TAG_REGEXP, "")
  end

  # Returns +filename+ escaped so that the shell reads it as exactly one
  # word. Every character outside a conservative safe set is prefixed
  # with a backslash; this covers quotes, backslashes and spaces as well
  # as metacharacters such as ";", "$", "`", "|", "&", "*" and "(".
  def escape_shell(filename)
    # An empty word would disappear from the command line entirely.
    return "''" if filename.empty?
    escaped = filename.gsub(/[^A-Za-z0-9_\-.,:+\/@\n]/) { |c| "\\" + c }
    # Backslash-newline is a line continuation in sh, so newlines have to
    # be quoted rather than backslash-escaped.
    return escaped.gsub(/\n/, "'\n'")
  end

  # Returns +s+ with the predefined XML entities (&amp; &lt; &gt; &apos;
  # &quot;) replaced by their characters. The replacement is a single
  # pass, so "&amp;lt;" yields "&lt;" rather than "<".
  def unescape_xml(s)
    return s.gsub(UNESCAPE_REGEXP) { UNESCAPES[$1] }
  end
end
