# prime/engine/engine-learndict.rb
# $Id: engine-learndict.rb,v 1.2 2003/06/11 17:55:25 komatsu Exp $
#
# Copyright (C) 2002, 2003 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.
#

require 'jcode'
require 'prime/engine/engine-japanese'

# Ruby 1.8 character-code switch: 'e' selects EUC for multibyte-aware
# string and regexp handling (e.g. the split(//) used below).
$KCODE = 'e'

# Base file name of the user's learning dictionary; it is joined with
# PRIME_USER_DIR inside PrimeEngineLearndict.
LEARNDICT = 'prime-dict-user'

# Name of the engine class defined in this file.
# NOTE(review): presumably read by whatever loads this file in order to
# instantiate the engine -- confirm against the loader.
$engine_class_name = 'PrimeEngineLearndict'

# Engine backed by a per-user learning dictionary.
#
# Words passed to learn_word are registered in an in-memory PrimeLearnDict
# and appended (through a small text buffer) to two files under
# PRIME_USER_DIR: the full dictionary (LEARNDICT) and a "_diff" file holding
# the entries written since the dictionary was last initialized.
class PrimeEngineLearndict < PrimeEngineJapanese
  # Sets up engine metadata, loads the dictionary and opens both output
  # files in append mode.
  def initialize
    super

    @name = "Learndict engine"
    @id   = "learndict"
    @description = "Learndict engine"

    # (The original Japanese comment at this point is mis-encoded in this
    # copy of the file and could not be recovered.)
    @filepath = File::join2(PRIME_USER_DIR, LEARNDICT)
    @diff_filepath = @filepath + "_diff"
    @dict = initialize_userdict

    file_option = (File::CREAT|File::WRONLY|File::APPEND)
    @file      = Kernel::open(@filepath,      file_option)
    @diff_file = Kernel::open(@diff_filepath, file_option)
    # Restrict the files to the owner; only attempted when we own them.
    @file.chmod(0600)      if @file.stat.owned?
    @diff_file.chmod(0600) if @diff_file.stat.owned?

    @max_candidates = 10
    @prev_context = ""
  end

  # Builds the in-memory dictionary and returns it.
  #
  # The dictionary comes from Marshal::init_file on the ".rbo" file.
  # NOTE(review): Marshal::init_file is defined elsewhere; presumably it
  # returns the cached object when it can and otherwise evaluates the block
  # (and caches its result) -- confirm.  When the block runs, the dictionary
  # is rebuilt from the full text file, so the diff file is not replayed.
  # When the block does not run, the diff file is replayed on top of the
  # loaded object and the result is dumped back.  In both cases the diff
  # file is truncated afterwards.
  def initialize_userdict
    learndict_object = File::join2(PRIME_USER_DIR, LEARNDICT + ".rbo")
    load_diff = true
    dict = Marshal::init_file(learndict_object) {
      load_diff = false
      dict = PrimeLearnDict.new
      dict.load_dict(@filepath)
      dict
    }
    # The original misspelled this variable here, which raised NameError.
    File::chmod(0600, learndict_object) if FileTest::owned?(learndict_object)

    if load_diff then
      dict.load_dict(@diff_filepath)
      Marshal::dump_file(dict, learndict_object)
    end
    # Opening with "w" truncates (or creates) the diff file.
    diff_file = Kernel::open(@diff_filepath, "w")
    diff_file.close

    return dict
  end

  # Registers a word in the in-memory dictionary and queues it for writing.
  #
  # key::     reading of the word
  # value::   converted form of the word
  # part::    part of speech; an empty string is replaced by a default label
  # context:: context string under which the word is indexed
  # suffix::  text appended to key/value for a second, longer entry
  # rest::    further text appended for a third entry
  #
  # Always returns true.
  def learn_word (key, value, part, context, suffix, rest)
    # The original carried four numbered Japanese design notes here; they are
    # mis-encoded in this copy and could not be recovered.  The disabled
    # branch below belonged to them: when key == value it skipped the
    # dictionary and only logged the entry through write_dict2.
    #     if key == value then
    #       write_dict2 (value, context, suffix, rest)
    #       return true
    #     end

    # NOTE(review): the string literals in this method appear mis-encoded in
    # this copy of the file; they are reproduced unchanged.
    part = (part.length > 0) ? part : "̤θ"
    @dict.set_word(key, value, part, context)

    key2 = key
    val2 = value
    if suffix.length > 0 then
      key2 += suffix
      val2 += suffix
      @dict.set_word(key2, val2, "ʸ", context)
    end
    if rest.length > 0 then
      key2 += rest
      val2 += rest
      @dict.set_word(key2, val2, "ʸ", context)
    end

    write_dict(key, value, part, context, suffix, rest)

    return true
  end

  # Writes any buffered entries and closes both dictionary files.
  # Always returns true.
  def close
    flush_dict
    @file.close
    # The diff file is opened in initialize as well, so release it too.
    @diff_file.close
    return true
  end

  private

  # Collects candidates for +input+ and returns them as a PrimeResult.
  #
  # With method == :prefix the query set also includes every string from
  # input.expands and at most @max_candidates candidates are produced; any
  # other method value performs an exact lookup of input.original with a
  # limit of 100.
  def lookup (input, method = :prefix)
    results = PrimeResult.new
    queries = make_queries(input.base)
    if method == :prefix then
      input.expands.each {|string|
        queries.add(string)
      }
      rests = queries.rests
      max   = @max_candidates
    else ## method == :exact
      queries.add(input.original, nil, "", "", false)
      rests = [""]
      max   = 100
    end

    context_indexes = (@dict.context[input.context] or [])

    rests.each {|rest|
      indexes = lookup_dict(queries.query_lines(rest), max)
      # Entries learned under the current context come first.
      indexes = (indexes & context_indexes) + (indexes - context_indexes)
      indexes.each {|index|
        pattern, word, part = @dict.data[index]
        suffix = get_suffix(input.base, pattern, rest)
        # Higher (more recently added) indexes score higher; a longer
        # unconverted rest lowers the score.
        priority =
          15000 + (index * 1000 / @dict.data.length) - (rest.length * 1000)
        cand = PrimeCandidate.new(pattern, word, priority,
                                  part, suffix, rest)
        results << cand
        max -= 1
        # This only leaves the inner loop; the following lookup_dict calls
        # then receive max == 0 and return nothing.
        (max > 0) or break
      }
    }
    return results
  end

  # Returns the indexes matching any of +query_lines+, highest index first,
  # truncated to +max+ entries.  A nil +max+ means "no limit"; the original
  # guard `max == (nil or 0)` only tested for 0, so nil raised TypeError.
  def lookup_dict (query_lines, max = nil)
    results = []
    return results if max == 0

    query_lines.each {|query|
      results = results | (@dict.dict[query] or [])
    }
    # NOTE(review): a mis-encoded original comment here mentions an input
    # like ["a", "abc", "a", "ab", "a"] and the number 3; it seems to describe
    # a known limitation of this truncation, but its text is unreadable.
    sorted = results.sort.reverse
    return (max ? sorted[0, max] : sorted)
  end

  # Returns the parts of speech recorded for +base+ (empty array if none).
  def lookup_part (base)
    return (@dict.part[base] or [])
  end

  ## Dictionary file

  # Queues one learned entry for the dictionary files.  A new record (the
  # pending buffer is flushed and the context line written) is started
  # unless +context+ is non-empty and equals the text of the previous entry.
  def write_dict (key, value, part, context, suffix, rest)
    if context != @prev_context or context == "" then
      flush_dict
      add_buffer(context)
    end
    add_buffer([key, part, value])
    add_buffer(suffix)
    add_buffer(rest)
    @prev_context = [value, suffix, rest].join
  end

  # Variant of write_dict that logs only +value+ instead of the
  # key/part/value triple.  Referenced only from the disabled branch in
  # learn_word.
  def write_dict2 (value, context, suffix, rest)
    if context != @prev_context or context == "" then
      flush_dict
      add_buffer(context)
    end
    add_buffer(value)
    add_buffer(suffix)
    add_buffer(rest)
    @prev_context = [value, suffix, rest].join
  end

  # Appends one line to the pending buffer.  Arrays are joined with tabs,
  # empty strings are ignored, and a fresh buffer starts with a blank line
  # (which load_dict treats as a record separator).
  def add_buffer (line)
    return if line == ""

    if line.class == Array then
      line = line.join("\t")
    end
    @learndict_buffer = "\n" unless @learndict_buffer
    @learndict_buffer += (line + "\n")
  end

  # Writes the pending buffer to both files and clears it.
  #
  # NOTE(review): the locks are requested with LOCK_NB and the result is not
  # checked, so the write proceeds even when another process holds the lock.
  def flush_dict
    return unless @learndict_buffer
    @file.flock(File::LOCK_EX|File::LOCK_NB)
    @file.print(@learndict_buffer)
    @file.flock(File::LOCK_UN|File::LOCK_NB)
    @diff_file.flock(File::LOCK_EX|File::LOCK_NB)
    @diff_file.print(@learndict_buffer)
    @diff_file.flock(File::LOCK_UN|File::LOCK_NB)
    @learndict_buffer = nil
  end

end

# In-memory learning dictionary.
#
# Every learned word is stored as a [key, value, part, context] row in
# +data+; the other tables map strings to arrays of row indexes (or, for
# +part+, to part-of-speech names).
class PrimeLearnDict
  attr_reader :context, :part, :dict, :data

  def initialize
    @context = {} # key: context string,  value: array of indexes into @data
    @part    = {} # key: word key,        value: array of parts of speech
    @dict    = {} # key: lookup pattern,  value: array of indexes into @data
    @data    = [] # index -> [key, value, part, context]
  end

  # Appends a word and indexes it.
  #
  # key::     reading of the word; a nil key is rejected
  # value::   converted form
  # part::    part of speech (must be a String)
  # context:: context string; indexed only when non-empty
  #
  # The word is indexed in +dict+ under every character prefix of +key+,
  # under key + "\t", and under key + "\t" + part + "\t".
  #
  # Returns false when +key+ is nil, otherwise the list of parts recorded
  # for +key+.
  def set_word (key, value, part, context)
    key or return false

    index = @data.length
    @data[index] = [key, value, part, context]

    if context and context.length > 0 then
      (@context[context] ||= []).push(index)
    end

    pattern = ""
    (key.split(//) + ["\t", part + "\t"]).each {|char|
      pattern += char
      (@dict[pattern] ||= []).push(index)
    }

    (@part[key] ||= []).push(part)
  end

  # Loads entries from the text file +filename+; does nothing (returns nil)
  # when the file does not exist.
  #
  # File format as parsed here: blank lines separate records.  A line with a
  # non-empty second tab-separated field is a "key<TAB>part<TAB>value" entry
  # and is registered under the current context; its value then becomes the
  # context for the next entry.  A line without that field sets the context
  # when none is set yet, and is otherwise only accumulated into the pending
  # key/value text.
  def load_dict (filename)
    File::exist?(filename) or return

    # Read inside a block so the handle is closed right away; the original
    # Kernel::open(...).readlines never closed it.
    lines = File::open(filename, "r") {|file| file.readlines }

    context   = ""
    pre_key   = ""
    pre_value = ""
    lines.each {|line|
      if line =~ /^[ \t]*$/ then
        context   = ""
        pre_key   = ""
        pre_value = ""
        next
      end

      (key, part, value) = line.chomp.split(/\t/)
      if part == nil or part == "" then
        if context == "" then
          context = key
        else
          pre_key   += key
          pre_value += key
          ## FIXME (carried over from the original): registering the
          ## accumulated (pre_key, pre_value) pair through set_word is
          ## disabled, so these two values are currently never used.
        end
      else
        set_word(key, value, part, context)
        pre_key   = key
        pre_value = value
        context   = value
      end
    }
  end
end
