#!ruby
# Delphi Parser - Tokenizer
# author: YOSHIDA Kazuhiro (moriq)
# mailto: moriq@moriq.com

# Tokenizer for Delphi (Object Pascal) source text.
#
# Typical use: create an instance, attach a debug sink with #deb= (any
# object responding to +outp+ and +outputs+; #next_token, #check and the
# comment scanners call it unconditionally), call #load, then pull tokens
# with #next_token / #check.
#
# The tokenizer keeps one token of look-ahead in @token.  Running off the
# end of the source raises RuntimeError from the private #getch; there is
# no end-of-file token.
#
# Requires Ruby 1.9 or later: characters are read through encoding-aware
# String indexing.
class Tokenizer

  # Conditional symbols that are defined for every new Tokenizer.
  ConditionalDefine = %w/ MSWINDOWS /

  Reserved = %w/ unit interface dispinterface implementation begin end uses type const var threadvar procedure function constructor destructor property resourcestring case /
  # private protected public published

  # Lookup table: lower-case reserved word => true.
  ReservedCheck = {}
  Reserved.each do |word|
    ReservedCheck[word] = true
  end

  # Starts with a single, active conditional level and the symbols listed
  # in ConditionalDefine (stored lower-case) marked as defined.
  def initialize
    @cond_stack = [true]
    @cond_table = {}
    ConditionalDefine.each do |name|
      @cond_table[name.downcase] = true
    end
  end

  # Returns the current look-ahead token and advances the look-ahead.
  #
  # If a token was pushed back with #put_token, the saved token becomes the
  # new look-ahead.  Otherwise raw tokens are scanned until one is found
  # that is neither blank (whitespace / comment / directive) nor inside an
  # inactive conditional region.
  #
  # Raises RuntimeError when the source is exhausted while scanning.
  def next_token
    current = @token
    if @tmptoken
      @token = @tmptoken
      @tmptoken = nil
    else
      loop do
        token, active = gettoken
        next if token == ' ' || !active
        @token = token
        break
      end
    end
    @d.outp current
    current
  end

  # Makes +token+ the current look-ahead; the previous look-ahead is kept
  # aside and restored by the following #next_token call.  Only one token
  # can be held aside at a time.
  def put_token(token)
    @tmptoken = @token
    @token = token
  end

  # Reads the file +fname+, resets the read position and primes the
  # look-ahead token.
  #
  # +encoding+ optionally names the encoding to read the file with (for
  # example 'Windows-31J'); by default Ruby's default external encoding is
  # used.  If the text is not valid in that encoding it is treated as raw
  # bytes so that regular-expression matching cannot fail on it.
  #
  # Returns the result of the priming #next_token call.
  def load(fname, encoding = nil)
    # File.read closes the handle (the old open(fname).read leaked it) and
    # never interprets a leading "|" as a command.
    text = encoding ? File.read(fname, encoding: encoding) : File.read(fname)
    text.force_encoding(Encoding::BINARY) unless text.valid_encoding?
    @source = text
    @offset = 0
    getch
    next_token
  end

  # Tells whether +str+ can be used where an identifier-like token is
  # expected: it must not be a reserved word (compared case-insensitively)
  # and must start with a quote, '$', '#' or a word character.
  #
  # Returns a truthy value (0) on success, false/nil otherwise.
  def ident?(str)
    return false unless str
    return false if ReservedCheck[str.downcase]
    /\A['$#\w]/ =~ str
  end

  # Verifies the look-ahead token and advances past it.
  #
  # +i+ is either :ident (the token must satisfy #ident?) or the expected
  # token text, compared case-insensitively.
  #
  # Returns the verified token; raises RuntimeError on a mismatch.
  def check(i)
    @d.outputs "check #{@token} #{i}"
    ret = @token
    case i
    when :ident
      raise "`#{ret}' is not ident" unless ident?(ret)
    else
      raise "`#{ret}' is not `#{i}'" unless ret.downcase == i.to_s.downcase
    end
    next_token
  end

  # The look-ahead token in lower case.
  def dc_token
    @token.downcase
  end

  # Sets the debug sink used by #next_token, #check and comment scanning.
  def deb=(v)
    @d = v
  end

  private

  # Un-reads the most recent character: afterwards @ch is the character
  # that was current before the last #getch, and the next #getch returns
  # the un-read character again.  Only a single step back is supported.
  def putch
    raise 'nothing to push back' if @offset < 2
    @offset -= 1
    @ch = @source[@offset - 1]
  end

  # Reads the next character into @ch and returns it.
  # Raises RuntimeError at the end of the source.
  def getch
    ch = @source[@offset]
    raise 'unexpected end of source' if ch.nil?
    @offset += 1
    @ch = ch
  end

  # Applies a compiler directive to the conditional-compilation state.
  #
  # +cd+ is the directive text starting with '$' (delimiters removed).
  # Handles $ifdef, $ifndef, $else, $define, $undef and $endif; symbol
  # names are case-insensitive.  Any other directive is ignored.
  def apply_compiler_directive(cd)
    directive = cd.strip.downcase
    case directive
    when /\A\$ifdef\s+(\w+)/
      @cond_stack.push @cond_table[Regexp.last_match(1)]
    when /\A\$ifndef\s+(\w+)/
      @cond_stack.push !@cond_table[Regexp.last_match(1)]
    when /\A\$else/
      @cond_stack.push !@cond_stack.pop
    when /\A\$define\s+(\w+)/
      @cond_table[Regexp.last_match(1)] = true
    when /\A\$undef\s+(\w+)/
      @cond_table[Regexp.last_match(1)] = nil
    when /\A\$endif/
      @cond_stack.pop
    end
  end

  # Scans one raw token starting at the current character @ch.
  #
  # Returns [token, active]: +token+ is the token text (' ' for
  # whitespace, comments and directives) and +active+ is true only when
  # every enclosing conditional level is active, so that a region nested
  # inside an inactive $ifdef stays inactive.
  def gettoken
    token =
      case @ch
      when "'"      then scan_string
      when '#'      then scan_run(/\d/)
      when '$'      then scan_run(/[\dA-Fa-f]/)
      when /\d/     then scan_number
      when '{'      then scan_brace_block
      when '('      then scan_paren
      when '/'      then scan_slash
      when '<'      then scan_operator('=', '>')
      when '>', ':' then scan_operator('=')
      when '.'      then scan_operator('.')
      when /\s/     then skip_whitespace
      when /\w/     then scan_word
      else               scan_operator
      end
    [token, @cond_stack.all?]
  end

  # Quoted string literal; the quotes, including doubled '' escapes, are
  # kept in the token.  A newline inside the literal raises RuntimeError.
  def scan_string
    token = @ch.dup
    loop do
      getch
      raise 'unterminated string literal' if @ch == "\n"
      if @ch == "'"
        token << @ch
        getch
        break unless @ch == "'" # a lone quote closes the literal
      end
      token << @ch
    end
    token
  end

  # The current prefix character ('#' or '$') followed by every
  # consecutive character matching +pattern+.  A prefix with nothing
  # after it is returned on its own.
  def scan_run(pattern)
    token = @ch.dup
    loop do
      getch
      break unless pattern =~ @ch
      token << @ch
    end
    token
  end

  # Integer or real literal.  A '..' after the digits is left for the
  # next token; an exponent is accepted only after a decimal point and is
  # always recorded as a lower-case 'e'.
  # NOTE: the character following the decimal point is appended without
  # checking that it is a digit.
  def scan_number
    token = @ch.dup
    seen_point = false
    loop do
      getch
      case @ch
      when '.'
        break if seen_point
        getch
        if @ch == '.'
          putch # range operator: hand the first '.' back
          break
        end
        token << '.'
        seen_point = true
      when 'E', 'e'
        break unless seen_point
        getch
        unless /[\d+-]/ =~ @ch
          putch # not an exponent: the 'e' starts the next token
          break
        end
        token << 'e'
      when /\d/
        # plain digit, appended below
      else
        break
      end
      token << @ch
    end
    token
  end

  # "{...}" comment or "{$...}" compiler directive; yields a blank token.
  def scan_brace_block
    getch
    if @ch == '$'
      apply_compiler_directive('$' + read_until_brace)
    else
      putch
      @d.outputs "comment #{read_until_brace}"
    end
    ' '
  end

  # Collects the text up to the next '}' and steps past the '}'.
  def read_until_brace
    text = ''.dup
    loop do
      getch
      break if @ch == '}'
      text << @ch
    end
    getch
    text
  end

  # "(" operator, "(*...*)" comment or "(*$...*)" compiler directive.
  def scan_paren
    getch
    return '(' unless @ch == '*'
    getch
    if @ch == '$'
      apply_compiler_directive('$' + read_until_star_paren)
    else
      putch
      @d.outputs "comment #{read_until_star_paren}"
    end
    ' '
  end

  # Collects the text up to the closing "*)" and steps past it.  A run of
  # several '*' directly before ')' still closes the block.
  def read_until_star_paren
    text = ''.dup
    after_star = false
    loop do
      getch
      break if after_star && @ch == ')'
      text << @ch
      after_star = (@ch == '*')
    end
    getch
    text.chop # drop the '*' that belongs to the closing "*)"
  end

  # "/" operator, or a "//" comment running through the end of the line.
  def scan_slash
    getch
    return '/' unless @ch == '/'
    getch until @ch == "\n"
    getch
    ' '
  end

  # One-character token, extended to two characters when the next
  # character is one of +followers+ (e.g. ':' + '=' gives ':=').
  def scan_operator(*followers)
    token = @ch.dup
    getch
    if followers.include?(@ch)
      token << @ch
      getch
    end
    token
  end

  # Skips a run of whitespace; yields a blank token.
  def skip_whitespace
    getch
    getch while /\s/ =~ @ch
    ' '
  end

  # Identifier or keyword: a run of word characters, case preserved.
  def scan_word
    token = @ch.dup
    loop do
      getch
      break unless /\w/ =~ @ch
      token << @ch
    end
    token
  end

end # of class Tokenizer
