#!/usr/bin/python
# Copyright (c) 2013 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import print_function

import sys
import os

import dfa_parser


class Generator(object):

  def __init__(self, dfa, c_file):
    self.dfa = dfa
    self.c_file = c_file

    self.code_table1 = {}
    self.code_table2 = {}

  def TransitionCode(self, transition):
    result = [a.body for a in transition.actions]
    result.append(
        'current_state = %s; current_position++; goto _again;\n' %
        transition.to_state.index)
    return tuple(result)

  def WriteData(self):
    self.c_file.write(
        'static const int _dfa_accepting[] = {%s};\n' %
        ', '.join('1' if state.is_accepting else '0'
                  for state in self.dfa.states))

    table = []
    for i, state in enumerate(self.dfa.states):
      elems = []
      for byte in range(256):
        t = state.forward_transitions.get(byte)
        if t is not None:
          code = self.TransitionCode(t)
        else:
          code = (self.dfa.error_action.body,)

        # Instead of a single switch with all possible sequences of actions,
        # we use two switches: one for the first 'half' of action sequence,
        # and one for the second 'half'.
        # It significantly reduces generated source size and compilation time
        # (because different action sequences can share 'halves')
        # at a cost of runtime performance.
        code1 = code[:1]
        code2 = code[1:]
        if code1 not in self.code_table1:
          self.code_table1[code1] = len(self.code_table1)
        if code2 not in self.code_table2:
          self.code_table2[code2] = len(self.code_table2)

        assert self.code_table1[code1] < 2**16
        assert self.code_table2[code2] < 2**16
        elems.append('{%s, %s}' % (self.code_table1[code1],
                                   self.code_table2[code2]))
      table.append('  {%s}' % ', '.join(map(str, elems)))

    self.c_file.write(
        'static const uint16_t _dfa_transitions[][256][2] = {\n%s\n};\n' %
        ',\n'.join(table))

  def WriteInit(self):
    self.c_file.write(
        'current_state = %d;\n' % self.dfa.initial_state.index)

  def WriteCodeTable(self, code_table, selector):
    self.c_file.write(
        'switch (%s) {\n' % selector)
    for code, index in sorted(code_table.items(), key=lambda (_, index): index):
      self.c_file.write(
          '  case %d:\n' % index)
      self.c_file.write(' '.join(code))
      self.c_file.write('  break;\n')
    self.c_file.write(
        '}\n')

  def WriteExec(self):
    self.c_file.write('_again:\n')
    self.c_file.write(
        'if (current_position == end_position) {\n'
        '  if (_dfa_accepting[current_state]) goto _done;\n'
        '  %s'
        '}\n' % self.dfa.error_action.body)

    self.WriteCodeTable(
        self.code_table1,
        '_dfa_transitions[current_state][*current_position][0]')
    self.WriteCodeTable(
        self.code_table2,
        '_dfa_transitions[current_state][*current_position][1]')

    self.c_file.write(
        '_done: ;\n')


def main():
  if len(sys.argv[1:]) != 3:
    print('Usage:')
    print('   %s <rl file> <xml file> <output c file>' %
          os.path.basename(__file__))
    sys.exit(1)
  rl_filename, xml_filename, c_filename = sys.argv[1:]

  dfa = dfa_parser.ParseXml(xml_filename)

  with open(rl_filename) as rl_file:
    lines = list(rl_file)

  with open(c_filename, 'wt') as c_file:
    c_file.write(
      '/*\n'
      ' * THIS FILE IS AUTO-GENERATED. DO NOT EDIT.\n'
      ' * Generated by %s from %s and %s.\n'
      ' */\n\n' % (
          os.path.basename(__file__),
          os.path.basename(rl_filename),
          os.path.basename(xml_filename)))

    generator = Generator(dfa, c_file)

    in_ragel_block = False
    for line in lines:
      if line.strip() == '%%{':
        assert not in_ragel_block
        in_ragel_block = True
      elif line.strip() == '}%%':
        assert in_ragel_block
        in_ragel_block = False
      elif line.strip() == '%% write data;':
        assert not in_ragel_block
        generator.WriteData()
      elif line.strip() == '%% write init;':
        assert not in_ragel_block
        generator.WriteInit()
      elif line.strip() == '%% write exec;':
        assert not in_ragel_block
        generator.WriteExec()
      else:
        if not in_ragel_block:
          c_file.write(line)

    assert not in_ragel_block


if __name__ == '__main__':
  main()
