/* This file is part of Malaga, a system for Natural Language Analysis.
 * Copyright (C) 1995-1999 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* description ==============================================================*/

/* The generation commands for malaga. */

/* includes =================================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <setjmp.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "input.h"
#include "commands.h"
#include "rule_type.h"
#include "rules.h"
#include "lexicon.h"
#include "analysis.h"
#include "debugger.h"

#ifdef HANGUL
#include "hangul.h"
#endif

#undef GLOBAL
#define GLOBAL
#include "generation.h"

/* types ====================================================================*/

typedef struct SEGMENT_STATE_T segment_state_t;

struct SEGMENT_STATE_T /* one running LAG state reached during generation */
{
  /* Next state in the same list, or NULL if this is the last one. */
  segment_state_t *next;

  /* Result category of this state. */
  value_t cat;

  /* Rule set of this state; used as an offset into the <rule_sets> table
   * of the current rule system. */
  int_t rule_set;
};

typedef struct SEGMENT_T segment_t;

struct SEGMENT_T /* one segment of the generation output (a stack entry) */
{
  /* Segment that was pushed before this one, or NULL for the first one. */
  segment_t *previous;

  /* Surface of this segment. "pop_segment" does not free it. */
  string_t surface;

  /* Running states that exist after this segment has been combined. */
  segment_state_t *states;
};

typedef struct ITEM_CAT_T item_cat_t;

struct ITEM_CAT_T /* one category that an item may have */
{
  /* Next category of the same item, or NULL if this is the last one. */
  item_cat_t *next;

  /* The category value; released by "free_item_cats". */
  value_t value;
};

typedef struct ITEM_T item_t;

struct ITEM_T /* a building block a word form or sentence may consist of */
{
  /* Next item in the item list, or NULL if this is the last one. */
  item_t *next;

  /* Surface of this item; released by "free_items". */
  string_t surf;

  /* All categories found for this item. */
  item_cat_t *cats;
};

/* variables ================================================================*/

LOCAL grammar_t grammar; /* grammar used for generation */
LOCAL int_t num_results; /* number of results printed so far; it is also
			  * the index printed in front of each result */

/* Number of segments currently on the segment stack
 * (maintained by "push_segment" and "pop_segment"). */
LOCAL int_t num_segments;
LOCAL int_t max_segments; /* user limit on segments in a word form */
/* Topmost segment of the segment stack, NULL if the stack is empty. */
LOCAL segment_t *last_segment;
/* Type of the rule that is being executed; "local_get_surface" needs it
 * to tell left and right surface apart. */
LOCAL rule_type_t current_rule_type;

/* List of the items that the generated output may consist of. */
LOCAL item_t *items;

/* functions ================================================================*/

LOCAL void combine_surface (text_t text, segment_t *segment)
/* Append to <text> the surfaces of all segments from the first one pushed
 * up to and including <segment>, oldest first.
 * If <grammar> is SYNTAX, the surfaces are separated by a single blank.
 * Nothing is appended if <segment> is NULL. */
{
  if (segment == NULL)
    return;

  /* Emit the older segments first so the output is in push order. */
  combine_surface (text, segment->previous);

  if (segment->previous != NULL && grammar == SYNTAX)
    add_to_text (text, " ");

  add_to_text (text, segment->surface);
}

/*---------------------------------------------------------------------------*/

LOCAL string_t local_get_surface (surface_t surface_type)
/* Return surface <surface_type> for currently executed rule.
 * RESULT_SURFACE: the surfaces of all segments on the stack.
 * LEFT_SURFACE:   for a combi rule all segments except the topmost one,
 *                 for any other rule all segments.
 * RIGHT_SURFACE:  for a combi rule the surface of the topmost segment,
 *                 for any other rule the empty text.
 * If the segment stack is empty, every surface is the empty text.
 * Any other <surface_type> is reported via "error".
 * The result must be freed after use. */
{
  text_t text = new_text ();
  string_t string;

  switch (surface_type)
  {
  case RESULT_SURFACE:
    combine_surface (text, last_segment);
    break;
  case LEFT_SURFACE:
    /* "combine_surface" copes with NULL, so only the dereference of
     * <last_segment> has to be guarded here. */
    if (current_rule_type == COMBI_RULE && last_segment != NULL)
      combine_surface (text, last_segment->previous);
    else
      combine_surface (text, last_segment);
    break;
  case RIGHT_SURFACE:
    if (current_rule_type == COMBI_RULE && last_segment != NULL)
      add_to_text (text, last_segment->surface);
    break;
  default:
    /* Release the buffer first: the code in this file relies on "error"
     * not returning, so the "free_text" below would never be reached. */
    free_text (&text);
    error ("internal (unknown surface type)");
  }
  string = new_string_readable (text_string (text), NULL);
  free_text (&text);
  return string;
}

/*---------------------------------------------------------------------------*/

LOCAL void local_add_end_state (value_t cat)
/* Report an end state: print the next result index followed by the
 * surface of all segments that are currently on the stack.
 * <cat> is not used. */
{
  string_t result_surface = local_get_surface (RESULT_SURFACE);

  /* Results are numbered starting with 1. */
  ++num_results;

#ifdef HANGUL
  decode_hangul (&result_surface);
#endif

  printf ("%ld: %s\n", num_results, result_surface);

  /* "local_get_surface" hands over ownership of the string. */
  free_mem (&result_surface);
}

/*---------------------------------------------------------------------------*/

LOCAL void local_add_running_state (value_t cat, int_t rule_set)
/* Create a running state from a copy of <cat> and from <rule_set> and
 * append it to the state list of the topmost segment. */
{
  segment_state_t *state = new_mem (sizeof (segment_state_t));
  segment_state_t *tail;

  state->cat = new_value (cat);
  state->rule_set = rule_set;
  state->next = NULL;

  if (last_segment->states == NULL)
  {
    /* First state of this segment. */
    last_segment->states = state;
  }
  else
  {
    /* Append behind the last state to keep the states in creation order. */
    tail = last_segment->states;
    while (tail->next != NULL)
      tail = tail->next;
    tail->next = state;
  }
}

/*---------------------------------------------------------------------------*/

LOCAL void push_segment (string_t surface)
/* Put a new segment with surface <surface> on top of the segment stack.
 * <surface> is not copied. */
{
  segment_t *new_segment;

  new_segment = new_mem (sizeof (segment_t));
  new_segment->previous = last_segment;
  new_segment->surface = surface;

  last_segment = new_segment;
  num_segments++;
}

/*---------------------------------------------------------------------------*/

LOCAL void pop_segment (void)
/* Pop the topmost segment. */
{
  segment_state_t *state, *next_state;
  segment_t *previous_segment = last_segment->previous;

  num_segments--;
  for (state = last_segment->states; state != NULL; state = next_state)
  {
    next_state = state->next;
    free_mem (&state->cat);
    free_mem (&state);
  }
  free_mem (&last_segment);
  last_segment = previous_segment;
}

/*---------------------------------------------------------------------------*/

LOCAL void execute_rules (value_t start_cat, int_t rule_set, item_t *item)
/* Combine the LAG state (<start_cat>, <rule_set>) with <item>:
 * for each category of <item>, execute every combi rule in <rule_set>.
 * The results are delivered via the callbacks that the executed rules
 * invoke (see "generate", which installs them). */
{
  rule_sys_t *rule_sys = rule_system[grammar];
  item_cat_t *item_cat;
  int_t *rule;

  for (item_cat = item->cats; item_cat != NULL; item_cat = item_cat->next)
  {
    /* A rule set is a list of rule numbers, ended by a negative number. */
    for (rule = rule_sys->rule_sets + rule_set; *rule >= 0; rule++)
    {
      if (rule_sys->rules[*rule].type != COMBI_RULE)
        continue;

      current_rule_type = COMBI_RULE;

      /* Set up the value stack for the rule: start category, category of
       * the item, surface of the item and current number of segments. */
      top = 0;
      push_value (start_cat);
      push_value (item_cat->value);
      push_string_value (item->surf, NULL);
      push_number_value (num_segments);

      execute_rule (rule_sys, *rule);
    }
  }
}

/*---------------------------------------------------------------------------*/

LOCAL void local_generate (void)
/* Continue generation (according to <grammar>) from the running states of
 * the topmost segment: execute their end rules, then, unless the limit
 * <max_segments> is reached, try to append each of <items> and recurse. */
{
  rule_sys_t *rule_sys = rule_system[grammar];
  segment_state_t *state;
  item_t *item;
  int_t *rule;

  check_user_break ();

  /* End rules come first, so a result is reported before any longer
   * result that starts with the same segments. */
  for (state = last_segment->states; state != NULL; state = state->next)
  {
    rule = rule_sys->rule_sets + state->rule_set;
    for (; *rule >= 0; rule++)
    {
      if (rule_sys->rules[*rule].type != END_RULE)
        continue;

      current_rule_type = END_RULE;
      top = 0;
      push_value (state->cat);
      execute_rule (rule_sys, *rule);
    }
  }

  /* Only try to append another segment while the user limit permits it. */
  if (num_segments < max_segments)
  {
    for (item = items; item != NULL; item = item->next)
    {
      push_segment (item->surf);

      /* Combine <item> with every state of the segment below. */
      state = last_segment->previous->states;
      while (state != NULL)
      {
        execute_rules (state->cat, state->rule_set, item);
        state = state->next;
      }

      /* Descend only if <item> could be combined at all. */
      if (last_segment->states != NULL)
        local_generate ();

      pop_segment ();
    }
  }
}

/*---------------------------------------------------------------------------*/

LOCAL void generate (void)
/* Generate all sentences or word forms (according to <grammar>) that can
 * be built from <items>, starting at the initial state of the rule
 * system. */
{
  rule_sys_t *rule_sys = rule_system[grammar];
  item_t *item;

  /* Start with an empty segment stack, whatever an earlier run left. */
  while (last_segment != NULL)
    pop_segment ();
  num_results = 0;
  num_segments = 0;

  /* Route the results of rule execution to this module. */
  add_running_state = local_add_running_state;
  add_end_state = local_add_end_state;
  get_surface = local_get_surface;
  set_debug_mode (RUN_MODE, NULL);

  /* Try each item as the first segment. */
  for (item = items; item != NULL; item = item->next)
  {
    push_segment (item->surf);
    execute_rules (rule_sys->values + rule_sys->initial_cat,
                   rule_sys->initial_rule_set, item);

    /* Go on only if <item> is accepted as a first segment. */
    if (last_segment->states != NULL)
      local_generate ();

    pop_segment ();
  }
}

/*---------------------------------------------------------------------------*/

LOCAL void free_item_cats (item_t *item)
/* Release the category list of <item>, including the category values,
 * and leave <item> with an empty list. */
{
  item_cat_t *cat = item->cats;

  while (cat != NULL)
  {
    item_cat_t *doomed = cat;

    /* Step on before <doomed> is released. */
    cat = cat->next;
    free_mem (&doomed->value);
    free_mem (&doomed);
  }

  item->cats = NULL;
}

/*---------------------------------------------------------------------------*/

LOCAL void free_items (void)
/* Release all of <items>, including their surfaces and categories,
 * and leave <items> empty. */
{
  item_t *item = items;

  while (item != NULL)
  {
    item_t *doomed = item;

    /* Step on before <doomed> is released. */
    item = item->next;
    free_item_cats (doomed);
    free_mem (&doomed->surf);
    free_mem (&doomed);
  }

  items = NULL;
}

/*---------------------------------------------------------------------------*/

LOCAL void generate_command (string_t arguments)
/* Generate sentences or words from items, depending on <grammar>. */
{
  item_t *item;
      
  if (in_debugger)
    error ("in debug mode");

  if (rule_system[grammar] == NULL)
    error ("rule file not loaded");

  max_segments = parse_int (&arguments);
  if (max_segments < 1)
    error ("must generate one segment at least");

  if (*arguments != EOS) /* Only read new items if there are any. */
  {
    item_t **item_ptr;

    free_items ();

    item_ptr = &items;
    while (*arguments != EOS) 
    {
      (*item_ptr) = new_mem (sizeof (item_t));
      (*item_ptr)->surf = parse_word (&arguments);
#ifdef HANGUL
      encode_hangul (&(*item_ptr)->surf);
#endif
      item_ptr = &(*item_ptr)->next;
    }
  }

  /* Create categories for <items>. */
  for (item = items; item != NULL; item = item->next)
  {
    item_cat_t **cat_ptr;
    
    free_item_cats (item);
    cat_ptr = &item->cats;
    if (grammar == MORPHOLOGY)
    {
      string_t surf_end;
      value_t cat;

      search_for_prefix (item->surf);
      while (get_next_prefix (&surf_end, &cat))
      {
	if (*surf_end == EOS) /* found a prefix that covers the whole string */
	{
	  *cat_ptr = new_mem (sizeof (item_cat_t));
	  (*cat_ptr)->value = new_value (cat);
	  cat_ptr = &(*cat_ptr)->next;
	}
      }
    }
    else
    {
      value_t morph_result;
      
      /* Call morphological analysis to get right-categories. */
      analyse (MORPHOLOGY, item->surf, FALSE, TRUE);
      
      /* Add all morphological results to category list. */
      reset_analysis_results ();
      for (morph_result = get_next_analysis_result ();
	   morph_result != NULL;
	   morph_result = get_next_analysis_result ())
      {
	*cat_ptr = new_mem (sizeof (item_cat_t));
	(*cat_ptr)->value = new_value (morph_result);
	cat_ptr = &(*cat_ptr)->next;
      }
    }
  }
  
  generate ();
  
  for (item = items; item != NULL; item = item->next)
    free_item_cats (item);
}

/*---------------------------------------------------------------------------*/

LOCAL void do_mg (string_t arguments)
/* Generate morphologically: select the MORPHOLOGY grammar and let
 * "generate_command" interpret <arguments>. */
{
  grammar = MORPHOLOGY;
  generate_command (arguments);
}

/* The command "mg": its name, the function that executes it ("do_mg")
 * and its help text. */
GLOBAL command_t mg_command =
{
  "mg", do_mg,
  "Generate all word forms that consist only of the given allomorphs.\n"
  "Usage:\n"
  "  mg <max_num_of_allos> <allomorph> ... -- use <allomorph> ...\n"
  "  mg <max_num_of_allos> -- use allomorphs of last generation command\n"
  "\"mg\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

LOCAL void do_sg (string_t arguments)
/* Generate syntactically: select the SYNTAX grammar and let
 * "generate_command" interpret <arguments>. */
{
  grammar = SYNTAX;
  generate_command (arguments);
}

/* The command "sg": its name, the function that executes it ("do_sg")
 * and its help text. */
GLOBAL command_t sg_command =
{
  "sg", do_sg,
  "Generate all sentences that consist only of the given word forms.\n"
  "Usage:\n"
  "  sg <max_num_of_words> <word> ... -- use <word> ...\n"
  "  sg <max_num_of_words> -- use words of last generation command\n"
  "\"sg\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

GLOBAL void init_generation (void)
/* Initialise this module.
 * Currently there is nothing to set up, so the body is empty. */
{
}

/*---------------------------------------------------------------------------*/

GLOBAL void terminate_generation (void)
/* Terminate this module: free all segments that are still on the segment
 * stack and free the item list. */
{
  /* "pop_segment" requires a non-empty stack, hence the test. */
  while (last_segment != NULL)
    pop_segment ();
  free_items ();
}

/* end of file ==============================================================*/
