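/*
 * Detects segment boundaries.
 * Candidate segmentations are built by chaining extents and scoring the
 * chain; the extents that give a high score are selected.
 *
 * The search runs over metawords in priority order.
 * A search node is a struct Astar_node, and struct search_stat holds the
 * queue (a heap) used by the search.
 *
 * anthy_eval_border() splits the specified range into segments.
 *
 * Funded by the IPA Exploratory Software Project 2001 10/29
 * Copyright (C) 2000-2003 TABATA Yusuke, UGAWA Tomoharu
 */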
#include <stdio.h>
#include <stdlib.h>

#include <alloc.h>
#include <splitter.h>
#include "wordborder.h"

/*
 * Limits of the search.
 *  ASTAR_MAX_DEPTH    capacity of Astar_node.ex[]; also the depth limit
 *                     installed once a node ending exactly at the target
 *                     position has been expanded
 *  ASTAR_NORMAL_DEPTH depth limit a search starts with
 *  ASTAR_HEAP_DEPTH   number of slots in search_stat.heap[]
 *  ASTAR_EXPAND_LIMIT maximum number of nodes expanded by one do_split()
 */
#define ASTAR_MAX_DEPTH 8
#define ASTAR_NORMAL_DEPTH 4
#define ASTAR_HEAP_DEPTH 1024
#define ASTAR_EXPAND_LIMIT 4096

/* A search node: a chain of extents laid end to end starting at `from`. */
struct Astar_node {
  /* Extents chained so far; only ex[0] .. ex[nr_ex - 1] are valid. */
  int nr_ex;
  struct extent *ex[ASTAR_MAX_DEPTH];

  /* Evaluation of this node */
  /* score: value computed by eval_astar_node() */
  int score;
  /* from: start position, len: summed length of the chained extents */
  int from;
  int len;
  /* Ordering key inside the heap, computed by push_astar_node(). */
  int heap_score;
};

/*
 * State of one search (one call of do_split()).
 *
 * heap[] is a binary heap stored in an array: the children of slot n are
 * the slots 2n+1 and 2n+2.  push_astar_node() and
 * rebalance_astar_heap_from_root() keep every parent's heap_score at least
 * as large as that of its children, so slot 0 always holds a node with the
 * largest heap_score.
 */
struct search_stat {
  /* Copy of the highest scoring node expanded so far */
  struct Astar_node best;
  /* A node is expanded only while its nr_ex is below this limit */
  int search_depth;
  /* The heap; ASTAR_HEAP_DEPTH slots, unused slots are 0 */
  struct Astar_node **heap;
  /* Number of nodes currently stored in heap[] */
  int nr_nodes;
  /*
   * Loop counter of do_split() (counts down from ASTAR_EXPAND_LIMIT);
   * push_astar_node() also folds it into heap_score.
   */
  int expand_count;
};

/* Allocator for struct Astar_node; created and released by anthy_eval_border(). */
static allocator node_ator;


/**
 * Compute a value describing how the lengths of the node's extents are
 * distributed.  eval_astar_node() subtracts the result from the score.
 *
 * Extents flagged is_dummy_char are ignored.  For every other extent the
 * length is capped at 5 * mw_count, then
 *   sum(len^2 / mw_count) * SCORE_PER_LEN * sum(mw_count) / sum(len)^2
 * is returned (integer arithmetic throughout).  The result is 0 when the
 * summed length is 0.
 *
 * NOTE(review): divides by ex->mw_count; assumes it is never 0 for a
 * non-dummy extent -- confirm against the code that creates extents.
 */
static int
calc_balance(struct Astar_node *an)
{
  int idx;
  int sq_sum = 0;     /* sum of len^2 / mw_count */
  int seg_total = 0;  /* sum of mw_count */
  int len_total = 0;  /* sum of the capped lengths */

  for (idx = 0; idx < an->nr_ex; idx++) {
    struct extent *cur = an->ex[idx];
    int capped;

    if (cur->is_dummy_char) {
      /* not counted at all */
      continue;
    }

    capped = cur->len;
    if (capped > 5 * cur->mw_count) {
      /* an overly long extent only counts up to the cap */
      capped = 5 * cur->mw_count;
    }

    sq_sum += capped * capped / cur->mw_count;
    seg_total += cur->mw_count;
    len_total += capped;
  }

  sq_sum *= SCORE_PER_LEN * seg_total;
  if (!len_total) {
    return 0;
  }
  return sq_sum / (len_total * len_total);
}

static void 
print_astar_node(struct splitter_context *sc,
		 struct Astar_node *an)
{
  int i;
  printf("A*node score=%d balance=(%d)\n", an->score, calc_balance(an));
  for (i = 0; i < an->nr_ex; i++) {
    if (an->ex[i]->best) {
      anthy_print_metaword(sc, an->ex[i]->best);
    } else {
      printf("no meta word.\n");
    }
  }
  printf("\n");
}

/*
 * Take a node from node_ator and make it an empty chain (no extents,
 * length 0).  The remaining fields are left for the caller to fill in.
 */
static struct Astar_node *
alloc_astar_node(void)
{
  struct Astar_node *node;

  node = anthy_smalloc(node_ator);
  node->len = 0;
  node->nr_ex = 0;
  return node;
}

/*
 * Record segment borders in sc->word_split_info->seg_border[] according to
 * the metaword chosen by the search.
 *
 * How a metaword is handled is selected by the `mark` entry of
 * anthy_metaword_type_tab[] for its type: composite metawords recurse into
 * their children, leaf kinds set seg_border[] at their start position.
 * A NULL mw is ignored.
 */
static void
mark_by_metaword(struct splitter_context *sc,
		 struct meta_word *mw)
{
  struct word_split_info_cache *info = sc->word_split_info;
  if (!mw) {
    return ;
  }
  switch (anthy_metaword_type_tab[mw->type].mark) {
  case MW_MARK_PAIR:
    /* both halves contribute their borders */
    mark_by_metaword(sc, mw->mw1);
    mark_by_metaword(sc, mw->mw2);
    break;
  case MW_MARK_OCHAIRE:
  case MW_MARK_WRAP:
    /* only the child carries the border */
    mark_by_metaword(sc, mw->mw1);
    break;
  case MW_MARK_OCHAIRE_LEAF:
    info->seg_border[mw->from] = 1;
    if (mw->mw1) {
      mark_by_metaword(sc, mw->mw1);
    }
    break;
  case MW_MARK_WL:
    if (mw->wl) {
      info->seg_border[mw->wl->from] = 1;
    }
    break;
  case MW_MARK_LEFT_WL:
    /*
     * Guard against a missing child as well as a missing word list;
     * every other case tolerates mw1 == NULL, so this one must too.
     */
    if (mw->mw1 && mw->mw1->wl) {
      info->seg_border[mw->mw1->wl->from] = 1;
    }
    break;
  case MW_MARK_NONE:
  default:
    printf("try to mark unknown type of metaword (%d).\n",
	   anthy_metaword_type_tab[mw->type].mark);
  }
}

/*
 * Restore the heap order below slot n after the node in that slot has
 * been replaced: the node is swapped downwards with its larger child
 * until it is strictly larger than both children.
 *
 * A child slot that lies outside the array or is empty counts as having
 * the score -2000000000.
 */
static void
rebalance_astar_heap_from_root(struct search_stat *ss, int n)
{
  for (;;) {
    int left = 2 * (n + 1) - 1;
    int right = 2 * (n + 1);
    int cur_score = ss->heap[n]->heap_score;
    struct Astar_node *cur = ss->heap[n];
    struct Astar_node *lchild = 0;
    struct Astar_node *rchild = 0;
    int lscore = -2000000000;
    int rscore = -2000000000;

    if (left < ASTAR_HEAP_DEPTH) {
      lchild = ss->heap[left];
    }
    if (right < ASTAR_HEAP_DEPTH) {
      rchild = ss->heap[right];
    }
    if (lchild) {
      lscore = lchild->heap_score;
    }
    if (rchild) {
      rscore = rchild->heap_score;
    }

    if (cur_score > lscore && cur_score > rscore) {
      /* already in place */
      return ;
    }

    /* sink into the side holding the larger child (right side on a tie) */
    if (lscore > rscore) {
      ss->heap[n] = lchild;
      ss->heap[left] = cur;
      n = left;
    } else {
      ss->heap[n] = rchild;
      ss->heap[right] = cur;
      n = right;
    }
  }
}

/*
 * Compute an->score.
 *
 * A node without extents scores 0.  Otherwise the score is the sum of the
 * extent scores, plus a length bonus, plus SCORE_PER_LEN, minus
 * calc_balance(), minus a penalty proportional to the number of extents.
 */
static void
eval_astar_node(struct Astar_node *an)
{
  int idx;
  int len_bonus;
  int total = 0;

  an->score = 0;
  if (an->nr_ex == 0) {
    return ;
  }

  /* sum of the scores of the chained extents */
  idx = 0;
  while (idx < an->nr_ex) {
    total += an->ex[idx]->score;
    idx++;
  }

  /*
   * Length bonus: granted in full to a single-extent node, otherwise
   * scaled by the share of the length that lies after the first extent.
   */
  len_bonus = SCORE_PER_LEN / 20 * an->len;
  if (an->nr_ex != 1) {
    len_bonus = len_bonus * (an->len - an->ex[0]->len) / an->len;
  }
  total += len_bonus;

  /* adjustment for the balance of the extent lengths */
  total += SCORE_PER_LEN;
  total -= calc_balance(an);

  /* penalty for the number of extents */
  total -= an->nr_ex * SCORE_PER_LEN / 4;

  an->score = total;
}

/*
 * Evaluate a node and insert it into the heap.  Takes ownership of `an`:
 * when the heap already holds ASTAR_HEAP_DEPTH - 1 nodes the node is
 * released instead of being stored.
 *
 * The heap key is expand_count * 10, plus SCORE_PER_LEN times the average
 * extent length when the node has extents.
 */
static void
push_astar_node(struct search_stat *ss,
		struct Astar_node *an)
{
  int child;

  if (ss->nr_nodes >= ASTAR_HEAP_DEPTH - 1) {
    /* no room left: drop the node */
    anthy_sfree(node_ator, an);
    return ;
  }

  eval_astar_node(an);

  /* expand_count only decreases, so earlier pushes get the larger base */
  an->heap_score = ss->expand_count * 10;
  if (an->nr_ex) {
    /* a longer average extent raises the key */
    an->heap_score += SCORE_PER_LEN * an->len / an->nr_ex;
  }

  /* append at the first free slot */
  child = ss->nr_nodes++;
  ss->heap[child] = an;

  /* walk from the new leaf up to the root, swapping where out of order */
  for (; child != 0; child = (child - 1) / 2) {
    int parent = (child - 1) / 2;
    struct Astar_node *upper = ss->heap[parent];
    struct Astar_node *lower = ss->heap[child];

    if (upper->heap_score < lower->heap_score) {
      ss->heap[parent] = lower;
      ss->heap[child] = upper;
    }
  }
}

/*
 * Remove and return the node at the root of the heap, or 0 when the heap
 * is empty.  The caller becomes the owner of the returned node.
 */
static struct Astar_node *
pop_astar_node(struct search_stat *ss)
{
  struct Astar_node *top = ss->heap[0];
  int last;

  if (!top) {
    return 0;
  }

  /* move the last node into the root slot and clear its old slot */
  last = --ss->nr_nodes;
  ss->heap[0] = ss->heap[last];
  ss->heap[last] = 0;

  if (last) {
    /* sink the moved node back into place */
    rebalance_astar_heap_from_root(ss, 0);
  }
  return top;
}

/*
 * Create the successors of a node: for every length, longest first, for
 * which anthy_find_extent() returns an extent at the node's right end, a
 * copy of the node with that extent appended is pushed onto the heap.
 */
static void
do_expand_astar_node(struct splitter_context *sc,
		     struct search_stat *ss,
		     struct Astar_node *an)
{
  /* right end of an == start of the extent to append */
  const int start = an->from + an->len;
  int width;

  for (width = sc->char_count - start; width > 0; --width) {
    struct extent *found = anthy_find_extent(sc, start, width, 0);
    struct Astar_node *child;

    if (found) {
      child = alloc_astar_node();
      *child = *an;
      child->ex[child->nr_ex++] = found;
      child->len += width;
      push_astar_node(ss, child);
    }
  }
}

/*
 * Process one node popped from the heap and release it.
 *  - a node ending exactly at `to` raises the depth limit to
 *    ASTAR_MAX_DEPTH
 *  - the node is expanded by do_expand_astar_node() when it is below the
 *    depth limit and does not end beyond `to`
 *  - ss->best is replaced when the node scores higher
 */
static void
expand_astar_node(struct splitter_context *sc,
		  struct search_stat *ss,
		  struct Astar_node *an, int to)
{
  const int end = an->from + an->len;

  if (end == to) {
    /* reached the right end: allow deeper chains from now on */
    ss->search_depth = ASTAR_MAX_DEPTH;
  }

  if (end <= to && an->nr_ex < ss->search_depth) {
    do_expand_astar_node(sc, ss, an);
  }

  if (anthy_splitter_debug_flags() & SPLITTER_DEBUG_AN) {
    print_astar_node(sc, an);
  }

  /* remember the best node seen so far (by value) */
  if (ss->best.score < an->score) {
    ss->best = *an;
  }

  /* the expanded node itself is no longer needed */
  anthy_sfree(node_ator, an);
}

/** search_stat롢ʸꤹ뤴Ȥ˸ƤФ */
static void
init_search_stat(struct search_stat *ss)
{
  int i;

  ss->heap =
    malloc(sizeof(struct Astar_node *) * ASTAR_HEAP_DEPTH);
  for (i = 0; i < ASTAR_HEAP_DEPTH; i++) {
    ss->heap[i] = 0;
  }
  ss->nr_nodes = 0;
  ss->best.score = 0;
  ss->search_depth = ASTAR_NORMAL_DEPTH;
}

/*
 * Release what init_search_stat() and the search allocated: every node
 * still stored in the heap and the heap array itself.  ss->best is kept
 * by value inside *ss and stays readable afterwards.
 */
static void
free_search_stat(struct search_stat *ss)
{
  int slot = 0;

  while (slot < ASTAR_HEAP_DEPTH) {
    struct Astar_node *left_over = ss->heap[slot++];

    if (!left_over) {
      continue;
    }
    anthy_sfree(node_ator, left_over);
  }
  ss->nr_nodes = 0;
  free(ss->heap);
}

/* Mark the borders implied by the best metaword of an extent. */
static void
mark_by_extent(struct splitter_context *sc,
	       struct extent *ex)
{
  struct meta_word *chosen = ex->best;

  mark_by_metaword(sc, chosen);
}

/*
 * ʸζդ
 * ֤ͤϤθդʸαüΥǥå
 */
static int
do_split(struct splitter_context *sc,
	 int from, int to)
{
  struct search_stat ss;
  struct Astar_node *a;

  /* ν */
  init_search_stat(&ss);

  /* ǽnodeѤ */
  a = alloc_astar_node();
  a->from = from;
  push_astar_node(&ss, a);

  /* Ԥ */
  for (ss.expand_count = ASTAR_EXPAND_LIMIT;
       ss.expand_count > 0; ss.expand_count--) {
    a = pop_astar_node(&ss);
    if (!a) {
      /* ΡݥɤԤ */
      goto out;
    }
    /* Ÿ */
    expand_astar_node(sc, &ss, a, to);
  }
 out:

  /* ΤΥǡ */
  free_search_stat(&ss);

  if (ss.best.score == 0) {
    /* ˼ԤΤǰֱޤǤǤȤˤƤޤ */
    return to;
  }

  /*print_astar_node(sc, &ss->best);
    anthy_print_metaword(sc, ss.best.ex[0]->best);*/
  /* äȤ⥹ι⤤extentʸڤޡ */
  if (ss.best.ex[0]->best) {
    mark_by_extent(sc, ss.best.ex[0]);
  }
  return from + ss.best.ex[0]->len;
}

/**
 * Evaluate one extent.
 *
 * Among the metawords of the extent whose can_use is ok, the one with the
 * highest adjusted score (score minus mw_count * SCORE_PER_LEN / 32)
 * becomes ex->best and supplies ex->score and ex->mw_count; on equal
 * scores the earlier metaword wins.  Without a usable metaword ex->best
 * is NULL, ex->score is SCORE_PER_LEN * ex->len and ex->mw_count is left
 * untouched.
 */
static void
eval_extent(struct extent *ex)
{
  struct meta_word *cand;
  struct meta_word *winner = NULL;
  int top = 0;

  for (cand = ex->mw; cand; cand = cand->same_extent) {
    int adjusted;

    if (cand->can_use != ok) {
      continue;
    }

    adjusted = cand->score;
    adjusted -= cand->mw_count * SCORE_PER_LEN / 32;

    if (winner && adjusted <= top) {
      /* not better than what we already have */
      continue;
    }
    top = adjusted;
    winner = cand;
  }

  ex->best = winner;
  if (!winner) {
    /* no usable metaword: score by length only */
    ex->score = SCORE_PER_LEN * ex->len;
    return ;
  }
  ex->score = top;
  ex->mw_count = winner->mw_count;
}

/*
 * Evaluate every extent of the context.
 *
 * For each start position anthy_find_extent() is first called for length
 * 1 with its last argument set to 1 (presumably "create if missing" --
 * confirm in the splitter), then every extent that exists at that
 * position is scored with eval_extent().
 */
static void
eval_extent_all(struct splitter_context *sc)
{
  int start = 0;

  while (start < sc->char_count) {
    int width;

    anthy_find_extent(sc, start, 1, 1);

    for (width = 1; width <= sc->char_count - start; width++) {
      struct extent *found = anthy_find_extent(sc, start, width, 0);

      if (!found) {
	continue;
      }
      eval_extent(found);
    }
    start++;
  }
}

/*
 * Decide for every word_list starting in [from, to) whether it may be
 * used: a word_list is ng when sc->ce[] has seg_border set at any
 * position strictly inside it, ok otherwise.
 */
static void
seg_constraint_check_all(struct splitter_context *sc,
			 int from, int to)
{
  int pos;

  for (pos = from; pos < to; pos++) {
    struct word_list *cur = sc->word_split_info->cnode[pos].wl;

    while (cur) {
      int off;

      cur->can_use = ok;
      /* a border on the first character does not split the word_list */
      for (off = 1; off < cur->len; off++) {
	if (sc->ce[pos + off].seg_border) {
	  cur->can_use = ng;
	}
      }
      cur = cur->next;
    }
  }
}

/*
 * Decide whether a metaword may be used and store the verdict in
 * mw->can_use.  Metawords that already carry a verdict are skipped, and
 * child metawords are checked recursively where the kind requires it.
 *
 * The kind of check is selected by the `check` entry of
 * anthy_metaword_type_tab[] for the metaword's type.
 *
 * Note: for MW_CHECK_PAIR (and MW_CHECK_BORDER falling into it) can_use
 * is left at `unchecked` when either child is not ok.
 */
static void
metaword_constraint_check(struct splitter_context *sc,
			  struct meta_word *mw)
{
  struct word_split_info_cache *info = sc->word_split_info;
  if (mw->can_use != unchecked) {
    return ;
  }
  switch(anthy_metaword_type_tab[mw->type].check){
  case MW_CHECK_WL_STR:
    if (!mw->wl) {
      /* no word list: the metaword is ng when a border lies inside it */
      int i;
      mw->can_use = ok;
      for (i = 1; i < mw->len; i++) {
	if (sc->ce[i + mw->from].seg_border) {
	  mw->can_use = ng;
	  break;
	}
      }
      /*
       * Stop here.  Falling into MW_CHECK_WL_SINGLE would take its
       * "!mw->wl" branch and overwrite an ng verdict with ok.
       */
      break;
    }
    /* a word list exists: fall through and judge by it */
  case MW_CHECK_WL_SINGLE:
    if (!mw->wl || mw->wl->can_use == ok) {
      mw->can_use = ok;
    } else {
      mw->can_use = ng;
    }
    break;
  case MW_CHECK_WL_WRAP:
    /* same verdict as the wrapped metaword */
    metaword_constraint_check(sc, mw->mw1);
    mw->can_use = mw->mw1->can_use;
    break;
  case MW_CHECK_BORDER:
    if (info->seg_border[mw->mw1->from + mw->mw1->len]) {
      /* a border is marked exactly at the joint of the two halves */
      mw->can_use = ng;
      break;
    }
    /* no border at the joint: fall through and check both halves */
  case MW_CHECK_PAIR:
    metaword_constraint_check(sc, mw->mw1);
    metaword_constraint_check(sc, mw->mw2);
    if (mw->mw1->can_use == ok && mw->mw2->can_use == ok) {
      mw->can_use = ok;
    }
    break;
  case MW_CHECK_OCHAIRE:
    {
      /*
       * The whole mw1 chain shares one verdict: ng when a border lies
       * strictly inside this metaword, ok otherwise.
       */
      int i;
      struct meta_word* mw1;
      for (mw1 = mw; mw1; mw1 = mw1->mw1) {
	mw1->can_use = ok;
      }
      for (i = mw->from + 1; i < mw->from + mw->len; i++) {
	if (sc->ce[i].seg_border) {
	  for (mw1 = mw; mw1; mw1 = mw1->mw1) {
	    mw1->can_use = ng;
	  }
	  break;
	}
      }
    }
    break;
  case MW_CHECK_NONE:
    break;
  default:;
    printf("try to check unknown type of metaword (%d).\n", mw->type);
  }
}

/*
 * Recompute can_use for every metaword starting in [from, to).
 *
 * Two passes are needed: all verdicts are reset to `unchecked` first,
 * because metaword_constraint_check() skips anything that already has a
 * verdict and may recurse into metawords of other positions.
 */
static void
metaword_constraint_check_all(struct splitter_context *sc,
			      int from, int to)
{
  struct word_split_info_cache *info = sc->word_split_info;
  struct meta_word *cur;
  int pos;

  /* pass 1: forget the previous verdicts */
  for (pos = from; pos < to; pos++) {
    cur = info->cnode[pos].mw;
    while (cur) {
      cur->can_use = unchecked;
      cur = cur->next;
    }
  }

  /* pass 2: judge each metaword */
  for (pos = from; pos < to; pos++) {
    cur = info->cnode[pos].mw;
    while (cur) {
      metaword_constraint_check(sc, cur);
      cur = cur->next;
    }
  }
}

/*
 * Mark the segment borders of the range [from, to).
 *
 * Refreshes the can_use verdicts of word lists and metawords, scores all
 * extents, then calls do_split() repeatedly, each call continuing where
 * the previous one stopped, until `to` is reached.  Only a single split
 * is done when the SPLITTER_DEBUG_1 debug flag is set.
 *
 * The node allocator is created on entry and released on exit.
 */
void
anthy_eval_border(struct splitter_context *sc, int from, int to)
{
  int pos = from;

  node_ator = anthy_create_allocator(sizeof(struct Astar_node), 0);

  /* apply the constraints imposed by the current borders */
  seg_constraint_check_all(sc, from, to);
  metaword_constraint_check_all(sc, from, to);

  /* score the extents */
  eval_extent_all(sc);

  for (;;) {
    pos = do_split(sc, pos, to);
    if (anthy_splitter_debug_flags() & SPLITTER_DEBUG_1) {
      break;
    }
    if (pos >= to) {
      break;
    }
  }

  anthy_free_allocator(node_ator);
}
