/**
 * @file   graphout.c
 * @author Akinobu LEE
 * @date   Thu Mar 17 12:46:31 2005
 * 
 * <JA>
 * @brief  2ѥη̤ñ쥰շǽϤ롥
 * </JA>
 * 
 * <EN>
 * @brief  Output results in word graph format.
 * </EN>
 * 
 * $Revision: 1.6 $
 * 
 */
/*
 * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <julius.h>

#ifdef GRAPHOUT

/// Define if you want debugging output for graph generation
#undef GDEBUG

/// Define if you want much more debugging output for graph generation
#undef GDEBUG2

/* Work buffers shared by successive calls of wordgraph_adjust_boundary_sub().
   Both are allocated (and re-allocated when too small) inside that function,
   with the current capacity tracked through its `maxfnum` argument. */
static int *framelist;		///< candidate start frames collected from the left contexts of one word
static LOGPROB *framescorelist;	///< head score paired with each entry of framelist


/**************************************************************/
/* allocation and free of a WordGraph instance */

/**
 * Allocate and initialize a new graph word.
 *
 * The left and right context arrays are pre-allocated with room for
 * FANOUTSTEP entries each and start out empty.  The returned word is
 * unmarked, not saved, and not linked into any list (next == NULL).
 *
 * @param wid [in] word ID
 * @param headphone [in] logical HMM stored as the head phone of the word
 * @param tailphone [in] logical HMM stored as the tail phone of the word
 * @param leftframe [in] beginning time in frames
 * @param rightframe [in] end time in frames
 * @param fscore_head [in] sentence score on search at word head (g + h)
 * @param fscore_tail [in] sentence score on search at word tail (g + h)
 * @param gscore_head [in] Viterbi score accumulated from input end at word head (g)
 * @param gscore_tail [in] Viterbi score accumulated from input end at word tail (g)
 * @param lscore [in] language score of the word (bogus in Julian);
 * stored only when USE_NGRAM is defined
 * @param cm [in] word confidence score (computed on search time);
 * stored only when CM_SEARCH is defined
 *
 * @return pointer to the newly created graph word.
 */
static WordGraph *
wordgraph_new(WORD_ID wid, HMM_Logical *headphone, HMM_Logical *tailphone, int leftframe, int rightframe, LOGPROB fscore_head, LOGPROB fscore_tail, LOGPROB gscore_head, LOGPROB gscore_tail, LOGPROB lscore, LOGPROB cm)
{
  WordGraph *new;

  new = (WordGraph *)mymalloc(sizeof(WordGraph));

  /* identity, alignment and scores */
  new->wid = wid;
  new->lefttime = leftframe;
  new->righttime = rightframe;
  new->fscore_head = fscore_head;
  new->fscore_tail = fscore_tail;
  new->gscore_head = gscore_head;
  new->gscore_tail = gscore_tail;
#ifdef USE_NGRAM
  new->lscore = lscore;
#endif
#ifdef CM_SEARCH
  new->cmscore = cm;
#endif
  new->headphone = headphone;
  new->tailphone = tailphone;

  /* empty context lists with an initial capacity of FANOUTSTEP */
  new->leftwordmaxnum = FANOUTSTEP;
  new->leftword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->leftwordmaxnum);
  new->leftwordnum = 0;
  new->rightwordmaxnum = FANOUTSTEP;
  new->rightword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->rightwordmaxnum);
  new->rightwordnum = 0;

  /* bookkeeping flags and list link */
  new->mark = FALSE;
#ifdef GRAPHOUT_DYNAMIC
  new->purged = FALSE;
#endif
  new->next = NULL;
  new->saved = FALSE;

#ifdef GDEBUG
  /* A freshly created word has no context entries yet, so only its span
     and boundary phones are reported. */
  printf("NEW: \"%s\"[%d..%d]\n", wchmm->winfo->woutput[new->wid], new->lefttime, new->righttime);
  printf("\theadphone: %s\n", new->headphone->name);
  printf("\ttailphone: %s\n", new->tailphone->name);
#endif

  return(new);
}

/**
 * Free a graph word.
 *
 * Releases the two context arrays and then the word itself.  Words
 * referenced from the context arrays are not freed.  Passing NULL is a
 * no-op.
 *
 * @param wg [in] graph word to be freed (may be NULL).
 */
void
wordgraph_free(WordGraph *wg)
{
  if (wg == NULL) return;
  free(wg->rightword);
  free(wg->leftword);
  free(wg);
}


/**************************************************************/
/* Handling contexts */

/**
 * Append a graph word to the left context list of another graph word.
 *
 * No duplicate check is done.  The list grows by FANOUTSTEP entries
 * whenever it is full.  Nothing happens when either argument is NULL.
 *
 * @param wg [i/o] graph word whose left context list receives @a left.
 * @param left [in] graph word to append as left context of @a wg.
 */
static void
wordgraph_add_leftword(WordGraph *wg, WordGraph *left)
{
  if (wg == NULL || left == NULL) return;

  /* grow the array by one step when it has no free slot */
  if (wg->leftwordnum >= wg->leftwordmaxnum) {
    wg->leftwordmaxnum += FANOUTSTEP;
    wg->leftword = (WordGraph **)myrealloc(wg->leftword, sizeof(WordGraph *) * wg->leftwordmaxnum);
  }

  wg->leftword[wg->leftwordnum++] = left;

#ifdef GDEBUG
  printf("addleft: \"%s\"[%d..%d] added as %dth left of \"%s\"[%d..%d]\n", wchmm->winfo->woutput[left->wid], left->lefttime, left->righttime, wg->leftwordnum, wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
#endif
}

/**
 * Append a graph word to the right context list of another graph word.
 *
 * No duplicate check is done.  The list grows by FANOUTSTEP entries
 * whenever it is full.  Nothing happens when either argument is NULL.
 *
 * @param wg [i/o] graph word whose right context list receives @a right.
 * @param right [in] graph word to append as right context of @a wg.
 */
static void
wordgraph_add_rightword(WordGraph *wg, WordGraph *right)
{
  if (wg == NULL || right == NULL) return;

  /* grow the array by one step when it has no free slot */
  if (wg->rightwordnum >= wg->rightwordmaxnum) {
    wg->rightwordmaxnum += FANOUTSTEP;
    wg->rightword = (WordGraph **)myrealloc(wg->rightword, sizeof(WordGraph *) * wg->rightwordmaxnum);
  }

  wg->rightword[wg->rightwordnum++] = right;

#ifdef GDEBUG
  printf("addright: \"%s\"[%d..%d] added as %dth right of \"%s\"[%d..%d]\n", wchmm->winfo->woutput[right->wid], right->lefttime, right->righttime, wg->rightwordnum, wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
#endif
}

/**
 * Add a graph word to the left context of another one unless it is
 * already listed there.
 *
 * @param wg [i/o] graph word whose left context will be checked
 * @param left [in] graph word to be checked as left context of @a wg
 *
 * @return TRUE if @a left was not present and has been added; FALSE if it
 * was already present, or if either argument is NULL.
 */
boolean
wordgraph_check_and_add_leftword(WordGraph *wg, WordGraph *left)
{
  int n;

  if (wg == NULL || left == NULL) return FALSE;

  /* already registered as a left context? */
  for (n = 0; n < wg->leftwordnum; n++) {
    if (wg->leftword[n] == left) return FALSE;
  }

  wordgraph_add_leftword(wg, left);
  return TRUE;
}

/**
 * Add a graph word to the right context of another one unless it is
 * already listed there.
 *
 * @param wg [i/o] graph word whose right context will be checked
 * @param right [in] graph word to be checked as right context of @a wg
 *
 * @return TRUE if @a right was not present and has been added; FALSE if it
 * was already present, or if either argument is NULL.
 */
boolean
wordgraph_check_and_add_rightword(WordGraph *wg, WordGraph *right)
{
  int n;

  if (wg == NULL || right == NULL) return FALSE;

  /* already registered as a right context? */
  for (n = 0; n < wg->rightwordnum; n++) {
    if (wg->rightword[n] == right) return FALSE;
  }

  wordgraph_add_rightword(wg, right);
  return TRUE;
}

/**
 * Copy the context words of one graph word into another, as part of
 * merging two equivalent graph words.
 *
 * Marked context words are skipped, and so is a direct link from @a src
 * to @a dst itself.  A context word is appended to @a dst only when @a dst
 * does not already list it.  Only the lists of @a dst are modified; the
 * context words themselves are not updated to point back at @a dst.
 *
 * @param dst [i/o] destination graph word
 * @param src [in] source graph word
 *
 * @return TRUE if at least one context word has been newly added, or FALSE if
 * context on @a dst has not been updated.
 */
static boolean
merge_contexts(WordGraph *dst, WordGraph *src)
{
  int si, di;
  WordGraph *cand;
  boolean found;
  boolean updated = FALSE;

#ifdef GDEBUG
  printf("merge_contexts: merging context of \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
	 wchmm->winfo->woutput[src->wid], src->lefttime, src->righttime,
	 wchmm->winfo->woutput[dst->wid], dst->lefttime, dst->righttime);
#endif

  /* left context */
  for (si = 0; si < src->leftwordnum; si++) {
    cand = src->leftword[si];
    if (cand->mark) continue;
    /* a direct dst -> src link is dropped so that no self loop is created */
    if (cand == dst) {
#ifdef GDEBUG
      printf("merge_contexts: skipping direct link (dst) -> (src)\n");
#endif
      continue;
    }
    found = FALSE;
    for (di = 0; di < dst->leftwordnum; di++) {
      if (!dst->leftword[di]->mark && dst->leftword[di] == cand) {
	found = TRUE;
	break;
      }
    }
    if (!found) {
      wordgraph_add_leftword(dst, cand);
#ifdef GDEBUG
      printf("merge_contexts: added \"%s\"[%d..%d] as a new left context\n",
	     wchmm->winfo->woutput[cand->wid], cand->lefttime, cand->righttime);
#endif
      updated = TRUE;
    }
#ifdef GDEBUG
    else {
      printf("merge_contexts: \"%s\"[%d..%d] already exist\n",
	     wchmm->winfo->woutput[cand->wid], cand->lefttime, cand->righttime);
    }
#endif
  }

  /* right context */
  for (si = 0; si < src->rightwordnum; si++) {
    cand = src->rightword[si];
    if (cand->mark) continue;
    /* a direct src -> dst link is dropped so that no self loop is created */
    if (cand == dst) {
#ifdef GDEBUG
      printf("merge_contexts: skipping direct link (src) -> (dst)\n");
#endif
      continue;
    }
    found = FALSE;
    for (di = 0; di < dst->rightwordnum; di++) {
      if (!dst->rightword[di]->mark && dst->rightword[di] == cand) {
	found = TRUE;
	break;
      }
    }
    if (!found) {
      wordgraph_add_rightword(dst, cand);
#ifdef GDEBUG
      printf("merge_contexts: added \"%s\"[%d..%d] as a new right context\n",
	     wchmm->winfo->woutput[cand->wid], cand->lefttime, cand->righttime);
#endif
      updated = TRUE;
    }
#ifdef GDEBUG
    else {
      printf("merge_contexts: \"%s\"[%d..%d] already exist\n",
	     wchmm->winfo->woutput[cand->wid], cand->lefttime, cand->righttime);
    }
#endif
  }

  return(updated);
}

/**
 * Substitute a word in the left context of a graph word with another.
 *
 * Every entry equal to @a from is overwritten with @a to; the list length
 * is unchanged, so duplicates may result.
 *
 * @param wg [i/o] target graph word.
 * @param from [in] left context word to be substituted
 * @param to [in] substitution destination.
 */
static void
swap_leftword(WordGraph *wg, WordGraph *from, WordGraph *to)
{
  WordGraph **ctx;
  int n, total;

#ifdef GDEBUG
  printf("swapleft: replacing left of \"%s\"[%d..%d] from \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
	 wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime,
	 wchmm->winfo->woutput[from->wid], from->lefttime, from->righttime,
	 wchmm->winfo->woutput[to->wid], to->lefttime, to->righttime);
#endif

  ctx = wg->leftword;
  total = wg->leftwordnum;
  for (n = 0; n < total; n++) {
    if (ctx[n] == from) ctx[n] = to;
  }
}

/**
 * Substitute a word in the right context of a graph word with another.
 *
 * Every entry equal to @a from is overwritten with @a to; the list length
 * is unchanged, so duplicates may result.
 *
 * @param wg [i/o] target graph word.
 * @param from [in] right context word to be substituted
 * @param to [in] substitution destination.
 */
static void
swap_rightword(WordGraph *wg, WordGraph *from, WordGraph *to)
{
  WordGraph **ctx;
  int n, total;

#ifdef GDEBUG
  printf("swapright: replacing right of \"%s\"[%d..%d] from \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
	 wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime,
	 wchmm->winfo->woutput[from->wid], from->lefttime, from->righttime,
	 wchmm->winfo->woutput[to->wid], to->lefttime, to->righttime);
#endif

  ctx = wg->rightword;
  total = wg->rightwordnum;
  for (n = 0; n < total; n++) {
    if (ctx[n] == from) ctx[n] = to;
  }
}

/**
 * Delete duplicate entries in the left context list of a graph word.
 *
 * The first occurrence of each pointer is kept, in its original relative
 * order, and the list length is reduced accordingly.
 *
 * @param wg [i/o] target graph word
 */
static void
uniq_leftword(WordGraph *wg)
{
  WordGraph *cur;
  int rd, wr, k;

  wr = 0;
  for (rd = 0; rd < wg->leftwordnum; rd++) {
    cur = wg->leftword[rd];
    /* look for cur among the entries already kept */
    k = 0;
    while (k < wr && wg->leftword[k] != cur) k++;
    if (k == wr) {
      wg->leftword[wr++] = cur;
    }
  }
  wg->leftwordnum = wr;
}

/**
 * Delete duplicate entries in the right context list of a graph word.
 *
 * The first occurrence of each pointer is kept, in its original relative
 * order, and the list length is reduced accordingly.
 *
 * @param wg [i/o] target graph word
 */
static void
uniq_rightword(WordGraph *wg)
{
  WordGraph *cur;
  int rd, wr, k;

  wr = 0;
  for (rd = 0; rd < wg->rightwordnum; rd++) {
    cur = wg->rightword[rd];
    /* look for cur among the entries already kept */
    k = 0;
    while (k < wr && wg->rightword[k] != cur) k++;
    if (k == wr) {
      wg->rightword[wr++] = cur;
    }
  }
  wg->rightwordnum = wr;
}

/**
 * Remove the specified graph word from the context lists of all its left
 * and right neighbours.
 *
 * For every left context word, @a wg is deleted from that word's right
 * list; for every right context word, @a wg is deleted from that word's
 * left list.  Remaining entries keep their order.  Nothing happens when
 * @a wg is NULL.
 *
 * @param wg [in] target graph word
 */
static void
wordgraph_remove_context(WordGraph *wg)
{
  WordGraph *peer;
  int n, src, kept;

  if (wg == NULL) return;

  /* detach wg from the right list of each of its left neighbours */
  for (n = 0; n < wg->leftwordnum; n++) {
    peer = wg->leftword[n];
    kept = 0;
    for (src = 0; src < peer->rightwordnum; src++) {
      if (peer->rightword[src] == wg) continue;
      peer->rightword[kept++] = peer->rightword[src];
    }
    peer->rightwordnum = kept;
  }

  /* detach wg from the left list of each of its right neighbours */
  for (n = 0; n < wg->rightwordnum; n++) {
    peer = wg->rightword[n];
    kept = 0;
    for (src = 0; src < peer->leftwordnum; src++) {
      if (peer->leftword[src] == wg) continue;
      peer->leftword[kept++] = peer->leftword[src];
    }
    peer->leftwordnum = kept;
#ifdef GDEBUG2
    if (peer->leftwordnum == 0) {
      j_printf("leftword becomes 0 by remove_context\n");
      put_wordgraph(peer);
      j_printf("by deleting its left context:\n");
      put_wordgraph(wg);
    }
#endif
  }
}

/**
 * Link every left context word of a graph word directly to every right
 * context word of it.
 *
 * Marked words, @a wg itself, and pairs where the left and right word are
 * the same are skipped.  Links are added in both directions and only when
 * not already present.  Nothing happens when @a wg is NULL.
 *
 * @param wg [in] target graph word
 */
static void
wordgraph_link_context(WordGraph *wg)
{
  int li, ri;
  WordGraph *lw, *rw;

  if (wg == NULL) return;

  for (li = 0; li < wg->leftwordnum; li++) {
    lw = wg->leftword[li];
    if (lw->mark || lw == wg) continue;
    for (ri = 0; ri < wg->rightwordnum; ri++) {
      rw = wg->rightword[ri];
      if (rw->mark || rw == wg || rw == lw) continue;
      wordgraph_check_and_add_leftword(rw, lw);
      wordgraph_check_and_add_rightword(lw, rw);
    }
  }
}


/**************************************************************/
/* Operations for organizing WordGraph set */

/**
 * Actually erase the marked words in a word graph.
 *
 * Every word whose mark equals TRUE is unlinked from the list and freed;
 * the remaining words keep their order.  Context lists of the surviving
 * words are not touched here.
 *
 * @param rootp [i/o] pointer to root node of a word graph
 *
 * @return the number of erased words.
 */
static int
wordgraph_exec_erase(WordGraph **rootp)
{
  WordGraph **link;
  WordGraph *cur;
  int erased = 0;

  /* walk the list through the address of each incoming link, so that the
     root and the inner nodes are unlinked by the same code path */
  link = rootp;
  while ((cur = *link) != NULL) {
    if (cur->mark == TRUE) {
      *link = cur->next;
      wordgraph_free(cur);
      erased++;
    } else {
      link = &(cur->next);
    }
  }

  return(erased);
}

/** 
 * <JA>
 * ե qsort Хå
 * 
 * @param x [in] ǣ
 * @param y [in] ǣ
 * 
 * @return x > y ʤ 1, x < y ʤ -1, x = y ʤ 0 ֤
 * </JA>
 * <EN>
 * qsort callback for word sorting.
 * 
 * @param x [in] element 1
 * @param y [in] element 2
 * 
 * @return 1 if x>y, -1 if x<y, 0 if x = y.
 * </EN>
 */
static int
compare_lefttime(WordGraph **x, WordGraph **y)
{
  if ((*x)->lefttime > (*y)->lefttime) return 1;
  else if ((*x)->lefttime < (*y)->lefttime) return -1;
  else {
    if ((*x)->righttime > (*y)->righttime) return 1;
    else if ((*x)->righttime < (*y)->righttime) return -1;
    else {
      if ((*x)->fscore_head < (*y)->fscore_head) return 1;
      else if ((*x)->fscore_head > (*y)->fscore_head) return -1;
      else return 0;
    }
  }
}

/**
 * Sort words by left time and annotate sequential ids for them in a word
 * graph.
 *
 * The number of words is stored in graph_totalwordnum.  When the graph is
 * not empty, the words are sorted with compare_lefttime(), each word gets
 * its position in the sorted order as id, and the list is re-linked in
 * that order.
 *
 * @param rootp [i/o] address of pointer to root node of a word graph
 */
void
wordgraph_sort_and_annotate_id(WordGraph **rootp)
{
  WordGraph *node;
  WordGraph **sorted;
  int total, k;

  /* count total number of words in the graph */
  total = 0;
  for (node = *rootp; node != NULL; node = node->next) total++;
  graph_totalwordnum = total;
  if (total == 0) return;

  /* gather the words into an array and sort it */
  sorted = (WordGraph **)mymalloc(sizeof(WordGraph *) * total);
  k = 0;
  for (node = *rootp; node != NULL; node = node->next) {
    sorted[k++] = node;
  }
  qsort(sorted, total, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_lefttime);

  /* assign ids and rebuild the list following the sorted order */
  for (k = 0; k < total; k++) {
    sorted[k]->id = k;
    sorted[k]->next = (k + 1 < total) ? sorted[k + 1] : NULL;
  }
  *rootp = sorted[0];

  free(sorted);
}

/**
 * Free all the words in a word graph and reset the root to NULL.
 *
 * @param rootp [i/o] pointer to root node of a word graph
 */
void
wordgraph_clean(WordGraph **rootp)
{
  WordGraph *cur, *following;

  for (cur = *rootp; cur != NULL; cur = following) {
    /* remember the successor before the node is released */
    following = cur->next;
    wordgraph_free(cur);
  }
  *rootp = NULL;
}


/*********************************************************************/
/* Post-processing of generated word arcs after search has been done */

/** 
 * <JA>
 * ñ쥰տåȤΤ qsort ѥХåfscore_head 
 * ߽˥Ȥ롥
 * 
 * @param x [in] ǣ
 * @param y [in] ǣ
 * 
 * @return qsort ˽स֤
 * </JA>
 * <EN>
 * Callback function for qsort to do word graph depth cutting. Graph
 * words will be sorted downward based on fscore_head.
 * 
 * @param x [in] element 1
 * @param y [in] element 2
 * 
 * @return values for qsort
 * </EN>
 */
static int
compare_beam(WordGraph **x, WordGraph **y)
{
  if ((*x)->fscore_head < (*y)->fscore_head) return 1;
  else if ((*x)->fscore_head > (*y)->fscore_head) return -1;
  else return 0;
}

/**
 * @brief  Post-processing step 1: Extract initial word graph.
 *
 * Extract initial word graph from generated word arcs while search, by
 * purging leaf nodes and arcs that are not on the path from edge to edge.
 *
 * A word is marked when it does not start at frame 0 and has no unmarked
 * left context, or when it does not end at frame (peseqlen - 1) and has no
 * unmarked right context.  Marking is repeated until nothing changes.
 * Marked entries are then dropped from the context lists of the surviving
 * words, and the marked words are erased.  Progress is reported through
 * j_printf().
 *
 * @param rootp [i/o] pointer to root node of a word graph
 */
void
wordgraph_purge_leaf_nodes(WordGraph **rootp)
{
  WordGraph *node;
  int k, keep;
  boolean again, alive;
  int total, removed, lost_left, lost_right;

  /* count whole */
  total = 0;
  for (node = *rootp; node != NULL; node = node->next) total++;
  j_printf("- %d initial word arcs generated\n", total);
  if (total == 0) return;

  j_printf("Step 1: purge leaf nodes\n");

  /* mark words to be erased, iterating until a pass marks nothing new */
  lost_left = 0;
  lost_right = 0;
  do {
    again = FALSE;
    for (node = *rootp; node != NULL; node = node->next) {
      if (node->mark == TRUE) continue;

      /* not at the left edge: needs at least one live left context */
      if (node->lefttime != 0) {
	alive = FALSE;
	for (k = 0; k < node->leftwordnum; k++) {
	  if (node->leftword[k]->mark == FALSE) {
	    alive = TRUE;
	    break;
	  }
	}
	if (!alive) {
	  node->mark = TRUE;
	  again = TRUE;
	  lost_left++;
	  continue;
	}
      }

      /* not at the right edge: needs at least one live right context */
      if (node->righttime != peseqlen - 1) {
	alive = FALSE;
	for (k = 0; k < node->rightwordnum; k++) {
	  if (node->rightword[k]->mark == FALSE) {
	    alive = TRUE;
	    break;
	  }
	}
	if (!alive) {
	  node->mark = TRUE;
	  again = TRUE;
	  lost_right++;
	  continue;
	}
      }
    }
  } while (again == TRUE);

  j_printf("- %d leaf words found (left_blank=%d, right_blank=%d)\n", lost_left + lost_right, lost_left, lost_right);

  /* drop marked entries from the context lists of surviving words */
  for (node = *rootp; node != NULL; node = node->next) {
    if (node->mark) continue;

    keep = 0;
    for (k = 0; k < node->leftwordnum; k++) {
      if (node->leftword[k]->mark == FALSE) {
	node->leftword[keep++] = node->leftword[k];
      }
    }
    node->leftwordnum = keep;

    keep = 0;
    for (k = 0; k < node->rightwordnum; k++) {
      if (node->rightword[k]->mark == FALSE) {
	node->rightword[keep++] = node->rightword[k];
      }
    }
    node->rightwordnum = keep;
  }

  /* execute erase of marked words */
  removed = wordgraph_exec_erase(rootp);
  j_printf("- %d words purged, %d words left in lattice\n", removed, total - removed);

}

/**
 * @brief  Post-processing step 1.5: word graph depth cutting
 *
 * If GRAPHOUT_DEPTHCUT is defined, perform word graph depth cutting;
 * otherwise only a warning is printed.
 *
 * With depth cutting enabled and graphout_cut_depth >= 0, the words are
 * visited in descending order of fscore_head.  Each visited word
 * increments a per-frame counter for every frame it spans, and is marked
 * for removal when the counter exceeds graphout_cut_depth on all of its
 * frames.  Words that lose all live left or right contexts as a result are
 * then marked as well (same rule as in step 1), the context lists of the
 * surviving words are compacted, and the marked words are erased.
 *
 * @param rootp [i/o] pointer to root node of a word graph
 */
void
wordgraph_depth_cut(WordGraph **rootp)
{
#ifdef GRAPHOUT_DEPTHCUT

  WordGraph *wg;
  int i, dst;
  boolean changed;
  int count, erased, del_left, del_right;
  WordGraph **wlist;
  boolean f;
  int *wc;
  int t;
  int pruned;


  /* a negative depth disables the cut */
  if (graphout_cut_depth < 0) return;

  j_printf("Step 1.5: cut less likely hypothesis by depth of %d\n", graphout_cut_depth);

  /* count whole */
  count = 0;
  for(wg=*rootp;wg;wg=wg->next) count++;
  if (count == 0) return;
  
  /* prepare buffer to count words per frame (one counter per input frame) */
  wc = (int *)mymalloc(sizeof(int) * peseqlen);
  for (t=0;t<peseqlen;t++) wc[t] = 0;
  /* sort words by fscore_head, best first */
  wlist = (WordGraph **)mymalloc(sizeof(WordGraph *) * count);
  i = 0;
  for(wg=*rootp;wg;wg=wg->next) {
    wlist[i++] = wg;
  }
  qsort(wlist, count, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_beam);
  /* count words per frame, and mark them if below beam width */
  pruned = 0;
  for (i=0;i<count;i++) {
    wg = wlist[i];
    /* f stays TRUE only if every frame of this word is already deeper
       than the allowed depth; note that the counters are incremented for
       pruned words too */
    f = TRUE;
    for (t=wg->lefttime;t<=wg->righttime;t++) {
      wc[t]++;
      if (wc[t] <= graphout_cut_depth) f = FALSE;
    }
    if (f) {
      /* the word is only marked here; its neighbours' context lists are
         cleaned up by the compaction below (a direct call to
         wordgraph_remove_context(wg) is intentionally disabled) */
      wg->mark = TRUE;
      pruned++;
    }
  }
#ifdef GDEBUG2
  printf("GRAPH DEPTH STATISTICS: NUMBER OF WORDS PER FRAME\n");
  for(t=0;t<peseqlen;t++) {
    if (wc[t] > graphout_cut_depth) {
      printf("*");
    } else {
      printf(" ");
    }
    printf("%4d: %d\n", t, wc[t]);
  }
#endif
  j_printf("- %d words out of %d are going to be pruned by depth cutting\n", pruned, count);
  free(wlist);
  free(wc);

  /* mark words that became leaves because of the pruning above;
     repeat until a whole pass marks nothing new */
  del_left = del_right = 0;
  do {
    changed = FALSE;
    for(wg=*rootp;wg;wg=wg->next) {
      if (wg->mark == TRUE) continue;
      /* mark if wg has no left context, or all leftwords are marked
         (words starting at frame 0 are exempt) */
      if (wg->lefttime != 0) {
	for(i=0;i<wg->leftwordnum;i++) {
	  if (wg->leftword[i]->mark == FALSE) break;
	}
	if (i >= wg->leftwordnum) {
	  wg->mark = TRUE;
	  changed = TRUE;
	  del_left++;
	  continue;
	}
      }
      /* mark if wg has no right context, or all rightwords are marked
         (words ending at the last frame are exempt) */
      if (wg->righttime != peseqlen - 1) {
	for(i=0;i<wg->rightwordnum;i++) {
	  if (wg->rightword[i]->mark == FALSE) break;
	}
	if (i >= wg->rightwordnum) {
	  wg->mark = TRUE;
	  changed = TRUE;
	  del_right++;
	  continue;
	}
      }
    }
  } while (changed == TRUE);

  j_printf("- %d new leaves found (left_blank=%d, right_blank=%d)\n", del_left + del_right, del_left, del_right);

  /* do compaction of left/rightwords: drop marked entries from the
     context lists of the surviving words */
    for(wg=*rootp;wg;wg=wg->next) {
      if (wg->mark) continue;
      dst = 0;
      for(i=0;i<wg->leftwordnum;i++) {
	if (wg->leftword[i]->mark == FALSE) {
	  if (dst != i) wg->leftword[dst] = wg->leftword[i];
	  dst++;
	}
      }
      wg->leftwordnum = dst;
    }
    for(wg=*rootp;wg;wg=wg->next) {
      if (wg->mark) continue;
      dst = 0;
      for(i=0;i<wg->rightwordnum;i++) {
	if (wg->rightword[i]->mark == FALSE) {
	  if (dst != i) wg->rightword[dst] = wg->rightword[i];
	  dst++;
	}
      }
      wg->rightwordnum = dst;
    }

  /* execute erase of marked words */
  erased = wordgraph_exec_erase(rootp);
  j_printf("- total %d words purged, %d words left in lattice\n", erased, count - erased);

#else  /* ~GRAPHOUT_DEPTHCUT */

  j_printf("Warning: Step 1.5: graph depth cutting has been disabled, skipped\n");

#endif

}

/**
 * Execute one pass of word boundary adjustment.  It looks through the
 * graph to check correspondence of word boundary information among
 * contexts, and if there is a gap, the beginning frame of the right word
 * is moved to just after the end frame of the left word.  If several
 * alignments are found among the left contexts, the word is duplicated
 * and each copy is fit to one alignment.  Words with invalid alignment
 * are eliminated (marked; they are not freed here).
 *
 * The candidate start frames of a word are collected into the file-static
 * buffers framelist[] / framescorelist[], which this function allocates on
 * first use and enlarges when needed.
 *
 * Newly created copies are inserted at the head of the list, so they are
 * not visited again during the same pass.
 *
 * @param rootp [i/o] root pointer to the list of graph words; updated when
 * duplicated words are prepended
 * @param mov_num_ret [out] pointer to hold resulted number of moved words
 * @param dup_num_ret [out] pointer to hold resulted number of duplicated
 * words (for each split word: number of candidate boundaries minus one)
 * @param del_num_ret [out] pointer to hold resulted number of eliminated words
 * @param mod_num_ret [out] pointer to hold resulted number of words whose
 * head score alone was modified
 * @param count [in] number of words in graph; used as the initial size of
 * the work buffers
 * @param maxfnum [i/o] current capacity of the work buffers; 0 means they
 * have not been allocated yet
 *
 * @return TRUE if any word has been changed, or FALSE if no word has been altered.
 */
static boolean
wordgraph_adjust_boundary_sub(WordGraph **rootp, int *mov_num_ret, int *dup_num_ret, int *del_num_ret, int *mod_num_ret, int count, int *maxfnum)
{
  WordGraph *wg, *left, *new;
  int i, j, k;
  int fnum;
  int mov_num, dup_num, del_num, mod_num;
  boolean changed = FALSE;

  mov_num = dup_num = del_num = mod_num = 0;

  /* maximum number of left context words does not exceed total word num */
  /* allocate temporary work area.  these are permanent buffers that will
     be kept between recognition sessions. */
  if (*maxfnum == 0) {
    /* when this is called for the first time, allocate buffer */
    *maxfnum = count;
    framelist = (int *)mymalloc(sizeof(int) * (*maxfnum));
    framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum));
#ifdef GDEBUG
    j_printerr("Notice: maxfnum starts at %d\n", *maxfnum);
#endif
  } else if (*maxfnum < count) {
    /* for later call, expand buffer if necessary (old contents are not
       needed, so the buffers are simply re-created) */
    free(framescorelist);
    free(framelist);
    *maxfnum = count;
    framelist = (int *)mymalloc(sizeof(int) * (*maxfnum));
    framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum));
#ifdef GDEBUG
    j_printerr("Notice: maxfnum expanded by count (%d)\n", *maxfnum);
#endif
  }

#ifdef GDEBUG2
  printf("***CHECK LOOP BEGIN***\n");
#endif
  for(wg=*rootp;wg;wg=wg->next) {
    if (wg->mark) continue;	/* already marked */
#ifdef GDEBUG2
    printf("  [%d..%d] \"%s\"\n", wg->lefttime, wg->righttime, wchmm->winfo->woutput[wg->wid]);
#endif
    if (wg->leftwordnum == 0) {	/* no leftword */
      if (wg->lefttime != 0) {
	/* some fraction found by former elimination: remove this */
#ifdef GDEBUG2
	printf("  -> no leftword at middle of lattice, eliminate this\n");
#endif
	wordgraph_remove_context(wg);
	wg->mark = TRUE;
	del_num++;
	changed = TRUE;
      }
      /* if has no leftword, adjustment of this word is not needed */
      continue;
    }
    if (wg->rightwordnum == 0) {	/* no rightword */
      if (wg->righttime != peseqlen-1) {
	/* some fraction found by former elimination: remove this */
#ifdef GDEBUG2
	printf("  -> no rightword at middle of lattice, eliminate this\n");
#endif
	wordgraph_remove_context(wg);
	wg->mark = TRUE;
	del_num++;
	changed = TRUE;
	continue;
      }
      /* if on right edge, continue adjusting */
    }
    /* collect lefttime variation to framelist[] and framescorelist[] */
    fnum = 0;
    /* check for buffer overrun: at most one entry per left context word
       is stored below */
    if (wg->leftwordnum > (*maxfnum)) {
      /* expand buffer if necessary */
      free(framescorelist);
      free(framelist);
      *maxfnum = wg->leftwordnum;
      framelist = (int *)mymalloc(sizeof(int) * (*maxfnum));
      framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum));
#ifdef GDEBUG
      j_printerr("Notice: wg->leftwordnum exceeds maxfnum (%d > %d), expanded\n", wg->leftwordnum, *maxfnum);
#endif
    }
    /* each distinct (left->righttime + 1) among the unmarked left words
       becomes one candidate start frame; the score kept for a frame is
       the one of the first left word that produced it */
    for(i=0;i<wg->leftwordnum;i++) {
      left = wg->leftword[i];
      if (left->mark) continue;
      for(j=0;j<fnum;j++) {
	if (framelist[j] == left->righttime + 1) break;
      }
      if (j >= fnum) {
	framelist[fnum] = left->righttime + 1;
	framescorelist[fnum] = left->gscore_tail
#ifdef USE_NGRAM
	  /* the tail gscore contains the language score of the word,
	     so the head gscore of its right context should consider this */
	  - left->lscore
#endif
	  ;
	fnum++;
      }
    }
#ifdef GDEBUG2
    printf("  possible boundary of left words:");
    if (fnum == 0) {
      printf(" (not exist)\n");
    } else {
      for(j=0;j<fnum;j++) printf(" %d", framelist[j]);
      printf("\n");
    }
#endif
    if (fnum == 0) continue;	/* no left context (all left words are marked) */
    /* one candidate: just move the original (or not move) */
    if (fnum == 1) {
      if (wg->lefttime != framelist[0]) {
#ifdef GDEBUG2
	printf("    !moving as [%d..%d]", framelist[0], wg->righttime);
#endif
	/* check the time correctness: if the lefttime is larger than
	   righttime, this graph word has been completely overridden by
	   the left word (i.e. the aligned frames are absorbed by
	   re-alignment).  In this case this word should be removed.
	*/
	if (framelist[0] > wg->righttime) {
#ifdef GDEBUG2
	  printf(" : eliminated");
#endif
	  /* connect the left and right neighbours directly before this
	     word is detached from them */
	  wordgraph_link_context(wg);
	  wordgraph_remove_context(wg);
	  wg->mark = TRUE;
	  del_num++;
	} else {
#ifdef GDEBUG2
	  printf(" : ok");
#endif
	  /* adjust time and score */
	  wg->lefttime = framelist[0];
	  wg->gscore_head = framescorelist[0];
	  mov_num++;
	}
#ifdef GDEBUG2
	printf("\n");
#endif
	changed = TRUE;
      } else if (wg->gscore_head != framescorelist[0]) {
	/* adjust only score */
#ifdef GDEBUG2
	printf("    !ghead score changed: %f -> %f\n", wg->gscore_head, framescorelist[0]);
#endif
	wg->gscore_head = framescorelist[0];
	mod_num++;
	changed = TRUE;
      }
    }
    if (fnum > 1) {
      /* multiple candidate: make copy for each (fnum)*/
      for(j=0;j<fnum;j++) {
	/* duplicate (counted even when the candidate turns out to be
	   bogus below; the original is subtracted after the loop) */
	dup_num++;
#ifdef GDEBUG2
	printf("    !duping as [%d..%d]", framelist[j], wg->righttime);
#endif
	
	if (framelist[j] > wg->righttime) {
	  /* bogus link: link leftwords and rightwords, and delete this */
#ifdef GDEBUG2
	  printf(" : eliminated");
#endif
	  /* only the left words that produced this candidate frame are
	     bridged to the unmarked right words */
	  for(i=0;i<wg->leftwordnum;i++) {
	    left = wg->leftword[i];
	    if (left->mark) continue;
	    if (left->righttime + 1 == framelist[j]) {
	      for(k=0;k<wg->rightwordnum;k++) {
		if ((wg->rightword[k])->mark) continue;
		if (wg->rightword[k] == left) continue;
		wordgraph_check_and_add_leftword(wg->rightword[k], left);
		wordgraph_check_and_add_rightword(left, wg->rightword[k]);
	      }
	    }
	  }
	  del_num++;
	  
	} else {
	  /* really duplicate */
#ifdef GDEBUG2
	  printf(" : ok");
#endif
	  /* the copy keeps everything of the original except the start
	     frame and the head Viterbi score, which come from candidate j */
	  new = wordgraph_new(wg->wid, wg->headphone, wg->tailphone, framelist[j], wg->righttime, wg->fscore_head, wg->fscore_tail, framescorelist[j], wg->gscore_tail
#ifdef USE_NGRAM
			      , wg->lscore
#else
			      , LOG_ZERO
#endif
#ifdef CM_SEARCH
			      , wg->cmscore
#else
			      , LOG_ZERO
#endif
			      );
	  /* copy corresponding link: left words matching this candidate
	     frame, and all unmarked right words, in both directions */
	  for(i=0;i<wg->leftwordnum;i++) {
	    if ((wg->leftword[i])->mark) continue;
	    if ((wg->leftword[i])->righttime + 1 == framelist[j]) {
	      wordgraph_add_leftword(new, wg->leftword[i]);
	      wordgraph_add_rightword(wg->leftword[i], new);
	    }
	  }
	  for(i=0;i<wg->rightwordnum;i++) {
	    if ((wg->rightword[i])->mark) continue;
	    wordgraph_add_rightword(new, wg->rightword[i]);
	    wordgraph_add_leftword(wg->rightword[i], new);
	  }
	  /* prepend the copy to the list; it will not be visited in this pass */
	  new->saved = TRUE;
	  new->next = *rootp;
	  *rootp = new;
	}
#ifdef GDEBUG2
	printf("\n");
#endif
      }
      
      /* remove the original */
#ifdef GDEBUG2
      printf("    !delete original [%d..%d]\n", wg->lefttime, wg->righttime);
#endif
      wordgraph_remove_context(wg);
      wg->mark = TRUE;
      dup_num--;
      
      changed = TRUE;
    }
  }

  /* report the statistics of this pass to the caller */
  *mov_num_ret = mov_num;
  *dup_num_ret = dup_num;
  *del_num_ret = del_num;
  *mod_num_ret = mod_num;

#ifdef GDEBUG2
  if (changed) {
    printf("*** some graph has been altered, check loop continues\n");
  } else {
    printf("*** graph not changed at last loop, check ends here\n");
  }
#endif

  return (changed);
}

/** 
 * <JA>
 * ˶䥹Ʊñ줬礽ޡ롥
 * 
 * @param rootp [i/o] ñꥹȤΥ롼ȥݥ
 * @param rest_ret [out] ޡΥñ֤ݥ
 * @param merged_ret [out] ޡ줿ñ֤ݥ
 * </JA>
 * <EN>
 * Merge duplicated words with exactly the same scores and alignments.
 * 
 * @param rootp [i/o] root pointer to the list of graph words
 * @param rest_ret [out] pointer to hold the resulting number of words left in the graph
 * @param merged_ret [out] pointer to hold the resulting number of merged words
 * </EN>
 */
static void
wordgraph_compaction_thesame_sub(WordGraph **rootp, int *rest_ret, int *merged_ret)
{
  WordGraph *keep;              /* word that stays when a pair is merged */
  WordGraph *dup;               /* later list entry compared against keep */
  int n;
  int total = 0;                /* all list entries visited, marked or not */
  int nmerged = 0;              /* entries marked as merged in this pass */
  int nerased;                  /* value returned by wordgraph_exec_erase() */

  for (keep = *rootp; keep != NULL; keep = keep->next) {
    total++;
    if (keep->mark == TRUE) continue;

    for (dup = keep->next; dup != NULL; dup = dup->next) {
      if (dup->mark == TRUE) continue;

      /* a pair qualifies only when word ID, edge phones, both boundaries
         and both partial sentence scores are all equal */
      if (keep->wid != dup->wid) continue;
      if (keep->headphone != dup->headphone) continue;
      if (keep->tailphone != dup->tailphone) continue;
      if (keep->lefttime != dup->lefttime) continue;
      if (keep->righttime != dup->righttime) continue;
      if (keep->fscore_head != dup->fscore_head) continue;
      if (keep->fscore_tail != dup->fscore_tail) continue;

      /* merge contexts of dup into keep */
      merge_contexts(keep, dup);

      /* for every unmarked context word of dup, swap its link from dup
         to keep */
      for (n = 0; n < dup->leftwordnum; n++) {
        if (!dup->leftword[n]->mark) {
          swap_rightword(dup->leftword[n], dup, keep);
        }
      }
      for (n = 0; n < dup->rightwordnum; n++) {
        if (!dup->rightword[n]->mark) {
          swap_leftword(dup->rightword[n], dup, keep);
        }
      }

      /* dup is now redundant: flag it for the erase step below */
      dup->mark = TRUE;
      nmerged++;
    }
  }

  /* execute erase of marked words */
  nerased = wordgraph_exec_erase(rootp);

  /* run uniq_leftword()/uniq_rightword() on every word still in the list
     (presumably dropping duplicated context entries created by the swaps) */
  for (keep = *rootp; keep != NULL; keep = keep->next) {
    uniq_leftword(keep);
    uniq_rightword(keep);
  }

  *rest_ret = total - nerased;
  *merged_ret = nmerged;
}

/** 
 * <JA>
 * @brief  ոΣñ춭Ĵ
 * 
 * GRAPHOUT_PRECISE_BOUNDARY ³ñ˰¸Τñ춭
 * 뤿ˡõˤơñȤ˼Ÿ
 * Ūñ춭ư롥ΤᡤñΤġʰưΡ
 * ȤбȤʤʤΤǡõλ˳ññ
 * ñ춭¤뤳ȤȤ롥
 *
 * ñ춭Τñ֤¤뤿ᡤ٤Ƥñ춭ưʤʤޤ
 * Ĵ֤롥ʥդǤûñʨǽʤ
 * 礬뤬ξ GRAPHOUT_LIMIT_BOUNDARY_LOOP ꤹ뤳Ȥǡ
 * ֤ξ¤ graphout_limit_boundary_loop_num ¤Ǥ롥
 * 
 * @param rootp [i/o] ñ쥰դΥ롼ȥΡɤؤΥݥ
 * </JA>
 * <EN>
 * @brief  Post-processing step 2: Adjust word boundaries.
 * 
 * When GRAPHOUT_PRECISE_BOUNDARY is defined, the word boundaries will be
 * moved depending on the later word expansion to get context-dependent
 * precise boundaries.  So the precise boundary, modified after generation
 * while search, should be propagated to the context words in the post
 * processing.
 *
 * Since the effect of moved word boundaries may propagate to the context words,
 * the adjustment procedure has to be executed iteratively until all the
 * boundaries are fixed.  However, when the graph is large, the oscillation of
 * short words will result in a very long loop.  By defining
 * GRAPHOUT_LIMIT_BOUNDARY_LOOP, the number of adjustment loops can be
 * limited to the number specified by graphout_limit_boundary_loop_num.
 * 
 * @param rootp [i/o] pointer to root node of a word graph
 * </EN>
 */
void
wordgraph_adjust_boundary(WordGraph **rootp)
{
#ifdef GRAPHOUT_PRECISE_BOUNDARY
  WordGraph *wg;
  int mov_num, dup_num, del_num, mod_num;
  int count, merged;
  boolean flag;
  int loopcount;
  int maxfnum;

  loopcount = 0;

  j_printf("Step 2: adjust boundaries\n");
  /* all four counters are reported below, so all four start from zero */
  mov_num = dup_num = del_num = mod_num = 0;

  /* count number of all words */
  count = 0;
  for(wg=*rootp;wg;wg=wg->next) count++;
  /* handed to adjust_boundary_sub by pointer; a value > 0 after the loop
     is taken to mean that the work area has been allocated */
  maxfnum = 0;

  do {
    /* do adjust: flag tells whether the graph was altered in this pass */
    flag = wordgraph_adjust_boundary_sub(rootp, &mov_num, &dup_num, &del_num, &mod_num, count, &maxfnum);
    /* do compaction: count is updated to the number of words left */
    wordgraph_compaction_thesame_sub(rootp, &count, &merged);
    /* count the pass unconditionally, so that the reported pass number
       advances even when GRAPHOUT_LIMIT_BOUNDARY_LOOP is not defined */
    loopcount++;
    j_printf("- #%d: %d moved, %d duplicated, %d purged, %d modified, %d idential, %d left\n", loopcount, mov_num, dup_num, del_num, mod_num, merged, count);
#ifdef GRAPHOUT_LIMIT_BOUNDARY_LOOP
    /* give up after the configured number of passes */
    if (loopcount >= graphout_limit_boundary_loop_num) {
      j_printf("*** loop count reached %d, terminate loop now\n", graphout_limit_boundary_loop_num);
      break;
    }
#endif
  } while (flag);

  /* free work area allocated in adjust_boundary_sub */
  if (maxfnum > 0) {
    free(framescorelist);
    free(framelist);
    /* the pointers are file-scope statics: do not leave them dangling */
    framescorelist = NULL;
    framelist = NULL;
  }

  /* execute erase of marked words */
  wordgraph_exec_erase(rootp);

#else

  j_printf("# Step 2: SKIP (adjusting boundaries)\n");

#endif /* GRAPHOUT_PRECISE_BOUNDARY */

}

 
/** 
 * <JA>
 * @brief  ոΣñ«͡ʴƱ
 * 
 * ñ춭ʬʸ⥹˰פƱñɤĤ«ͤ롥
 * 
 * @param rootp [i/o] ñ쥰դΥ롼ȥΡɤؤΥݥ
 * </JA>
 * <EN>
 * @brief  Post-processing step 3: Bundle words (exactly the same ones)
 * 
 * This function bundles same words which have exactly the same
 * boundaries and partial sentence scores.
 * 
 * @param rootp [i/o] pointer to root node of a word graph
 * </EN>
 */
void
wordgraph_compaction_thesame(WordGraph **rootp)
{
  int left_count;               /* words remaining after the pass */
  int merged_count;             /* words merged into another one */

  j_printf("Step 3: merge idential hypotheses (same score, boundary, context)\n");

  /* the actual work is shared with the boundary adjustment step */
  wordgraph_compaction_thesame_sub(rootp, &left_count, &merged_count);

  j_printf("- %d words merged, %d words left in lattice\n", merged_count, left_count);
}

/** 
 * <JA>
 * @brief  ոΣñ«͡ʶƱ
 * 
 * ñ춭郎פƱñɤĤ«ͤ롥
 * ƱǤʤƤ«ͤ롥ξ硤ʬʸǤ⤤䤬
 * Ĥ롥graph_merge_neighbor_range   ξϼ¹Ԥʤ

 * 
 * @param rootp [i/o] ñ쥰դΥ롼ȥΡɤؤΥݥ
 * </JA>
 * <EN>
 * @brief  Post-processing step 4: Bundle words (same boundaries)
 * 
 * This function bundles the same words which have exactly the same
 * boundaries, allowing them to have different scores.  The word with
 * the best partial sentence score will be adopted.  This function
 * will not take effect when graph_merge_neighbor_range is lower than 0.
 * 
 * @param rootp [i/o] pointer to root node of a word graph
 * </EN>
 */
void
wordgraph_compaction_exacttime(WordGraph **rootp)
{
  WordGraph *wg, *we;
  int i, count, erased;

  /* a negative range disables this step */
  if (graph_merge_neighbor_range < 0) {
    j_printf("# Step 4: SKIP (merge the same words with same boundary to the most likely one)\n");
    return;
  }

  j_printf("Step 4: merge same words with same boundary to the most likely one\n");

  /* count counts every list entry, including ones already marked */
  count = 0;
  for(wg=*rootp;wg;wg=wg->next) {
    count++;
    if (wg->mark == TRUE) continue;
    /* compare wg only against the entries that follow it in the list */
    for(we=wg->next;we;we=we->next) {
      if (we->mark == TRUE) continue;
      /* find same words at same position */
      if (wg->wid == we->wid &&
          wg->lefttime == we->lefttime &&
          wg->righttime == we->righttime) {
        /* merge contexts */
        merge_contexts(wg, we);
        /* swap contexts of left / right contexts */
        for(i=0;i<we->leftwordnum;i++) {
          swap_rightword(we->leftword[i], we, wg);
        }
        for(i=0;i<we->rightwordnum;i++) {
          swap_leftword(we->rightword[i], we, wg);
        }
        /* keep the max score: when we has the higher fscore_head, wg
           takes over its edge phones and all of its scores */
        if (wg->fscore_head < we->fscore_head) {
          wg->headphone = we->headphone;
          wg->tailphone = we->tailphone;
          wg->fscore_head = we->fscore_head;
          wg->fscore_tail = we->fscore_tail;
          wg->gscore_head = we->gscore_head;
          wg->gscore_tail = we->gscore_tail;
#ifdef USE_NGRAM
          wg->lscore = we->lscore;
#endif
#ifdef CM_SEARCH
          wg->cmscore = we->cmscore;
#endif
        }
        /* we has been absorbed into wg: flag it for erasure */
        we->mark = TRUE;
      }
    }
  }
  /* execute erase of marked words */
  erased = wordgraph_exec_erase(rootp);
  j_printf("- %d words merged, %d words left in lattice\n", erased, count-erased);

  /* run uniq_leftword()/uniq_rightword() on every remaining word */
  for(wg=*rootp;wg;wg=wg->next) {
    uniq_leftword(wg);
    uniq_rightword(wg);
  }
}

/** 
 * <JA>
 * @brief  ոΣñ«͡ʶ˵֡
 * 
 * ñ춭ƱñɤĤ«ͤ롥
 * graph_merge_neighbor_range Ϳ줬 0 Ǥϼ¹Ԥʤ
 * 
 * @param rootp [i/o] ñ쥰դΥ롼ȥΡɤؤΥݥ
 * </JA>
 * <EN>
 * @brief  Post-processing step 5: Bundle words (neighbor words)
 * 
 * This function bundles the same words which appear at similar places.
 * If the differences of both the left boundary and the right boundary
 * are within graph_merge_neighbor_range, they will be bundled.
 * If its value is lower than or equal to 0, this function does not take
 * effect.
 * 
 * @param rootp [i/o] pointer to root node of a word graph
 * </EN>
 */
void
wordgraph_compaction_neighbor(WordGraph **rootp)
{
  WordGraph *base;              /* word that survives a merge */
  WordGraph *other;             /* later list entry tested against base */
  int k;
  int total;                    /* all list entries visited */
  int removed;                  /* value returned by wordgraph_exec_erase() */

  /* zero or a negative range disables this step */
  if (graph_merge_neighbor_range <= 0) {
    j_printf("# Step 5: SKIP (merge the same words around)\n");
    return;
  }

  j_printf("Step 5: merge same words around, with %d frame margin\n", graph_merge_neighbor_range);

  total = 0;
  base = *rootp;
  while (base != NULL) {
    total++;
    if (base->mark != TRUE) {
      for (other = base->next; other != NULL; other = other->next) {
        if (other->mark == TRUE) continue;
        /* same word, both boundaries within the allowed frame margin */
        if (base->wid != other->wid) continue;
        if (abs(base->lefttime - other->lefttime) > graph_merge_neighbor_range) continue;
        if (abs(base->righttime - other->righttime) > graph_merge_neighbor_range) continue;

        /* merge contexts of other into base */
        merge_contexts(base, other);

        /* swap the links held by other's context words from other to base */
        for (k = 0; k < other->leftwordnum; k++) {
          swap_rightword(other->leftword[k], other, base);
        }
        for (k = 0; k < other->rightwordnum; k++) {
          swap_leftword(other->rightword[k], other, base);
        }

        /* keep the better-scoring hypothesis: base adopts other's edge
           phones and scores, while its boundary times are left as is */
        if (other->fscore_head > base->fscore_head) {
          base->headphone = other->headphone;
          base->tailphone = other->tailphone;
          base->fscore_head = other->fscore_head;
          base->fscore_tail = other->fscore_tail;
          base->gscore_head = other->gscore_head;
          base->gscore_tail = other->gscore_tail;
#ifdef USE_NGRAM
          base->lscore = other->lscore;
#endif
#ifdef CM_SEARCH
          base->cmscore = other->cmscore;
#endif
        }

        /* other is absorbed: flag it for the erase step */
        other->mark = TRUE;
      }
    }
    base = base->next;
  }

  removed = wordgraph_exec_erase(rootp);
  j_printf("- %d words merged, %d words left in lattice\n", removed, total-removed);

  /* run uniq_leftword()/uniq_rightword() on every remaining word */
  base = *rootp;
  while (base != NULL) {
    uniq_leftword(base);
    uniq_rightword(base);
    base = base->next;
  }

}


/**************************************************************/
/* generation of graph word candidates while search */

/** 
 * <JA>
 * ñ쥰ո֤λǤϤޤñ쥰ˤ
 * ϿƤʤ
 * 
 * @param wid [in] ñID
 * @param leftframe [in] ü(ե졼)
 * @param rightframe [in] ü(ե졼)
 * @param fscore_head [in] üǤʬʸ (g + h)
 * @param fscore_tail [in] üǤʬʸ (g + h)
 * @param gscore_head [in] ƬǤüViterbi (g)
 * @param gscore_tail [in] ǤüViterbi (g)
 * 
 * @return 줿ñؤΥݥ
 * </JA>
 * <EN>
 * Return a newly allocated graph word candidate.  The resulting word
 * is not registered to the word graph yet.
 * 
 * @param wid [in] word ID
 * @param wid_left [in] word ID of the left context word, or WORD_INVALID if none
 * @param wid_right [in] word ID of the right context word, or WORD_INVALID if none
 * @param leftframe [in] beginning time in frames
 * @param rightframe [in] end time in frames
 * @param fscore_head [in] sentence score on search at word head (g + h)
 * @param fscore_tail [in] sentence score on search at word tail (g + h)
 * @param gscore_head [in] Viterbi score accumulated from input end at word head (g)
 * @param gscore_tail [in] Viterbi score accumulated from input end at word tail (g)
 * @param lscore [in] language score of the word (bogus in Julian)
 * @param cm [in] word confidence score (computed on search time)
 * 
 * @return pointer to the newly created graph word candidate.
 * </EN>
 */
WordGraph *
wordgraph_assign(WORD_ID wid, WORD_ID wid_left, WORD_ID wid_right, int leftframe, int rightframe, LOGPROB fscore_head, LOGPROB fscore_tail, LOGPROB gscore_head, LOGPROB gscore_tail, LOGPROB lscore, LOGPROB cm)
{
  WordGraph *newarc;
  HMM_Logical *l, *ret, *head, *tail;

  /* find context dependent phones at head and tail.
     Every dictionary access goes through wchmm->winfo, so that wseq[]
     and wlen[] are always taken from the same word dictionary. */

  /* start from the last phone of the word and, when a right context word
     is given, try to replace it by the variant for that right context */
  l = wchmm->winfo->wseq[wid][wchmm->winfo->wlen[wid]-1];
  if (wid_right != WORD_INVALID) {
    ret = get_right_context_HMM(l, wchmm->winfo->wseq[wid_right][0]->name, wchmm->hmminfo);
    if (ret != NULL) l = ret;
  }
  /* tail is assigned unconditionally here so that it can never be read
     uninitialized; for a one-phone word it is overwritten below */
  tail = l;
  if (wchmm->winfo->wlen[wid] > 1) {
    /* multi-phone word: the head is a different phone, restart from it */
    l = wchmm->winfo->wseq[wid][0];
  }
  /* when a left context word is given, try to replace the head phone by
     the variant for that left context */
  if (wid_left != WORD_INVALID) {
    ret = get_left_context_HMM(l, wchmm->winfo->wseq[wid_left][wchmm->winfo->wlen[wid_left]-1]->name, wchmm->hmminfo);
    if (ret != NULL) l = ret;
  }
  head = l;
  if (wchmm->winfo->wlen[wid] <= 1) {
    /* one-phone word: the same phone, with both contexts applied, serves
       as head and tail */
    tail = l;
  }

  /* generate a new graph word hypothesis */
  newarc = wordgraph_new(wid, head, tail, leftframe, rightframe, fscore_head, fscore_tail, gscore_head, gscore_tail, lscore, cm);
  return newarc;
}

/** 
 * <JA>
 * ññ쥰դΰȤƳꤹ롥ꤵ줿ñˤ
 * saved  TRUE åȤ롥
 * 
 * @param wg [i/o] Ͽ륰ñ
 * @param right [i/o] @a wg αƥȤȤʤñ
 * @param root [i/o] Ѥñ쥰դΥ롼ȥΡɤؤΥݥ
 * </JA>
 * <EN>
 * Register a graph word candidate to the word graph as a member.
 * The registered word will have the saved member to be set to TRUE.
 * 
 * @param wg [i/o] graph word candidate to be registered
 * @param right [i/o] right context graph word
 * @param root [i/o] pointer to root node of already registered word graph
 * </EN>
 */
void
wordgraph_save(WordGraph *wg, WordGraph *right, WordGraph **root)
{
  /* nothing to register */
  if (wg == NULL) return;

  /* push wg onto the head of the registered list and flag it as saved */
  wg->next = *root;
  *root = wg;
  wg->saved = TRUE;

  /* link wg and its right context word to each other */
  wordgraph_add_leftword(right, wg);
  wordgraph_add_rightword(wg, right);
}

#ifdef GRAPHOUT_DYNAMIC

/** 
 * <JA>
 * ñ쥰ոˤĤơ˳ꤷñƱ֤
 * Ʊñ줬뤫ɤĴ٤롥⤷Сñ쥰ո
 * ƥȤ򤽤γѤߥñ˥ޡ롥
 *
 * GRAPHOUT_SEARCHϡˤõߤ٤ɤȽꤹ롥
 * ʤñ첾⤬ΥñκƥȤȤƴ˳ꤷ
 * ñˤСʾŸפõߤ٤Ƚꤹ롥
 * 
 * @param now [i/o] ñ쥰ո
 * @param root [i/o] Ѥñ쥰դΥ롼ȥΡɤؤΥݥ
 * @param next_wid [in] ñ첾
 * @param merged_p [out] õߤ٤ʤ TRUE³ԤƤ褱
 * FALSE Ǽ (GRAPHOUT_SEARCH )
 * 
 * @return Ʊ֤Ʊñ줬ä硤ޡ
 * ѤߥñؤΥݥ󥿤֤⤷ʤä硤NULL ֤
 * </JA>
 * <EN>
 * Check if a graph word with the same word ID and same position as the
 * given graph word candidate exists in the already registered word graph.
 * If such graph word is found, the word contexts of the given word
 * graph candidate will be merged to the found graph word in the registered
 * word graph.
 *
 * When GRAPHOUT_SEARCH is defined, whether to terminate the search at here
 * will be determined here.  That is, if the next word in search already
 * exists in the list of left context words of the merged graph word,
 * it is determined that the next path has already been expanded and thus
 * there is no need to proceed more on this hypothesis.
 * 
 * @param now [i/o] graph word candidate
 * @param root [i/o] pointer to root node of already registered word graph
 * @param next_wid [in] next word on search
 * @param merged_p [out] will be set to TRUE if search should be terminated,
 * or FALSE if search should be proceeded (when GRAPHOUT_SEARCH defined)
 * 
 * @return the pointer to the already registered graph word when the same
 * word was found on the same position, or NULL if such word not found in
 * already registered word graph.
 * </EN>
 */
WordGraph *
wordgraph_check_merge(WordGraph *now, WordGraph **root, WORD_ID next_wid, boolean *merged_p)
{
  WordGraph *wg;
  int i;
#ifdef GDEBUG
  WordGraph *w;
#endif

  /* *merged_p is written only when GRAPHOUT_SEARCH is defined; without it
     this function never touches the caller's variable */
#ifdef GRAPHOUT_SEARCH
  *merged_p = FALSE;
#endif

  /* no candidate given: nothing to merge */
  if (now == NULL) return(NULL);

#ifdef GDEBUG
  /* debug: dump the candidate and its current context words */
  printf("check_merge: checking \"%s\"[%d..%d]\n", wchmm->winfo->woutput[now->wid], now->lefttime, now->righttime);
  for(i=0;i<now->leftwordnum;i++) {
    w = now->leftword[i];
    printf("\t left%d:  \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
  }
  for(i=0;i<now->rightwordnum;i++) {
    w = now->rightword[i];
    printf("\tright%d:  \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
  }
#endif

  /* scan the registered words for one with the same ID and boundaries */
  for(wg=*root;wg;wg=wg->next) {
    /* never merge a word with itself */
    if (wg == now) continue;
    /* NOTE(review): this whole function is already enclosed in
       #ifdef GRAPHOUT_DYNAMIC, so the inner guard below is always true */
#ifdef GRAPHOUT_DYNAMIC
    /* skip already merged word */
    if (wg->purged) continue;
#endif
    if (graph_merge_neighbor_range < 0) {
      /* when no merging, words with different triphone context at word edge
	 should be differentiated */
      if (wg->headphone != now->headphone || wg->tailphone != now->tailphone) {
	continue;
      }
    }
    if (wg->wid == now->wid
	&& wg->lefttime == now->lefttime
	&& wg->righttime == now->righttime) {
      /* same word on the same position is found in current word graph */
#ifdef GDEBUG
      printf("check_merge: same word found: \"%s\"[%d..%d]\n", wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
      for(i=0;i<wg->leftwordnum;i++) {
	w = wg->leftword[i];
	printf("\t left%d:  \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
      }
      for(i=0;i<wg->rightwordnum;i++) {
	w = wg->rightword[i];
	printf("\tright%d:  \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
      }
#endif
      /* merge contexts of now into the registered word wg */
      merge_contexts(wg, now);
      /* swap contexts of left / right contexts: each context word of now
	 gets its link changed from now to wg, followed by a uniq pass
	 over that word's list */
      for(i=0;i<now->leftwordnum;i++) {
	swap_rightword(now->leftword[i], now, wg);
	uniq_rightword(now->leftword[i]);
      }
      for(i=0;i<now->rightwordnum;i++) {
	swap_leftword(now->rightword[i], now, wg);
	uniq_leftword(now->rightword[i]);
      }
#ifdef GRAPHOUT_SEARCH
      /* if the left and right contexts of now are already included in wg,
	 and wg already has left node of next word,
	 it means that
	 the current word and the last word context is
	 already included in the existing word graph.
	 So, in the case this partial path should be abandoned.
      */
      /* the test below only looks for a left context word of wg whose
	 word ID equals next_wid */
      for(i=0;i<wg->leftwordnum;i++) {
	if (wg->leftword[i]->wid == next_wid) break;
      }
      if (i < wg->leftwordnum) {
	*merged_p = TRUE;
      }
#endif /* GRAPHOUT_SEARCH */
#ifdef GRAPHOUT_OVERWRITE
      /*  if current hypothesis score is higher than saved,
	  overwrite the scores and not terminate.
	  The compared score is gscore_head when GRAPHOUT_OVERWRITE_GSCORE
	  is defined, fscore_head otherwise. */
      if (
#ifdef GRAPHOUT_OVERWRITE_GSCORE
	  wg->gscore_head < now->gscore_head
#else
	  wg->fscore_head < now->fscore_head
#endif
	  ) {
	  /* wg takes over the edge phones and all scores of now */
	  wg->headphone = now->headphone;
	  wg->tailphone = now->tailphone;
	  wg->fscore_head = now->fscore_head;
	  wg->fscore_tail = now->fscore_tail;
	  wg->gscore_head = now->gscore_head;
	  wg->gscore_tail = now->gscore_tail;
#ifdef USE_NGRAM
	  wg->lscore = now->lscore;
#endif
#ifdef CM_SEARCH
	  wg->cmscore = now->cmscore;
#endif
#ifdef GRAPHOUT_SEARCH
	  /* a better score was found: cancel the termination request that
	     may have been set above */
	  *merged_p = FALSE;
#endif
      }
#endif /* GRAPHOUT_OVERWRITE */
      /* the merged word should be discarded for later merging from
	 another word, so disable this */
      now->purged = TRUE;
      
      /* return the found one */
      return wg;
    }
  }
  /* if the same word not found, return NULL */
  return NULL;
}
#endif /* GRAPHOUT_DYNAMIC */


/**************************************************************/
/* misc. functions */

/** 
 * <JA>
 * ñξƥȤǽϤ롥ƤϰʲΤȤꡧ
 * <pre>
 *   ID: left=ƥȤID[,ID,...] right=ƥID[,ID,..]
 *   [üե졼..üե졼]
 *   wid=ñID
 *   name="ñ̾"
 *   lname="N-gram ñ̾뤤ϥƥֹ (Julian)"
 *   f=õκüǤʬʸ(g(n) + h(n+1)) n=ñ
 *   f_prev=õαüǤʬʸ(g(n-1) + h(n)) n=ñ
 *   g_head=üǤViterbi g(n)
 *   g_prev=üǤViterbi g(n-1) + LM(n)
 *   lscore=쥹 LM(n)   (Julius ξΤ)
 *   AMavg=ե졼ʿѲ
 *   cmscore=ñ쿮
 * </pre>
 * 
 * @param wg [in] Ϥ륰ñ
 * </JA>
 * <EN>
 * Output information of a graph word in text in the format below:
 * (n means the word)
 *
 * <pre>
 *   ID: left=left_context_ID[,ID,...] right=right_context_ID[,ID,...]
 *   [left_edge_frame...right_edge_frame]
 *   wid=word_id
 *   name="word string"
 *   lname="N-gram word string (Julius) or category number (Julian)"
 *   f="partial sentence score at left edge (g(n) + h(n+1)) on search time"
 *   f_prev="partial sentence score at right edge (g(n-1) + h(n)) on search time"
 *   g_head="accumulated viterbi score at left edge (g(n))"
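 *   g_prev="accumulated viterbi score at right edge (g(n-1) + LM(n))"
 *   lscore="language score LM(n)  (Julius only)"
 *   AMavg="average acoustic likelihood per frame"
 *   cmscore="confidence score"
 *   headphone="name of the context dependent phone at word head"
 *   tailphone="name of the context dependent phone at word tail"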
 * </pre>
 * 
 * @param wg [in] graph word to output
 * </EN>
 */
void
put_wordgraph(WordGraph *wg)
{
  int k;
  int nframes;                  /* length of the word in frames */

  if (wg == NULL) {
    j_printf("(NULL)\n");
    return;
  }

  /* graph word ID followed by the IDs of its context words */
  j_printf("%d:", wg->id);
  for (k = 0; k < wg->leftwordnum; k++) {
    if (k == 0) {
      j_printf(" left=%d", wg->leftword[k]->id);
    } else {
      j_printf(",%d", wg->leftword[k]->id);
    }
  }
  for (k = 0; k < wg->rightwordnum; k++) {
    if (k == 0) {
      j_printf(" right=%d", wg->rightword[k]->id);
    } else {
      j_printf(",%d", wg->rightword[k]->id);
    }
  }

  /* boundaries, word identity and search scores */
  j_printf(" [%d..%d]", wg->lefttime, wg->righttime);
  j_printf(" wid=%d name=\"%s\" lname=\"%s\" f=%f f_prev=%f g_head=%f g_prev=%f", wg->wid, wchmm->winfo->woutput[wg->wid], wchmm->winfo->wname[wg->wid], wg->fscore_head, wg->fscore_tail, wg->gscore_head, wg->gscore_tail);

  /* the per-frame average is printed only when the frame count is
     non-zero, which avoids a division by zero */
  nframes = wg->righttime - wg->lefttime + 1;
#ifdef USE_NGRAM
  j_printf(" lscore=%f", wg->lscore);
  if (nframes != 0) {
    /* the language score is subtracted before averaging */
    j_printf(" AMavg=%f", (wg->gscore_head - wg->gscore_tail - wg->lscore) / (float)nframes);
  }
#else
  if (nframes != 0) {
    j_printf(" AMavg=%f", (wg->gscore_head - wg->gscore_tail) / (float)nframes);
  }
#endif
#ifdef CM_SEARCH
  j_printf(" cmscore=%f", wg->cmscore);
#endif

  /* names of the phones stored for the word head and tail */
  j_printf(" headphone=%s", wg->headphone->name);
  j_printf(" tailphone=%s", wg->tailphone->name);
  j_printf("\n");
}

/** 
 * <JA>
 * 줿ñ쥰ñƥȽϤ롥
 * 
 * @param root [in] ñ쥰դΥ롼ȥΡɤؤΥݥ
 * </JA>
 * <EN>
 * Output text information of all the words in word graph.
 * 
 * @param root [in] pointer to root node of a word graph
 * </EN>
 */
void
wordgraph_dump(WordGraph *root)
{
  WordGraph *cur = root;

  j_printf("--- begin wordgraph data ---\n");
  /* one line per graph word, in list order */
  while (cur != NULL) {
    put_wordgraph(cur);
    cur = cur->next;
  }
  j_printf("--- end wordgraph data ---\n");
}

/** 
 * <JA>
 * ǥХåѡñ쥰դå롥
 * 
 * @param rootp [in] ñ쥰դΥ롼ȥΡɤؤΥݥ
 * </JA>
 * <EN>
 * For debug: Check the coherence in word graph.
 * 
 * @param rootp [in] pointer to root node of a word graph
 * </EN>
 */
void
wordgraph_check_coherence(WordGraph *rootp)
{
  WordGraph *cur;               /* word being checked */
  WordGraph *lw, *rw;           /* one of its left / right context words */
  int li, ri;

  for (cur = rootp; cur != NULL; cur = cur->next) {
    /* check ID overflow: a word with a bad ID is reported and skipped */
    if (!(cur->id >= 0 && cur->id < graph_totalwordnum)) {
      j_printf("ERROR: invalid id\n");
      put_wordgraph(cur);
      continue;
    }

    /* every left context word must list cur among its right words */
    for (li = 0; li < cur->leftwordnum; li++) {
      lw = cur->leftword[li];
      if (!(lw->id >= 0 && lw->id < graph_totalwordnum)) {
        j_printf("ERROR: invalid id in left context\n");
        put_wordgraph(cur);
        continue;
      }
      ri = 0;
      while (ri < lw->rightwordnum && lw->rightword[ri] != cur) {
        ri++;
      }
      if (ri >= lw->rightwordnum) {
        j_printf("ERROR: reverse link not found in left context\n");
        put_wordgraph(cur);
        put_wordgraph(lw);
      }
    }

    /* every right context word must list cur among its left words */
    for (ri = 0; ri < cur->rightwordnum; ri++) {
      rw = cur->rightword[ri];
      if (!(rw->id >= 0 && rw->id < graph_totalwordnum)) {
        j_printf("ERROR: invalid id in right context\n");
        put_wordgraph(cur);
        continue;
      }
      li = 0;
      while (li < rw->leftwordnum && rw->leftword[li] != cur) {
        li++;
      }
      if (li >= rw->leftwordnum) {
        j_printf("ERROR: reverse link not found in right context\n");
        put_wordgraph(cur);
        put_wordgraph(rw);
      }
    }
  }
}

#endif /* GRAPHOUT */
