#include "app.h"

/****************************************************************/
/**************** GUI subroutines *******************************/
/****************************************************************/

#ifdef GUID_DEFINE_HEADER
/* needed for compile with static SDL library in mingw */
#include <initguid.h>
#include <dinput.h>
#endif

/* SDL headers */
#include <SDL.h>
#include <SDL_ttf.h>

/* GUI default parameters */
/* default window size in pixels; overridden by the -w / -h options */
#define DEFAULT_SCREEN_WIDTH  800
#define DEFAULT_SCREEN_HEIGHT 480
/* color depth (bits per pixel) requested from SDL_SetVideoMode() */
#define SCREEN_BPP 32
/* font file and point size used when -f / -s are not given */
#define DEFAULT_FONT_PATH "./data/ipagp.ttf"
#define DEFAULT_FONT_SIZE 100
/* window caption */
#define WINTITLE "Word Flow"
/* top-left pixel position where word strings are drawn; FONT_Y is also
   the first row cleared by GFillScreen() */
#define FONT_X 25
#define FONT_Y 25
/* height in pixels of the level meter drawn at the top of the window */
#define PROM_HEIGHT 20
/* number of analysed frames averaged between two level meter redraws */
#define PROM_INTERVAL 15
/* offset subtracted from the log of the averaged spectrum magnitude */
#define POWER_BASE 4.6
/* meter value that corresponds to a full-height bar */
#define POWER_MAX 9.0
/* name of the encoding word strings are converted to before rendering;
   also the default for the -c option */
#ifdef CHARACTER_CONVERSION
# ifdef USE_WIN32_MULTIBYTE
#  define SYSTEM_CHAR_CODE "utf-8"
# else
#  define SYSTEM_CHAR_CODE "utf8"
# endif
#endif

/* misc */
/* size in bytes of the text buffers that hold one word string */
#define MAXBUFLEN 4096

/* GUI widgets, font data, and parameters */
/* window surface returned by SDL_SetVideoMode() in GInit() */
static SDL_Surface *gScreenSurface;
/* font opened by GInit(), closed by GFree() */
static TTF_Font *Font = NULL;
/* window size in pixels (-w / -h options) */
static int screen_width = DEFAULT_SCREEN_WIDTH;
static int screen_height = DEFAULT_SCREEN_HEIGHT;
/* path of the TrueType font file (-f option); NULL until main() sets it */
static char *font_path = NULL;
/* font point size (-s option) */
static int font_size = DEFAULT_FONT_SIZE;
/* background color of the level meter, as a pixel value for gScreenSurface */
static Uint32 meter_color;
#ifdef CHARACTER_CONVERSION
/* TRUE when word strings must be converted with charconv() before rendering */
static boolean charconv_enable = FALSE;
/* name of the dictionary character encoding (-c option) */
static char *char_code = NULL;
#endif

/*************************************************************/
/** 
 * Initialize SDL.
 * 
 * @return 0 on success, or -1 on failure.
 */
static int
GInit(int maxwordlen)
{
  int i;
  int width;

  /* initialize library */
  if (SDL_Init(SDL_INIT_VIDEO) != 0) {
    fprintf(stderr, "Error: failed to initialize SDL library: %s.\n", SDL_GetError());
    return -1;
  }

  /* call clean up at exit */
  atexit(SDL_Quit);

  /* set window title */
  SDL_WM_SetCaption(WINTITLE, NULL);

  printf("width=%d, fontsize=%d\n", screen_width, font_size);

  /* request global software surface */
  gScreenSurface = SDL_SetVideoMode(screen_width, screen_height, SCREEN_BPP, SDL_SWSURFACE /* | SDL_ANYFORMAT | SD_FULLSCREEN */);
  if (gScreenSurface == NULL) {
    fprintf(stderr, "Erorr: failed to set video mode to %dx%dx%d: %s.\n", 
	    screen_width, screen_height, SCREEN_BPP, SDL_GetError());
    exit(1);
    return -1;
  }
  printf("set video mode to %d %d %d bpp\n", screen_width, screen_height, gScreenSurface->format->BitsPerPixel);

  /* initialize ttf font functions */
  if (TTF_Init()) return;

  /* load font */
  {
    FILE *fp = fopen(font_path, "r");
    if (!fp) return;
    fclose(fp);
  }
  Font = TTF_OpenFont(font_path, font_size);

  /* set level meter color */
  meter_color = SDL_MapRGB(gScreenSurface->format, 80, 45, 0);

  return 0;
}

/** 
 * Free the global resources held for SDL text rendering.
 *
 * Closes the font opened by GInit(), if any, and clears the pointer so
 * that calling this function again is harmless.
 */
static void
GFree(void)
{
  if (Font != NULL) {
    TTF_CloseFont(Font);
    Font = NULL;
  }
}

/** 
 * Drain the SDL event queue without blocking and look for a quit request.
 *
 * A quit request is either an SDL_QUIT event or a key press of the
 * ESC key.  Events queued behind a quit request are left in the queue.
 * 
 * @return -1 when a quit request was found, otherwise 0.
 */
static int
GPollEvent()
{
  enum { KEYCODE_ESC = 27 };
  SDL_Event event;

  while (SDL_PollEvent(&event)) {
    if (event.type == SDL_QUIT) {
      /* the window was asked to close */
      return -1;
    }
    if (event.type == SDL_KEYDOWN) {
      /* a key was pressed: only ESC is of interest */
      if (event.key.keysym.sym == KEYCODE_ESC) {
        return -1;
      }
    }
  }

  return 0;
}

/** 
 * Application quit function.
 *
 * Releases the recognition instance and the SDL resources, then ends the
 * process.  This is the normal way out of the application (quit request
 * or end of the input stream), so the process exits with status 0.
 * This function does not return.
 * 
 * @param recog [in] recognition instance.
 */
static void
EndProc(Recog *recog)
{
  /* free the whole recognition instance */
  j_recog_free(recog);
  /* free some global variables for SDL */
  GFree();
  /* end procedure for SDL */
  SDL_Quit();
  /* exit this process, reporting success */
  exit(0);
}

/*************************************************************/
/** 
 * Paint the text area of the application window with a single color.
 *
 * The filled rectangle spans the full window width and starts at row
 * FONT_Y, so the rows above FONT_Y are left untouched.  The requested
 * height is the full window height.
 * 
 * @param color [in] pixel value to fill with.
 */
static void
GFillScreen(Uint32 color)
{
  SDL_Rect area;

  area.w = screen_width;
  area.h = screen_height;
  area.x = 0;
  area.y = FONT_Y;

  SDL_FillRect(gScreenSurface, &area, color);
}

/** 
 * Output a word string as the final candidate.
 *
 * The text area is cleared, the word is scrolled from the row
 * corresponding to the last input frame up to the top, and is then left
 * displayed at (FONT_X, FONT_Y).  The level meter color is reset
 * afterwards.
 *
 * The word string is copied into a fixed buffer of MAXBUFLEN bytes and
 * is truncated if it does not fit.
 * 
 * @param r [in] recognition process instance that holds the word dictionary
 * @param framelen [in] vertical pixel offset at which the scroll starts
 * @param word [in] ID of the word to be displayed
 */
static void
GProcessOutput(RecogProcess *r, int framelen, WORD_ID word)
{
  static char buf[MAXBUFLEN];
  SDL_Color col = {70, 255, 120, 0}; /* color of the final candidate string */
  SDL_Rect dest;
  SDL_Surface *text = NULL;
  char *src = r->lm->winfo->woutput[word];
  int t;

#ifdef CHARACTER_CONVERSION
  if (charconv_enable) {
    /* do character conversion to utf8 */
    charconv(src, buf, MAXBUFLEN);
  } else {
    snprintf(buf, sizeof(buf), "%s", src);
  }
#else
  /* bounded copy: never write past the end of buf */
  snprintf(buf, sizeof(buf), "%s", src);
#endif
  fflush(stdout);

  /* write the word string to a text surface */
  if (Font != NULL) {
    text = TTF_RenderUTF8_Blended(Font, buf, col);
  }

  /* fill in the whole text area with black */
  GFillScreen(0);

  if (text == NULL) {
    /* nothing to draw (no font, empty string, or rendering failure):
       show the cleared screen and restore the meter color */
    fprintf(stderr, "Error: failed to render word string \"%s\"\n", buf);
    SDL_Flip(gScreenSurface);
    meter_color = SDL_MapRGB(gScreenSurface->format, 80, 45, 0);
    return;
  }

  /* scroll from the current bottom to the top */
  for (t = framelen; t >= 0; t -= 30) {
    dest.x = FONT_X;
    dest.y = FONT_Y + t;
    SDL_BlitSurface(text, NULL, gScreenSurface, &dest);
    SDL_Flip(gScreenSurface);
    /* erase a band tall enough to cover this frame's text */
    dest.w = screen_width;
    dest.h = font_size + 30;
    SDL_Delay(40);
    SDL_FillRect(gScreenSurface, &dest, SDL_MapRGB(gScreenSurface->format, 0, 0, 0));
  }

  /* output the final result  */
  dest.x = FONT_X;
  dest.y = FONT_Y;
  SDL_BlitSurface(text, NULL, gScreenSurface, &dest);
  SDL_Flip(gScreenSurface);

  /* reset level meter color */
  meter_color = SDL_MapRGB(gScreenSurface->format, 80, 45, 0);

  /* free the text surface */
  SDL_FreeSurface(text);
}

/**********************************************************************/
/************* speech recognition callbacks ***************************/
/**********************************************************************/

/** 
 * Julius callback that services the SDL event queue during recognition.
 *
 * When a quit request is pending, the application is shut down through
 * EndProc().
 * 
 * @param recog [in] recognition instance
 * @param dummy [in] unused callback argument
 */
static void
callback_poll_sdl(Recog *recog, void *dummy)
{
  int quit_requested = (GPollEvent() != 0);

  if (quit_requested) {
    EndProc(recog);
  }
}

/** 
 * Julius callback invoked with the final recognition result.
 *
 * It is also invoked when recognition failed: in that case a reason is
 * printed to stdout for the known negative status codes and nothing is
 * drawn.  Otherwise the first word of the best sentence is logged and
 * displayed through GProcessOutput().
 * 
 * @param recog [in] recognition instance
 * @param dummy [in] unused callback argument
 */
static void
process_result(Recog *recog, void *dummy)
{
  /* assume only one instance */
  RecogProcess *proc = recog->process_list;
  Sentence *best;

  if (proc->result.status < 0) {
    /* no recognition result is available for this input */
    if (proc->result.status == -3) {
      /* input rejected by GMM */
      printf("Rejected by GMM\n");
    } else if (proc->result.status == -2) {
      printf("Rejected by short input\n");
    } else if (proc->result.status == -1) {
      printf("Recognition failed\n");
    }
    return;
  }

  /* use only the best result */
  best = &(proc->result.sent[0]);

  /* output log to stdout */
  printf("recognized: %d: %s\n", proc->result.num_frame, proc->lm->winfo->woutput[best->word[0]]);

  /* output the string */
  GProcessOutput(proc, proc->am->mfcc->f - 1, best->word[0]);
}

/**********************************************************************/
/************* level indicator ****************************************/
/**********************************************************************/
/* MFCC work area used to analyse each frame; created in output_input_init() */
static MFCCWork *mfccwrk;
/* captured samples forming the current analysis window */
static SP16 *window;
/* capacity of window[] in samples (input framesize + 1) */
static int windowlen;
/* number of samples currently stored in window[] */
static int windownum;
/* spectrum magnitude per bin, accumulated over frames */
static float *power;
/* number of bins in power[] (half of the FFT size) */
static int powerlen;
/* number of frames accumulated into power[] since the last redraw */
static int nframe;

/* draw the spectrum stored in power[0..powerlen-1] */
static void
write_spectrum()
{
  int i;
  SDL_Rect dest;
  float rate;
  int height;

  /* clear */
  dest.x = 20;
  dest.y = 0;
  dest.w = powerlen * 1 + 1;
  dest.h = PROM_HEIGHT;
  SDL_FillRect(gScreenSurface, &dest, meter_color);

  for (i=0;i<powerlen;i++) {
    rate = power[i] / POWER_MAX;
    
    height = rate * PROM_HEIGHT;
    dest.x = 20 + i;
    dest.y = 20 - height;
    dest.w = 1;
    dest.h = height;
    SDL_FillRect(gScreenSurface, &dest, SDL_MapRGB(gScreenSurface->format, 200, 100, 0));
  }
  SDL_Flip(gScreenSurface);
}

/* Set up the work area of the level indicator: the sample window
   (input framesize + 1 samples), the MFCC work area built from the
   recognizer's analysis parameters, and the zeroed spectrum accumulator
   (one entry per half of the FFT size). */
static void
output_input_init(Recog *recog)
{
  int bin;

  /* sample window */
  windowlen = recog->jconf->input.framesize + 1;
  window = mymalloc(sizeof(SP16) * windowlen);
  windownum = 0;

  /* analysis work area and spectrum accumulator */
  mfccwrk = WMP_work_new(recog->mfcclist->para);
  powerlen = mfccwrk->fb.fftN / 2;
  power = mymalloc(sizeof(float) * powerlen);
  bin = 0;
  while (bin < powerlen) {
    power[bin] = 0.0;
    bin++;
  }

  nframe = 0;
}

/* Julius callback invoked for each chunk of captured input samples.
   The samples are fed into the analysis window; every time the window is
   full, one frame is analysed and its spectrum magnitude is added to
   power[].  After PROM_INTERVAL frames the averaged log spectrum is
   drawn with write_spectrum() and the accumulator is reset.  Samples
   that do not complete a window are kept for the next call. */
static void
output_input(Recog *recog, SP16 *speech, int samplenum, void *dummy)
{
  Value *para = recog->mfcclist->para;
  int pos = 0;                  /* next unread sample in speech[] */
  int ncopy;
  int k;

  while (pos < samplenum) {
    /* top up the window with as many samples as fit / are available */
    ncopy = min(windowlen - windownum, samplenum - pos);
    for (k = 0; k < ncopy; k++) {
      window[windownum++] = (float) speech[pos++];
    }
    if (windownum < windowlen) {
      /* window not complete yet: wait for more input */
      break;
    }

    /* copy the window into the work buffer, starting at index 1 */
    for (k = 0; k < windowlen; k++) {
      mfccwrk->bf[k+1] = (float) window[k];
    }

    /* analyse the frame */
    ZMeanFrame(mfccwrk->bf, para->framesize);
    PreEmphasise(mfccwrk->bf, para->framesize, para->preEmph);
    Hamming(mfccwrk->bf, para->framesize, mfccwrk);
    MakeFBank(mfccwrk->bf, mfccwrk, para);

    /* accumulate the magnitude of each bin */
    for (k = 0; k < powerlen; k++) {
      power[k] += sqrt(mfccwrk->fb.Re[k] * mfccwrk->fb.Re[k] + mfccwrk->fb.Im[k] * mfccwrk->fb.Im[k]);
    }
    nframe++;

    if (nframe >= PROM_INTERVAL) {
      /* convert to averaged log scale, floored at zero, and draw */
      for (k = 0; k < powerlen; k++) {
        power[k] = log(power[k] / (float) nframe) - POWER_BASE;
        if (power[k] < 0.0) power[k] = 0.0;
      }
      write_spectrum();
      for (k = 0; k < powerlen; k++) {
        power[k] = 0.0;
      }
      nframe = 0;
    }

    /* slide the window forward by one frame shift */
    memmove(window, &(window[recog->jconf->input.frameshift]), sizeof(SP16) * (windowlen - recog->jconf->input.frameshift));
    windownum -= recog->jconf->input.frameshift;
  }
}

/** 
 * Callback function that will be called periodically while recognizing
 * at a certain frame interval.  It lists the word ends that survive at
 * the current frame, computes a confidence score for each of them, and
 * draws the "word trail" on the window.
 *
 * Each word keeps an intensity in dens[] (0..9).  A word whose current
 * score (scaled to 0..9) exceeds its stored intensity takes the new
 * value; otherwise the stored intensity decays by one per call.  Words
 * with intensity 2..9 are rendered at (FONT_X, FONT_Y + t) in a gray
 * level proportional to the intensity, weakest first, and are also
 * listed on stdout.
 *
 * The per-word work areas are allocated on first use and reallocated
 * when the dictionary size changes.
 * 
 * @param recog [in] recognition instance
 * @param dummy [in] unused callback argument
 */
static void
output_trail(Recog *recog, void *dummy)
{
  static float *cm = NULL;      /* confidence score of each word */
  static int cmlen;             /* number of words the work areas hold */
  static short *dens;           /* current intensity of each word */
  static WORD_ID *didx;         /* word IDs grouped by intensity level */
  static WORD_ID didxnum[9];    /* number of words in each intensity level */
  static char buf[MAXBUFLEN];   /* word string to be rendered */
  TRELLIS_ATOM *tre;
  RecogProcess *r;
  SDL_Color col = {0, 0, 0, 0};
  SDL_Rect dest;
  SDL_Surface *TextSurface;
  float sum;
  float maxscore;
  int t;
  int i, j, k, d;

  /* assume only one recognizer */
  r = recog->process_list;

  /* (re)allocate the work areas when the dictionary size changed */
  if (cm != NULL && cmlen != r->lm->winfo->num) {
    free(cm);
    free(dens);
    free(didx);
    cm = NULL;
  }
  if (cm == NULL) {
    cmlen = r->lm->winfo->num;
    cm = (float *)mymalloc(sizeof(float) * cmlen);
    dens = (short *)mymalloc(sizeof(short) * cmlen);
    didx = (WORD_ID *)mymalloc(sizeof(WORD_ID) * cmlen * 9);
    /* start from a clean trail even if the first call is not at t == 0 */
    memset(dens, 0, sizeof(short) * cmlen);
  }

  /* current time */
  t = r->am->mfcc->f - 1;

  /* compute confidence score for all survived words: the scores are
     normalized against the best one to keep pow() in range */
  sum = 0.0;
  maxscore = LOG_ZERO;
  for (i = 0; i < cmlen; i++) cm[i] = 0.0;

  for (tre = r->backtrellis->list; tre && tre->endtime == t; tre = tre->next) {
    if (maxscore < tre->backscore) {
      maxscore = tre->backscore;
    }
  }
  for (tre = r->backtrellis->list; tre && tre->endtime == t; tre = tre->next) {
    sum += pow(10, r->config->annotate.cm_alpha * (tre->backscore - maxscore));
  }
  for (tre = r->backtrellis->list; tre && tre->endtime == t; tre = tre->next) {
    cm[tre->wid] = pow(10, r->config->annotate.cm_alpha * (tre->backscore - maxscore)) / sum;
  }

  if (t == 0) {
    /* first time: clear trail data and fill screen with background color */
    memset(dens, 0, sizeof(short) * cmlen);
    GFillScreen(0);
    meter_color = SDL_MapRGB(gScreenSurface->format, 130, 70, 0);
  }

  /* When a new word appears, its intensity (cm score) will be stored
     to dens[].  The intensity will be aged at the following frames:
     the existing dens[] from the last frame will be decreased frame 
     by frame. */
  /* then, the words with intensity > 0 will be listed according
     to the intensity to didx[0..didxnum[]-1] */
  for (i = 0; i < 9; i++) didxnum[i] = 0;
  for (i = 0; i < cmlen; i++) {
    d = cm[i] * 10.0;
    if (d > 9) d = 9;
    if (dens[i] < d) dens[i] = d;
    else if (dens[i] > 0) dens[i]--;
    if (dens[i] > 0) {
      didx[(dens[i]-1) * cmlen + didxnum[dens[i]-1]] = i;
      didxnum[dens[i]-1]++;
    }
  }

  /* write words with corresponding color intensity onto the screen,
     which shows the word trail; the weakest level (index 0) is not
     drawn */
  printf("%3d:", t);
  for (j = 1; j < 9; j++) {
    d = j + 1;
    for (k = 0; k < didxnum[j]; k++) {
      i = didx[j * cmlen + k];
#ifdef CHARACTER_CONVERSION
      if (charconv_enable) {
        charconv(r->lm->winfo->woutput[i], buf, MAXBUFLEN);
      } else {
        snprintf(buf, sizeof(buf), "%s", r->lm->winfo->woutput[i]);
      }
#else
      /* bounded copy: never write past the end of buf */
      snprintf(buf, sizeof(buf), "%s", r->lm->winfo->woutput[i]);
#endif
      col.r = col.g = col.b = (d * 220.0) / 9.0;
      TextSurface = (Font != NULL) ? TTF_RenderUTF8_Solid(Font, buf, col) : NULL;
      if (TextSurface != NULL) {
        dest.x = FONT_X;
        /* the displaying position will be shifted by the time */
        dest.y = FONT_Y + t;
        SDL_BlitSurface(TextSurface, NULL, gScreenSurface, &dest);
        SDL_FreeSurface(TextSurface);
      }
      printf(" %s(%d)", r->lm->winfo->woutput[i], d);
    }
  }
  SDL_Flip(gScreenSurface);

  printf("\n");
}

/************************************************************************/
/************************************************************************/
/************************************************************************/
/*
 * Program entry point.
 *
 * Parses the command line, sets up character conversion (when compiled
 * in), loads the jconf file and the word dictionary, creates the
 * recognition instance, opens the SDL window, registers the drawing
 * callbacks and runs the recognition loop on the input stream.
 *
 * Returns -1 on any setup error.  After the recognition loop ends the
 * process is terminated through EndProc(), which does not return.
 */
int
main(int argc, char *argv[])
{
  char *jconffile = NULL;
  char *dictfile = NULL;
  Recog *recog;
  Jconf *jconf;
  int frame_interval = 0;

  /* when linked with SDL library, print will not go to tty, instead
     saved to stdout.txt and stderr.txt. */

  /* if no option argument, output julius usage and exit */
  if (argc == 1) {
    fprintf(stderr, "Wordflow %s - based on %s rev.%s (%s)\n", WORDFLOW_VERSION, JULIUS_PRODUCTNAME, JULIUS_VERSION, JULIUS_SETUP);
    fprintf(stderr, "Usage: %s [options] dictfile\n", argv[0]);
    fprintf(stderr, "Options:\n\t-C jconffile\n\t[-w width]\n\t[-h height]\n\t[-f ttffile]\n\t[-s fontsize]\n\t[-i update_interval_msec]\n");
#ifdef CHARACTER_CONVERSION
    fprintf(stderr, "\t[-c charcode]\n");
#endif
    return -1;
  }

  /* option parsing: every option takes one value; the remaining
     argument is the dictionary file */
  {
    int i;
    for (i = 1; i < argc; i++) {
      if (argv[i][0] == '-') {
        switch (argv[i][1]) {
        case 'C':
          if (++i >= argc) {fprintf(stderr, "Error in option\n"); return -1;}
          jconffile = argv[i];
          break;
        case 'w':
          if (++i >= argc) {fprintf(stderr, "Error in option\n"); return -1;}
          screen_width = atoi(argv[i]);
          break;
        case 'h':
          if (++i >= argc) {fprintf(stderr, "Error in option\n"); return -1;}
          screen_height = atoi(argv[i]);
          break;
        case 'f':
          if (++i >= argc) {fprintf(stderr, "Error in option\n"); return -1;}
          font_path = argv[i];
          break;
        case 's':
          if (++i >= argc) {fprintf(stderr, "Error in option\n"); return -1;}
          font_size = atoi(argv[i]);
          break;
        case 'i':
          if (++i >= argc) {fprintf(stderr, "Error in option\n"); return -1;}
          frame_interval = atoi(argv[i]);
          break;
#ifdef CHARACTER_CONVERSION
        case 'c':
          if (++i >= argc) {fprintf(stderr, "Error in option\n"); return -1;}
          char_code = argv[i];
          break;
#endif
        default:
          /* keep going as before, but tell the user about it */
          fprintf(stderr, "Warning: unknown option \"%s\" ignored\n", argv[i]);
          break;
        }
      } else {
        dictfile = argv[i];
      }
    }
    if (jconffile == NULL) {
      fprintf(stderr, "Error in option\n"); return -1;
    }
    if (dictfile == NULL) {
      fprintf(stderr, "Error in option\n"); return -1;
    }
    if (font_path == NULL) font_path = DEFAULT_FONT_PATH;
#ifdef CHARACTER_CONVERSION
    if (char_code == NULL) char_code = SYSTEM_CHAR_CODE;
#endif
  }

#ifdef CHARACTER_CONVERSION
  /* initialize charconv */
#ifdef USE_WIN32_MULTIBYTE
  /* map common aliases to the names used on this platform */
  if (strcmp(char_code, "euc-jp") == 0) char_code = "euc";
  if (strcmp(char_code, "eucjp") == 0) char_code = "euc";
  if (strcmp(char_code, "utf8") == 0) char_code = "utf-8";
#endif
  if (strcmp(char_code, SYSTEM_CHAR_CODE) == 0) {
    /* dictionary is already in the rendering encoding */
    charconv_enable = FALSE;
  } else {
    charconv_enable = TRUE;
    if (charconv_setup_real(char_code, SYSTEM_CHAR_CODE) == FALSE) {
      fprintf(stderr, "Error: character set conversion setup failed\n");
      return -1;
    }
  }
#endif

  /* load configurations from the jconf file */
  if ((jconf = j_config_load_file_new(jconffile)) == NULL) {
    fprintf(stderr, "Try `-help' for more information.\n");
    return -1;
  }

  /* load dict file */
  if (multigram_add_prefix_list(dictfile, NULL, jconf->lm_root, LM_DFA_WORD) == FALSE) {
    fprintf(stderr, "Error: failed to set word list\n");
    return -1;
  }

  /* set (force) frame interval if specified */
  if (frame_interval != 0) {
    jconf->search_root->output.progout_interval = frame_interval;
  }

  /* create recognition instance from a jconf */
  if ((recog = j_create_instance_from_jconf(jconf)) == NULL) {
    fprintf(stderr, "Error in startup\n");
    return -1;
  }

  output_input_init(recog);

  /* initialize SDL, passing the length of the longest word string */
  {
    int l, i, k;
    WORD_INFO *winfo;
    l = 0;
    winfo = recog->process_list->lm->winfo;
    for (i = 0; i < winfo->num; i++) {
      k = strlen(winfo->woutput[i]);
      if (l < k) l = k;
    }
    if (GInit(l) != 0) return -1;
  }

  /* register callbacks */
  /* check GUI events periodically */
  callback_add(recog, CALLBACK_POLL, callback_poll_sdl, NULL);
  /* draw word trail while recognition at certain frame interval */
  callback_add(recog, CALLBACK_RESULT_PASS1_INTERIM, output_trail, NULL);
  /* draw final recognition result */
  callback_add(recog, CALLBACK_RESULT, process_result, NULL);
  /* draw level indicator at top */
  callback_add_adin(recog, CALLBACK_ADIN_CAPTURED, output_input, NULL);

  /* initialize and standby audio input device */
  /* for microphone or other threaded input, ad-in thread starts at this time */
  if (j_adin_init(recog) == FALSE) {
    /* error */
    return -1;
  }

  /* output system information */
  j_recog_info(recog);

  /* enter main recognition loop here */
  /* begin A/D input */
  switch (j_open_stream(recog, NULL)) {
  case 0:			/* succeeded */
    break;
  case -1:      		/* error */
    fprintf(stderr, "error in input stream\n");
    return -1;
  case -2:			/* end of recognition process */
    fprintf(stderr, "failed to begin input stream\n");
    return -1;
  }

  /* start main recognition loop for the input stream */
  j_recognize_stream(recog);

  /* release all (ends the process) */
  EndProc(recog);

  return(0);
}
