#include "license.readme"

#include <cstdlib>
#include <cstring>
#include <cctype>
#include <cstdio>

#include "stdafx.h"
#include "affixmgr.hxx"
#include "affix.hxx"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif


AffixMgr::AffixMgr(const char * affpath) 
{
	// start out with empty affix tables and no TRY / SET strings,
	// then fill everything in from the aff file
	trystring = NULL;
	encoding = NULL;
	numpfx = numsfx = 0;

	// parse_file() reports failure with a non-zero result;
	// construction is abandoned by throwing in that case
	const int failed = parse_file(affpath);
	if (failed != 0) throw FALSE;
}


AffixMgr::~AffixMgr() 
{
	int idx;

	// release the prefix objects stored in pTable[0 .. numpfx-1]
	idx = 0;
	while (idx < numpfx) {
		delete pTable[idx];
		++idx;
	}

	// release the suffix objects stored in sTable[0 .. numsfx-1]
	idx = 0;
	while (idx < numsfx) {
		delete sTable[idx];
		++idx;
	}

	// release the strings taken from the SET and TRY lines
	delete [] encoding;
	delete [] trystring;
}


// read in aff file and build up prefix and suffix data structures
int  AffixMgr::parse_file(const char * affpath)
{
	int i, j;
	int numents;

	// io buffers
	char line[MAXLNLEN+1];

	// open the affix file
	FILE * afflst;
	afflst = fopen(affpath,"r");
	if (!afflst) {
		return 1;
	}

	// step one is to parse the affix file building up the internal
	// affix data structures

	numpfx = 0;       // number of prefixes
	numsfx = 0;       // number of suffixes
	numents = 0;      // number of affentry structures to parse
	char flag='\0';   // affix char identifier
	//{
	short ff=0;
	char ft;
	struct affentry * ptr= NULL;
	struct affentry * nptr= NULL;

	// read in each line ignoring any that do not
	// start with PFX or SFX

	while (fgets(line,MAXLNLEN,afflst)) {
		mychomp(line);

		/* parse in the try string */
		if (strncmp(line,"TRY",3) == 0) {
			char * tp = line;
			char * piece;
			i = 0;
			while ((piece=mystrsep(&tp,' '))) {
				if (*piece != '\0') {
					switch(i) {
						case 0: break;
						case 1: 
							ASSERT( trystring == NULL ); // multiple try strings
							trystring = mystrdup(piece);
							break;
						default: break;
					}
					i++;
				}
				delete [] piece;
			}
		}

		/* parse in the name of the character set used by the .dict and .aff */
		if (strncmp(line,"SET",3) == 0) {
			char * tp = line;
			char * piece;
			i = 0;
			while ((piece=mystrsep(&tp,' '))) {
				if (*piece != '\0') {
					switch(i) {
						case 0: break;
						case 1:
							ASSERT( encoding == NULL ); // multiple encodings
							encoding = mystrdup(piece); 
							break;
						default: break;
					}
					i++;
				}
				delete [] piece;
			}
		}

		// get the type of this affix: P - prefix, S - suffix
		ft = ' ';
		if (strncmp(line,"PFX",3) == 0) ft = 'P';
		else if (strncmp(line,"SFX",3) == 0) ft = 'S';
		if (ft != ' ') {
			char * tp = line;
			char * piece;
			i = 0;

			// split line into pieces
			while ((piece=mystrsep(&tp,' '))) {
				if (*piece != '\0') {
					switch(i) {
						// piece 1 - is type of affix
						case 0: 
							#ifdef _DEBUG
							if ( ft == 'P' )
								ASSERT( strncmp(piece, "PFX", 3) == 0 ); // format error
							else if (ft == 'S' )
								ASSERT( strncmp(piece, "SFX", 3) == 0 ); // format error
							#endif /* _DEBUG */
							break;

						// piece 2 - is affix char
						case 1:
							flag = *piece;
							break;

						// piece 3 - is cross product indicator 
						case 2:
							if (*piece == 'Y') 
								ff = XPRODUCT;
							break;

						// piece 4 - is number of affentries
						case 3:
							numents = atoi(piece);
							ASSERT( ptr == NULL );
							ptr = new affentry[numents];
							ptr->cpflag = ff;

						default: break;
					}
					i++;
				}
				delete [] piece;
			}
			ASSERT( i > 3 );


			// store away ptr to first sffentry
			nptr = ptr;

			// now parse numents affentries for this affix
			for (j=0; j < numents; j++) {
				fgets(line,MAXLNLEN,afflst);
				mychomp(line);
				tp = line;
				i = 0;

				// split line into pieces
				while ((piece=mystrsep(&tp,' '))) {
					if (*piece != '\0') {
						switch(i) {
							// piece 1 - is type
							case 0:
								#ifdef _DEBUG
								if ( ft == 'P' )
									ASSERT( strncmp(piece, "PFX", 3) == 0 ); // format error
								else if (ft == 'S' )
									ASSERT( strncmp(piece, "SFX", 3) == 0 ); // format error
								#endif /* _DEBUG */

								nptr->cpflag = ptr->cpflag;
								break;

							// piece 2 - is affix char
							case 1: 
								ASSERT( *piece == flag );
								break;

							// piece 3 - is string to strip or 0 for null 
							case 2:
								nptr->strip = mystrdup(piece);
								nptr->stripl = strlen(nptr->strip);
								if (strcmp(nptr->strip,"0") == 0) {
									*(nptr->strip) = '\0';
									nptr->stripl = 0;
								}
								break;

							// piece 4 - is affix string or 0 for null
							case 3:
								nptr->appnd = mystrdup(piece);
								nptr->appndl = strlen(nptr->appnd);
								if (strcmp(nptr->appnd,"0") == 0) {
									*(nptr->appnd) = '\0';
									nptr->appndl = 0;
								}
								break;

							// piece 5 - is the conditions descriptions
							case 4: 
								encodeit(nptr,piece);

							default: break;
						}
						i++;
					}
					delete [] piece;
				}
				ASSERT ( i > 4 );
				nptr++;
			}

			// now create the correct Affix Object (Prefix or Suffix)
			if (ft == 'P') {
				if ( numpfx < MAXAFFIXES ) {
					pTable[numpfx++] = (Affix *) new Prefix(this,flag,numents,ptr);
				} else {
					ASSERT( FALSE ); // Too many Prefixes
					delete [] ptr;
				}
			} else {
				if ( numsfx < MAXAFFIXES ) {
					sTable[numsfx++] = (Affix *) new Suffix(this,flag,numents,ptr);
				} else {
					ASSERT( FALSE ); // Too many Affixes
					delete [] ptr;
				}
			}
			ptr = NULL;
			nptr = NULL;
			piece = NULL;
			numents = 0;
		}
	}
	fclose(afflst);
	return 0;
}


// takes aff file condition string and creates the
// conds array - please see the appendix at the end of the
// file affix.cxx which describes what is going on here
// in much more detail
void AffixMgr::encodeit(struct affentry * ptr, char * cs) const
{
	unsigned char c;
	int i, j, k;
	unsigned char mbr[MAXLNLEN];

	// now clear the conditions array */
	for (i=0;i<SETSIZE;i++) ptr->conds[i] = (unsigned char) 0;

	// now parse the string to create the conds array */
	int nc = strlen(cs);
	int neg = 0;   // complement indicator
	int grp = 0;   // group indicator
	int n = 0;     // number of conditions
	int ec = 0;    // end condition indicator
	int nm = 0;    // number of member in group

	// if no condition just return
	if (strcmp(cs,".")==0) {
		ptr->numconds = 0;
		return;
	}

	i = 0;
	while (i < nc) {
		c = *(cs + i);

		// start group indicator
		if (c == '[') {
			grp = 1;
			c = 0;
		}

		// complement flag
		if ((grp == 1) && (c == '^')) {
			neg = 1;
			c = 0;
		}

		// end goup indicator
		if (c == ']') {
			ec = 1;
			c = 0;
		}

		// add character of group to list
		if ((grp == 1) && (c != 0)) {
			*(mbr + nm) = c;
			nm++;
			c = 0;
		}

		// end of condition 
		if (c != 0) {
			ec = 1;
		}

		if (ec) {
			ASSERT( n < 8 );
			if (grp == 1) {
				if (neg == 0) {
					// set the proper bits in the condition array vals for those chars
					for (j=0;j<nm;j++) {
						k = mbr[j];
						ptr->conds[k] |= (1 << n);
					}
				} else {
					// complement so set all of them and then unset indicated ones
					for (j=0;j<SETSIZE;j++) ptr->conds[j] |= (1 << n);
					for (j=0;j<nm;j++) {
						k = mbr[j];
						ptr->conds[k] &= ~(1 << n);
					}
				}
				neg = 0;
				grp = 0;
				nm = 0;
			} else {
				// not a group so just set the proper bit for this char
				ptr->conds[c] = ptr->conds[c] | (1 << n);
			}
			n++;
			ec = 0;
		}

		i++;
	}
	ptr->numconds = n;
	return;
}



/* cross check suffixes with a specific prefix: */
/* returns the first non-NULL result of a suffix check, or NULL if none hits */
struct hentry * AffixMgr::cross_check (HashMgr *pHMgr, const char * word, int len, int sfxopts, const Affix * ppfx) const
{
	// go through every possible suffix with this prefix
	for (int idx = 0; idx < numsfx; idx++) {
		struct hentry * hit = ((Suffix *)sTable[idx])->check(pHMgr, word, len, sfxopts, (Prefix *)ppfx); 
		if (hit != NULL) return hit;
	}
	return NULL;
}



// check if word with affixes is correctly spelled:
// returns the first non-NULL result of a prefix or suffix check,
// or NULL when no affix object accepts the word
struct hentry * AffixMgr::affix_check (HashMgr *pHMgr, const char * word, int len) const
{
	int idx;
	struct hentry * hit;

	// first check all prefixes (also cross check with suffixes if need be)
	for (idx = 0; idx < numpfx; idx++) {
		hit = ((Prefix *)pTable[idx])->check(pHMgr, word, len); 
		if (hit != NULL) return hit;
	}

	// if still not found check all suffixes (no options, no prefix)
	for (idx = 0; idx < numsfx; idx++) {
		hit = ((Suffix *)sTable[idx])->check(pHMgr, word, len, 0, NULL); 
		if (hit != NULL) return hit;
	}

	return NULL;
}


// utility method to convert a string to lower case in place
void AffixMgr::strtolower(char * p) const
{
	for (char * cur = p; *cur != '\0'; ++cur) {
		// tolower() must be handed the character as an unsigned value
		const unsigned char ch = (unsigned char)(*cur);
		*cur = (char) tolower(ch);
	}
}


// return a newly allocated copy of the dictionary's text encoding name,
// or NULL when no encoding has been set
char * AffixMgr::get_encoding() const
{
	return encoding ? mystrdup(encoding) : NULL;
}


// return a newly allocated copy of the preferred try string for
// suggestions, or NULL when no try string has been set
char * AffixMgr::get_try_string() const
{
	return trystring ? mystrdup(trystring) : NULL;
}



// utility method to look up root words in hash table;
// yields NULL when no hash manager is supplied
struct hentry * AffixMgr::lookup(HashMgr *pHMgr, const char * word) const
{
	return pHMgr ? pHMgr->lookup(word) : NULL;
}


// Split a string into tokens based on a single char delimiter.
// Acts like strsep() but only uses a delim char and not a delim string,
// and it does not modify the string that is being split.
//
// Each call skips any delimiters in front of the next token, returns a
// copy of that token and advances *stringp past the token and the one
// delimiter that ended it.  The copy is allocated with new[] and has to
// be released by the caller with delete [].  A string that ends in more
// than one delimiter yields one final empty token.
//
// Returns NULL when stringp or *stringp is NULL, or when *stringp
// already points at the end of the string.
char * mystrsep(char ** stringp, const char delim)
{
	if ( stringp == NULL || *stringp == NULL || **stringp == '\0' )
		return NULL;

	// skip the delimiters in front of the token
	char * start = *stringp;
	while ( *start == delim ) ++start;

	// The token ends at the next delimiter or at the end of the string.
	// Testing for the terminator first keeps the scan inside the string
	// even when delim itself is '\0'.
	char * end = start;
	while ( *end != '\0' && *end != delim ) ++end;

	const size_t len = end - start;
	char * token = new char[len + 1];
	memcpy(token, start, len);
	token[len] = '\0';

	// step over the delimiter that ended the token, but never over the
	// terminator of the string
	*stringp = (*end != '\0') ? end + 1 : end;
	return token;
}


// Return a copy of the string s in memory allocated with new[]; the
// caller releases it with delete [].  Returns NULL when s is NULL.
char * mystrdup(const char * s)
{
	if (s == NULL) return NULL;

	// copy the terminator together with the characters so that the
	// string is only measured once
	const size_t size = strlen(s) + 1;
	char * d = new char[size];
	memcpy(d, s, size);
	return d;
}


// Remove all trailing white space (space, tab, carriage return and
// line feed) from a string in place.  A NULL pointer is ignored.
void mychomp(char * s)
{
	if (s == NULL) return;

	// len is the length of the part that is kept; working with the
	// length instead of the index of the last character avoids going
	// below zero for an empty string
	size_t len = strlen(s);
	while (len > 0) {
		const char last = s[len - 1];
		if (last != '\r' && last != '\n' && last != ' ' && last != '\t')
			break;
		s[--len] = '\0';
	}
}

