/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * Author: Charles Kerr <charles@rebelbase.com>
 *
 * Copyright (C) 2000, 2001  Pan Development Team <pan@rebelbase.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 * 
 */

#include <config.h>

#include <math.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>

#include <glib.h>

#include <pan/article.h>
#include <pan/base-prefs.h>
#include <pan/debug.h>
#include <pan/file-headers.h>
#include <pan/pan-i18n.h>
#include <pan/pan-glib-extensions.h>
#include <pan/group.h>
#include <pan/log.h>
#include <pan/status-item.h>
#include <pan/util-file.h>

/***
****
****
****
***/

/**
 * Load a group's cached article headers from its ".idx"/".dat" file pair.
 *
 * The index file is a list of newline-delimited tokens: a format version,
 * an article count, and then one record per article.  String fields in a
 * record are stored as byte offsets into the data file's contents; an
 * offset outside [0, dat->len) is treated as "field not present".
 *
 * On success the data file's buffer is handed to group->_one_big_chunk and
 * is NOT freed here, because the articles' string fields point into it.
 *
 * @param group  the group to populate; must not be NULL
 * @param status optional StatusItem used for progress messages; may be NULL
 * @return TRUE if both files were read and the index version is supported
 */
static gboolean
file_headers_load_group (Group * group, StatusItem * status)
{
	gboolean success = FALSE;
	gchar * path;
	GArray *dat, *idx;
	debug_enter ("file_headers_load_group");

	g_return_val_if_fail (group!=NULL, FALSE);

	if (status!=NULL)
		status_item_emit_status_va (status, _("Loading %s"), group_get_readable_name(group));

	/* open the index file */
	path = g_strdup_printf ("/%s/%s/%s.idx", data_dir, group->server->name, group->name);
	idx = read_file (path);
	g_free (path);

	/* open the data file */
	path = g_strdup_printf ("/%s/%s/%s.dat", data_dir, group->server->name, group->name);
	dat = read_file (path);
	g_free (path);

	/* allocate the articles array */
	pan_warn_if_fail (group->_articles == NULL);
	group_get_articles (group);

	if (dat!=NULL && idx!=NULL)
	{
		const gchar * march = idx->data;
		const glong version = get_next_token_int (march, '\n', &march);

		if (version==1 || version==2 || version==3 || version==4)
		{
			long i;
			long l;
			const long qty = get_next_token_long (march, '\n', &march);
			GPtrArray * addme = g_ptr_array_new ();
			gint purged_article_count = 0;

			pan_warn_if_fail (group->_one_big_chunk == NULL);
			group->_one_big_chunk = dat->data;

			/* "<" rather than "!=" so that a negative count read from a
			   damaged index file yields zero iterations instead of a
			   runaway loop */
			for (i=0; i<qty; ++i)
			{
				Article * a = article_new (group);
				int extra_header_qty;
				int j;

				/* message id */
				l = get_next_token_long (march, '\n', &march);
				if (0<=l && l<dat->len)
					a->message_id = dat->data + l;

				/* author */
				if (version<2) /* version 2 split author into 2 fields */
				{
					l = get_next_token_long (march, '\n', &march);
					if (0<=l && l<dat->len)
						article_init_author_from_header (a, dat->data+l);
				}
				else
				{
					l = get_next_token_long (march, '\n', &march);
					if (0<=l && l<dat->len)
						a->author_addr = dat->data + l;

					l = get_next_token_long (march, '\n', &march);
					if (0<=l && l<dat->len)
						a->author_real = dat->data + l;
				}

				/* subject */
				l = get_next_token_long (march, '\n', &march);
				if (0<=l && l<dat->len)
					a->subject = dat->data + l;

				/* date string - removed in version 3 */
				if (version<3)
					skip_next_token (march, '\n', &march);

				/* references */
				l = get_next_token_long (march, '\n', &march);
				if (0<=l && l<dat->len)
					article_init_header (a, HEADER_REFERENCES, dat->data+l, 0);

				/* xrefs added in version 4 */
				if (version>=4)
				{
					l = get_next_token_long (march, '\n', &march);
					if (0<=l && l<dat->len)
						a->xref = dat->data + l;
				}

				/* numeric fields */
				a->part           = (gint16) get_next_token_int (march, '\n', &march);
				a->parts          = (gint16) get_next_token_int (march, '\n', &march);
				a->linecount      = (guint16) get_next_token_int (march, '\n', &march);
				a->crosspost_qty  = (gint8) get_next_token_int (march, '\n', &march);
				a->state          = (guint16) get_next_token_int (march, '\n', &march);
				a->date           = (time_t) get_next_token_ulong (march, '\n', &march);
				a->number         = (gulong) get_next_token_ulong (march, '\n', &march);

				/* extra headers: a count followed by key/value offset pairs */
				extra_header_qty  = get_next_token_int (march, '\n', &march);
				for (j=0; j<extra_header_qty; ++j)
				{
					const glong key_idx = get_next_token_long (march, '\n', &march);
					const glong val_idx = get_next_token_long (march, '\n', &march);
					const gchar * key = 0<=key_idx && key_idx<dat->len ? dat->data+key_idx : NULL;
					const gchar * val = 0<=val_idx && val_idx<dat->len ? dat->data+val_idx : NULL;
					if (key!=NULL && val!=NULL)
						article_init_header (a, key, val, 0);
				}

				/* let the user know what we're doing */
				if (status != NULL) {
					status_item_emit_next_step (status);
					if (!(addme->len % 256))
						/* the format uses %d, so pass ints rather than longs */
						status_item_emit_status_va (status,
							_("Loaded %d of %d articles"), (int)i, (int)qty);
				}

				/* add the article to the group if it looks sane */
				if (article_is_valid (a))
					g_ptr_array_add (addme, a);
				else {
					++purged_article_count;
					pan_object_unref (PAN_OBJECT(a));
				}
			}

			if (purged_article_count != 0)
			{
				log_add_va (LOG_ERROR,
					_("Pan skipped %d corrupt headers from the local cache for group %s.\nYou may want to empty this group and download fresh headers."),
					purged_article_count,
					group_get_readable_name(group));
			}

			group_add_articles_remove_unused (group, addme, status);
			group_set_article_qty (group, addme->len);
			/* if anything was skipped, the on-disk copy no longer matches */
			group->articles_dirty = purged_article_count!=0;
			success = TRUE;
			g_ptr_array_free (addme, TRUE);
		}
		else
		{
			/* the format uses %d, so pass an int rather than a glong */
			log_add_va (LOG_ERROR|LOG_URGENT,
				_("Unsupported data version for %s headers : %d"), group->name, (int)version);
		}
	}

	/* on success keep dat's buffer alive: group->_one_big_chunk owns it now */
	if (dat != NULL) g_array_free (dat, !success);
	if (idx != NULL) g_array_free (idx, TRUE);

	debug_exit ("file_headers_load_group");
	return success;
}

/**
 * Build the path of the mbox file that backs a folder:
 * "/<data_dir>/<server name>/<folder name>.mbox".
 *
 * @param folder a Group for which group_is_folder() is true
 * @return a newly-allocated string that the caller must g_free(),
 *         or NULL if the argument checks fail
 */
static gchar*
get_mbox_filename (const Group * folder)
{
	gchar * mbox_path;
	debug_enter ("get_mbox_filename");

	g_return_val_if_fail (folder!=NULL, NULL);
	g_return_val_if_fail (group_is_folder(folder), NULL);

	mbox_path = g_strdup_printf ("/%s/%s/%s.mbox", data_dir, folder->server->name, folder->name);

	debug_exit ("get_mbox_filename");
	return mbox_path;
}

/**
 * Load a folder's articles from its mbox file.
 *
 * Each message found in the mbox becomes an Article numbered 1, 2, 3...
 * in file order; articles that fail article_is_valid() are dropped.
 *
 * @param folder the folder to populate; must satisfy group_is_folder()
 * @param status passed through to group_add_articles_remove_unused()
 * @return TRUE if the folder ended up with at least one article,
 *         FALSE otherwise (including when the mbox can't be read)
 */
static gboolean
file_headers_load_folder (Group * folder, StatusItem * status)
{
	gint i;
	gchar * text;
	const gchar * march;
	GPtrArray * articles;
	debug_enter ("file_headers_load_folder");

	g_return_val_if_fail (folder!=NULL, FALSE);
	g_return_val_if_fail (group_is_folder(folder), FALSE);

	/* get the mbox file */
	{
		GArray * array;
		gchar * fname = get_mbox_filename (folder);
		array = read_file (fname);
		g_free (fname);
		if (array == NULL)
			return FALSE;
		/* keep the file contents but drop the GArray wrapper */
		text = array->data;
		g_array_free (array, FALSE);
	}

	/* allocate the articles array */
	pan_warn_if_fail (folder->_articles == NULL);
	group_get_articles (folder);

	/* walk through the mbox */
	i = 0;
	march = text;
	articles = g_ptr_array_new ();
	while (march != NULL)
	{
		gchar * from = NULL;
		gchar * msg = NULL;

		/* parse the next mbox message */
		march = mbox_get_next_message (march, &from, &msg);
		if (msg != NULL)
		{
			Article * a = article_new (folder);
			a->number = ++i;
			article_set_from_raw_message (a, msg);
			g_free (msg);

			/* keep the good ones */
			if (article_is_valid (a))
				g_ptr_array_add (articles, a);
			else
				pan_object_unref (PAN_OBJECT(a));
		}

		g_free (from);
	}

	/* if we've got articles then add them; otherwise, clean up */
	if (articles->len != 0)
		group_add_articles_remove_unused (folder, articles, status);
	else {
		g_hash_table_destroy (folder->_articles);
		folder->_articles = NULL;
	}
	folder->articles_dirty = FALSE;

	/* cleanup -- the pointer array itself is ours to free; this releases
	   only the array, not the articles it pointed to */
	g_ptr_array_free (articles, TRUE);
	g_free (text);

	debug_exit ("file_headers_load_folder");
	return folder->_articles!=NULL && g_hash_table_size(folder->_articles)!=0;
}

/**
 * Load a group's articles from disk and log how long it took.
 *
 * Folders are tried as an mbox first; if that yields nothing (or the group
 * is not a folder) the ".idx"/".dat" loader is used.  After a successful
 * load, group_expire_old_articles() is run on the group.
 *
 * @param group  the group or folder to load
 * @param status handed to the loaders for progress reporting
 */
void
file_headers_load (Group * group, StatusItem * status)
{
	struct timeval began;
	struct timeval ended;
	double elapsed;
	gint article_qty;
	gboolean loaded = FALSE;
	debug_enter ("file_headers_load");

	/* start the stopwatch */
	gettimeofday (&began, NULL);

	/* folders get first crack at the mbox loader */
	if (group_is_folder (group))
		loaded = file_headers_load_folder (group, status);
	if (!loaded)
		loaded = file_headers_load_group (group, status);

	/* expire the old articles, if any */
	if (loaded)
		group_expire_old_articles (group);

	/* stop the stopwatch */
	gettimeofday (&ended, NULL);
	elapsed = (ended.tv_sec - began.tv_sec)
	        + (ended.tv_usec - began.tv_usec)/1000000.0;

	/* timing stats */
	article_qty = g_hash_table_size(group_get_articles(group));
	if (article_qty != 0)
	{
		/* don't divide by (nearly) zero on very fast loads */
		const double divisor = fabs(elapsed)<0.001 ? 0.001 : elapsed;

		log_add_va (LOG_INFO, _("Loaded %d articles for group `%s' in %.1f seconds (%.0f art/sec)"),
			article_qty,
			group_get_readable_name(group),
			elapsed,
			article_qty/divisor);
	}

	debug_exit ("file_headers_load");
}

/**
 * Write a string (including its trailing NUL) to fp and report the offset
 * at which it lives, writing each distinct string only once per hash table.
 * Sharing one copy of repeated authors and subjects keeps the file smaller.
 *
 * @param hash       strings already written, mapped to their offsets; pass
 *                   NULL to always write without weeding out duplicates
 * @param long_chunk allocator for the offsets stored in hash; only used
 *                   when hash is non-NULL
 * @param str        the string to store; may be NULL or empty
 * @param fp         the output stream; must not be NULL
 * @param pos        running write offset, advanced by the number of bytes
 *                   fwrite() reports as written; must not be NULL
 * @return the offset of str, or -1 if str is NULL or empty
 */
long
get_string_offset (GHashTable   * hash,
                   GMemChunk    * long_chunk,
                   const gchar  * str,
                   FILE         * fp,
                   long         * pos)
{
	long offset;

	g_assert (fp != NULL);
	g_assert (pos != NULL);

	/* nothing to write */
	if (!is_nonempty_string(str))
		return -1;

	if (hash != NULL)
	{
		glong * remembered;
		gpointer known = g_hash_table_lookup (hash, str);

		/* a match!  reuse the earlier copy */
		if (known != NULL)
			return *(glong*)known;

		/* first time; remember where we're about to keep it */
		remembered = g_chunk_new (glong, long_chunk);
		*remembered = *pos;
		g_hash_table_insert (hash, (gpointer)str, remembered);
	}

	/* append the string and advance the running offset */
	offset = *pos;
	*pos += fwrite (str, 1, strlen(str)+1, fp);
	return offset;
}


/**
 * Write a folder's articles to its mbox file.
 *
 * The messages are first written to "<mbox>.tmp".  The temporary file is
 * renamed over the real mbox only if every write and the final fclose()
 * succeeded; otherwise the temporary file is removed, the previous mbox is
 * left untouched, and the folder is marked dirty again.
 *
 * @param folder the folder to save; must satisfy group_is_folder()
 * @param status not used by this function
 */
static void
file_headers_save_folder (Group * folder, StatusItem * status)
{
	FILE * fp;
	gchar * fname;
	gchar * fname_tmp;
	int failed = 0;
	debug_enter ("file_headers_save_folder");

	g_return_if_fail (group_is_folder(folder));

	/* get the filenames & start writing to the outfile */
	fname = get_mbox_filename (folder);
	fname_tmp = g_strdup_printf ("%s.tmp", fname);
	fp = fopen (fname_tmp, "w+");

	if (fp == NULL)
	{
		failed = 1;
	}
	else
	{
		guint i;
		GPtrArray * array = group_get_article_array (folder);
		GString * s = g_string_new (NULL);

		/* stop at the first write error; the tmp file is doomed anyway */
		for (i=0; !failed && i!=array->len; ++i)
		{
			Article * a = ARTICLE(g_ptr_array_index(array,i));
			gchar buf[32], *pch, *mbox;

			/* add headers */
			pch = article_get_headers (a);
			g_string_assign (s, pch);
			g_free (pch);

			/* add Status: header -- 'R' if read, 'O' if no longer new */
			*(pch=buf)='\0';
			if (article_is_read(a)) *pch++ = 'R';
			if (!article_is_new(a)) *pch++ = 'O';
			*pch = '\0';
			if (*buf != '\0')
				g_string_sprintfa (s, "Status: %s\n", buf);

			/* add header/body separator */
			g_string_append (s, "\n");

			/* add body */
			pch = article_get_body (a);
			g_string_append (s, pch);
			g_free (pch);

			/* write the message in mbox format */
			mbox = mbox_format_message (s->str, a->author_addr, a->date);
			if (mbox != NULL)
			{
				if (fputs (mbox, fp) == EOF)
					failed = 1;
				g_free (mbox);
			}
		}

		/* fclose() flushes buffered output, so errors may only show up here */
		if (fclose (fp) != 0)
			failed = 1;

		/* only replace the old mbox with a completely written one */
		if (!failed && rename (fname_tmp, fname) != 0)
			failed = 1;
		if (failed)
			remove (fname_tmp);

		/* cleanup */
		g_ptr_array_free (array, TRUE);
		g_string_free (s, TRUE);
	}

	/* the articles are still unsaved, so let a later save try again */
	if (failed)
		folder->articles_dirty = TRUE;

	g_free (fname);
	g_free (fname_tmp);
	debug_exit ("file_headers_save_folder");
}

/**
 * Write a group's article headers to its ".idx"/".dat" file pair.
 *
 * The data file holds NUL-terminated strings.  The index file holds
 * newline-delimited numbers: the format version (4), the article count,
 * and then for each article six offsets into the data file (message-id,
 * author address, author name, subject, references, xref), the numeric
 * fields, the number of extra headers, and a key/value offset pair for
 * each extra header.  An offset of -1 means the string was empty.
 *
 * Both files are written under ".tmp" names and are renamed into place
 * only if no write error was detected; otherwise the temporary files are
 * removed and the group is marked dirty again.
 *
 * If the group has no articles, its files are destroyed instead.
 *
 * @param group  the group to save; its server and article table must exist
 * @param status not used by this function
 */
static void
file_headers_save_group (Group * group, StatusItem * status)
{
	guint i;
	int failed;
	FILE * idx_fp;
	FILE * dat_fp;
	gchar * pch;
	gchar * idx_path;
	gchar * dat_path;
	long pos = 0;
	GHashTable * hash_name = NULL;
	GHashTable * hash_mail = NULL;
	GHashTable * hash_refs = NULL;
	GHashTable * hash_misc = NULL;
	GMemChunk * long_chunk = NULL;
	GPtrArray * articles;
	debug_enter ("file_headers_save_group");

	g_return_if_fail (group!=NULL);
	g_return_if_fail (group->server!=NULL);
	g_return_if_fail (group->_articles!=NULL);

	/* if nothing to save */
	articles = group_get_article_array (group);
	if (articles->len == 0) {
		g_ptr_array_free (articles, TRUE);
		file_headers_destroy (group);
		return;
	}

	/**
	***  Save the Headers
	**/

	/* open index file */
	idx_path = g_strdup_printf ("/%s/%s/%s.idx.tmp", data_dir, group->server->name, group->name);
	idx_fp = fopen (idx_path, "w+");
	if (idx_fp == NULL) {
		g_free (idx_path);
		g_ptr_array_free (articles, TRUE);
		/* nothing was saved, so let a later save try again */
		group->articles_dirty = TRUE;
		return;
	}

	/* open data file */
	dat_path = g_strdup_printf ("/%s/%s/%s.dat.tmp", data_dir, group->server->name, group->name);
	dat_fp = fopen (dat_path, "w+");
	if (dat_fp == NULL) {
		fclose (idx_fp);
		remove (idx_path);
		g_free (idx_path);
		g_free (dat_path);
		g_ptr_array_free (articles, TRUE);
		/* nothing was saved, so let a later save try again */
		group->articles_dirty = TRUE;
		return;
	}

	/* Write the database version and the article count */
	fprintf (idx_fp, "4\n%ld\n", (long)articles->len);

	/* Write the article information... */
	pos = 0;
	hash_name = g_hash_table_new (g_str_hash, g_str_equal);
	hash_mail = g_hash_table_new (g_str_hash, g_str_equal);
	hash_refs = g_hash_table_new (g_str_hash, g_str_equal);
	hash_misc = g_hash_table_new (g_str_hash, g_str_equal);
	long_chunk = g_mem_chunk_create (glong, 8192, G_ALLOC_ONLY);
	for (i=0; i!=articles->len; ++i)
	{
		Article * a = ARTICLE(g_ptr_array_index (articles, i));
		GPtrArray * extra_headers = article_get_all_headers (a);
		const gchar * subject = article_get_subject (a);
		const gchar * message_id = article_get_message_id (a);
		guint j;
		/* message-ids get no hash table: they are always written out */
		const long id_idx          = get_string_offset (NULL,      NULL,       message_id,     dat_fp, &pos);
		const long author_addr_idx = get_string_offset (hash_mail, long_chunk, a->author_addr, dat_fp, &pos);
		const long author_real_idx = get_string_offset (hash_name, long_chunk, a->author_real, dat_fp, &pos);
		const long subj_idx        = get_string_offset (hash_misc, long_chunk, subject,        dat_fp, &pos);
		const long refs_idx        = get_string_offset (hash_refs, long_chunk, a->references,  dat_fp, &pos);
		const long xref_idx        = get_string_offset (hash_refs, long_chunk, a->xref      ,  dat_fp, &pos);

		pan_warn_if_fail (a->number != 0);

		/* write the string offsets, then the non-string fields. */
		fprintf (idx_fp,
			"%ld\n" "%ld\n" "%ld\n" "%ld\n" "%ld\n" "%ld\n"
			"%d\n" "%d\n"
			"%u\n"
			"%d\n"
			"%d\n"
			"%lu\n"
			"%lu\n",
			id_idx, author_addr_idx, author_real_idx, subj_idx, refs_idx, xref_idx,
			(int)a->part,
			(int)a->parts,
			(unsigned int)a->linecount,
			(int)a->crosspost_qty,
			(int)a->state,
			(unsigned long)a->date,
			(unsigned long)a->number);

		/* make sure the extra headers are in pairs */
		if ((extra_headers->len % 2) != 0) {
			pan_warn_if_reached ();
			g_ptr_array_set_size (extra_headers, 0);
		}

		/* remove extra headers that we don't want to save */
		for (j=0; j<extra_headers->len; ) {
			const gchar * key = (const gchar*) g_ptr_array_index (extra_headers, j);
			const gchar * val = (const gchar*) g_ptr_array_index (extra_headers, j+1);
			if (is_nonempty_string(key) && is_nonempty_string(val))
				j += 2;
			else {
				/* drop both key and value; the value slides into slot j */
				g_ptr_array_remove_index (extra_headers, j);
				g_ptr_array_remove_index (extra_headers, j);
			}
		}

		/* write the extra headers */
		fprintf (idx_fp, "%d\n", (int)extra_headers->len/2); /* extra header qty */
		for (j=0; j<extra_headers->len; j+=2) {
			const gchar * key = (const gchar*) g_ptr_array_index (extra_headers, j);
			const gchar * val = (const gchar*) g_ptr_array_index (extra_headers, j+1);
			const long key_index = get_string_offset (hash_misc, long_chunk, key, dat_fp, &pos);
			const long val_index = get_string_offset (hash_misc, long_chunk, val, dat_fp, &pos);
			fprintf (idx_fp, "%ld\n%ld\n", key_index, val_index);
		}

		g_ptr_array_free (extra_headers, TRUE);
	}
	g_hash_table_destroy (hash_name); hash_name = NULL;
	g_hash_table_destroy (hash_mail); hash_mail = NULL;
	g_hash_table_destroy (hash_refs); hash_refs = NULL;
	g_hash_table_destroy (hash_misc); hash_misc = NULL;
	g_mem_chunk_destroy (long_chunk);

	/* find out whether everything really made it to disk; fclose()
	   flushes buffered output, so its result matters too */
	failed = ferror (idx_fp) || ferror (dat_fp);
	if (fclose (idx_fp) != 0)
		failed = 1;
	if (fclose (dat_fp) != 0)
		failed = 1;

	if (!failed)
	{
		/* the write went okay; move the idx file over */
		pch = g_strdup_printf ("/%s/%s/%s.idx", data_dir, group->server->name, group->name);
		if (rename (idx_path, pch) != 0)
			failed = 1;
		g_free (pch);

		/* the write went okay; move the dat file over */
		pch = g_strdup_printf ("/%s/%s/%s.dat", data_dir, group->server->name, group->name);
		if (rename (dat_path, pch) != 0)
			failed = 1;
		g_free (pch);
	}

	if (failed)
	{
		/* don't leave partial tmp files behind, and let a later save retry */
		remove (idx_path);
		remove (dat_path);
		group->articles_dirty = TRUE;
	}

	/* cleanup */
	g_free (idx_path);
	g_free (dat_path);
	g_ptr_array_free (articles, TRUE);
	debug_exit ("file_headers_save_group");
}
/**
 * Save a group's articles to disk if they have changed since the last
 * load or save, and log how long the save took.
 *
 * A group whose article list is empty has its files destroyed instead of
 * being saved.  The dirty flag is cleared before saving.
 *
 * @param group  the group or folder to save
 * @param status handed to the folder/group save routines
 */
void
file_headers_save (Group * group, StatusItem * status)
{
	GPtrArray * articles;
	struct timeval start;
	struct timeval finish;
	double diff;
	debug_enter ("file_headers_save");

	gettimeofday (&start, NULL);

	/* trivial case #1: no change, so no need to save */
	if (group->articles_dirty)
	{
		/* since we're saving, mark dirty to FALSE */
		group->articles_dirty = FALSE;

		articles = group_get_article_array (group);
		if (articles->len == 0) /* destroy the group */
		{
			file_headers_destroy (group);
		}
		else /* save the group */
		{
			if (group_is_folder (group))
				file_headers_save_folder (group, status);
			else
				file_headers_save_group (group, status);

			/* timing stats */
			gettimeofday (&finish, NULL);
			diff = finish.tv_sec - start.tv_sec;
			diff += (finish.tv_usec - start.tv_usec)/1000000.0;
			/* the format uses %d, so pass an int rather than a guint;
			   the divisor is clamped to avoid dividing by (nearly) zero */
			log_add_va (LOG_INFO, _("Saved %d articles in \"%s\" in %.1f seconds (%.0f art/sec)"),
				(int)articles->len,
				group_get_readable_name (group),
				diff,
				articles->len/(fabs(diff)<0.001?0.001:diff));
		}

		/* the array is ours to free on both branches above */
		g_ptr_array_free (articles, TRUE);
	}

	debug_exit ("file_headers_save");
}

/**
 * Delete a group's on-disk article cache: the mbox file for a folder,
 * or the ".idx" and ".dat" files for any other group.  The results of
 * the remove() calls are not checked.
 *
 * @param group the group whose files should be removed; it must have a
 *              server, and data_dir must be a nonempty string
 */
void
file_headers_destroy (const Group * group)
{
	debug_enter ("file_headers_destroy");

	g_return_if_fail (group!=NULL);
	g_return_if_fail (group->server!=NULL);
	g_return_if_fail (is_nonempty_string(data_dir));

	if (!group_is_folder(group))
	{
		/* ordinary groups keep an index file and a data file */
		gchar * idx_path = g_strdup_printf ("/%s/%s/%s.idx", data_dir, group->server->name, group->name);
		gchar * dat_path = g_strdup_printf ("/%s/%s/%s.dat", data_dir, group->server->name, group->name);

		remove (idx_path);
		remove (dat_path);

		g_free (idx_path);
		g_free (dat_path);
	}
	else
	{
		/* folders keep a single mbox file */
		gchar * mbox_path = get_mbox_filename (group);
		remove (mbox_path);
		g_free (mbox_path);
	}

	debug_exit ("file_headers_destroy");
}
