/*
** Modular Logfile Analyzer
** Copyright 2000 Jan Kneschke <jan@kneschke.de>
**
** Homepage: http://www.kneschke.de/projekte/modlogan
**

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version, and provided that the above
    copyright and permission notice is included with all distributed
    copies of this or derived software.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA

**
** $Id: mdatatypes.c,v 1.50 2001/12/31 14:16:39 ostborn Exp $
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <libxml/tree.h>

#include "config.h"
#include "mdatatypes.h"
#include "misc.h"

/* the datatypes */
#include "datatypes/count/datatype.h"
#include "datatypes/visited/datatype.h"
#include "datatypes/webhist/datatype.h"
#include "datatypes/state/datatype.h"
#include "datatypes/match/datatype.h"
#include "datatypes/record/datatype.h"
#include "datatypes/sublist/datatype.h"
#include "datatypes/split/datatype.h"
#include "datatypes/brokenlink/datatype.h"
#include "datatypes/traffic/datatype.h"
#include "datatypes/netmask/datatype.h"
#include "datatypes/visit/datatype.h"
#include "datatypes/query/datatype.h"

/*
 * Compile-time debug switches for the SAX reader code in this file.
 * M_DEBUG_SAX_ENTRY, when non-zero, compiles in the entry trace (event
 * value, tag type and a dump of the state stack) of mdata_read() and
 * mdata_insert_value(); M_DEBUG_SAX_DISPATCH, when non-zero, compiles in
 * the "not my job" messages that are printed when an event is handed on
 * to a deeper level.
 */
#define M_DEBUG_SAX_ENTRY 0
#define M_DEBUG_SAX_DISPATCH 0

/* one entry of the datatype <-> string lookup table (see key_mapping) */
typedef struct {
	int type;		/* a M_DATA_TYPE_* value */
	const char *string;	/* string used for that type; NULL marks the end of the table */
} key_map;

/*
 * Lookup table used by mdata_get_key_from_datatype() and
 * mdata_get_datatype_from_key(). Only the datatypes listed here can be
 * translated; both lookups stop at the first entry whose string is NULL.
 */
const key_map key_mapping[] = {
	{ M_DATA_TYPE_COUNT, "count" },
	{ M_DATA_TYPE_VISITED, "visited" },
	{ M_DATA_TYPE_WEBHIST, "webhist" },
	{ M_DATA_TYPE_BROKENLINK, "brokenlink" },
	{ M_DATA_TYPE_VISIT, "visit" },
	{ M_DATA_TYPE_SUBLIST, "sublist" },
	
	/* terminator - must stay the last entry */
	{ -1, NULL }
};

/**
 * get the string representation of the type of an element
 * 
 * mainly used by the xml-write code, which uses the string as the
 * element name (<string key='...'>)
 * 
 * prints a warning to stderr if the type of the element is not listed
 * in key_mapping
 * 
 * @param data the element whose type is looked up (may be NULL)
 * @return NULL if data is NULL, the corresponding string if the type is
 *         listed in key_mapping, 'unknown' otherwise
 */
const char *mdata_get_key_from_datatype(mdata *data) {
	const key_map *entry;

	if (!data) return NULL;

	for (entry = key_mapping; entry->string != NULL; entry++) {
		if (entry->type == data->type) {
			return entry->string;
		}
	}

	/* if everything went well we shouldn't reach this point.
	 * data->key is NULL for elements fresh from mdata_init(); passing NULL
	 * to '%s' is undefined behaviour, so substitute a placeholder */
	fprintf(stderr, "%s.%d: can't %s datatype '%d' (%s)\n", __FILE__, __LINE__, "get key for", data->type, data->key ? data->key : "(null)");

	return "unknown";
}

/**
 * get the data for a given string representation
 * 
 * mainly used by the xml-read code (key='...')
 * 
 * creates warnings if the datatype is unknown, not handled or unset
 * 
 * @param data the datatype 
 * @return the corresponding string or 'unknown' if the datatype is currently not handled
 */
int mdata_get_datatype_from_key(const char *key) {
	int i;
	if (!key) return M_DATA_TYPE_UNSET;
	
	i = 0;
	while (key_mapping[i].string != NULL) {
		if (strcmp(key_mapping[i].string, key) == 0) {
			return key_mapping[i].type;
		}
		i++;
	}
	
	/* if everything went well we shouldn't reach this point */
	fprintf(stderr, "%s.%d: can't %s datatype '%s'\n", __FILE__, __LINE__, "get datatype for", key);

	return M_DATA_TYPE_UNSET;
}

/**
 * release an element and everything it owns
 * 
 * the type specific part is released by the matching mdata_*_free
 * function, afterwards the key and the element itself are freed
 * 
 * prints a warning to stderr if the datatype is unset or not handled;
 * the key and the element are freed in that case, too
 * 
 * @param data the element to be freed (NULL is ignored)
 */
void mdata_free(mdata *data) {
	if (data == NULL) {
		return;
	}

	switch (data->type) {
	case M_DATA_TYPE_COUNT:      mdata_Count_free(data);      break;
	case M_DATA_TYPE_VISITED:    mdata_Visited_free(data);    break;
	case M_DATA_TYPE_WEBHIST:    mdata_WebHist_free(data);    break;
	case M_DATA_TYPE_RECORD:     mdata_Record_free(data);     break;
	case M_DATA_TYPE_STATE:      mdata_State_free(data);      break;
	case M_DATA_TYPE_MATCH:      mdata_Match_free(data);      break;
	case M_DATA_TYPE_SUBLIST:    mdata_SubList_free(data);    break;
	case M_DATA_TYPE_SPLIT:      mdata_Split_free(data);      break;
	case M_DATA_TYPE_BROKENLINK: mdata_BrokenLink_free(data); break;
	case M_DATA_TYPE_TRAFFIC:    mdata_Traffic_free(data);    break;
	case M_DATA_TYPE_NETMASK:    mdata_Netmask_free(data);    break;
	case M_DATA_TYPE_VISIT:      mdata_Visit_free(data);      break;
	case M_DATA_TYPE_QUERY:      mdata_Query_free(data);      break;
	case M_DATA_TYPE_UNSET:
		fprintf(stderr, "%s.%d: can't %s unset datatype\n", __FILE__, __LINE__, "free");
		break;
	default:
		fprintf(stderr, "%s.%d: can't %s datatype '%d'\n", __FILE__, __LINE__, "free", data->type);
		break;
	}

	/* free(NULL) is a no-op, so an unset key needs no special case */
	free(data->key);
#ifdef DEBUG_DATATYPES
	fprintf(stderr, "%s.%d: freeing ID '%d'\n", __FILE__, __LINE__, data->id);
#endif
	free(data);
}

/**
 * create a new datatype
 * 
 * allocates the memory for a new datatype and sets the internal 
 * type specifier to UNSET
 * 
 * @return a new 'undefined' datatype
 */

mdata *mdata_init () {
	mdata *data;
	static int id = 0;
	data = malloc(sizeof(mdata));
	assert(data);
	
	data->key	= NULL;
	data->type	= M_DATA_TYPE_UNSET;
	data->id        = id++;
	
	return data;
}

/**
 * converts the type specific part of an element to XML
 * 
 * this function is only a dispatcher between the different datatypes
 * and calls their own ..._to_xml function.
 * 
 * prints a warning to stderr if the datatype is unset or has no
 * ..._to_xml function wired up here
 * 
 * @param fd the output file the XML is written to
 * @param data the element which is written
 * @return -1 on error or the return value of the specific _to_xml function
 */

int mdata_datatype_to_xml(gzFile *fd, mdata *data) {
	if (data == NULL) {
		return -1;
	}

	switch (data->type) {
	case M_DATA_TYPE_COUNT:      return mdata_Count_to_xml(fd, data);
	case M_DATA_TYPE_VISITED:    return mdata_Visited_to_xml(fd, data);
	case M_DATA_TYPE_BROKENLINK: return mdata_BrokenLink_to_xml(fd, data);
	case M_DATA_TYPE_WEBHIST:    return mdata_WebHist_to_xml(fd, data);
	case M_DATA_TYPE_VISIT:      return mdata_Visit_to_xml(fd, data);
	case M_DATA_TYPE_SUBLIST:    return mdata_SubList_to_xml(fd, data);
	case M_DATA_TYPE_UNSET:
		fprintf(stderr, "%s.%d: can't %s unset datatype\n", __FILE__, __LINE__, "write");
		break;
	default:
		fprintf(stderr, "%s.%d: can't %s datatype '%d'\n", __FILE__, __LINE__, "write", data->type);
		break;
	}

	/* only the two warning branches get here */
	return -1;
}

/**
 * writes an element as one XML element to the output file
 * 
 * the element name is the string representation of the datatype, the
 * url-encoded key is written as the 'key' attribute and the body is
 * produced by mdata_datatype_to_xml()
 * 
 * @param fd the output file the XML is written to
 * @param data the element which is written
 * @return -1 if data is NULL, 0 otherwise (a failure of the type specific
 *         writer is not propagated)
 */

int mdata_write(gzFile *fd, mdata *data) {
	const char *tag;
	char *encoded_key;

	if (data == NULL) return -1;

	/* look the element name up once so an unknown datatype is reported
	 * once and the opening and the closing tag are guaranteed to match */
	tag = mdata_get_key_from_datatype(data);

	/* encode key with url-encoding; an unset key is written as "" */
	encoded_key = data->key ? url_encode(data->key) : NULL;

	/* passing NULL to '%s' is undefined, fall back to an empty attribute */
	gzprintf(fd, "<%s key=\"%s\">\n", tag, encoded_key ? encoded_key : "");
	/* free memory */
	free(encoded_key);

	mdata_datatype_to_xml(fd, data);

	gzprintf(fd, "</%s>\n", tag);

	return 0;
}

/**
 * combine two elements
 * 
 * used if the keys of both elements are equal during a *_insert.
 * the type specific append function can reject the combine if its
 * internal constraints aren't matched
 * 
 * prints a warning to stderr if the datatype is unset or not handled
 * 
 * @param dst element the data is merged into
 * @param src element which is merged into dst
 * @return -1 if the types of dst and src differ, 0 if the datatype is
 *         unset or not handled, otherwise the return value of the type
 *         specific append function
 */

mdata_append_return mdata_append(mdata *dst, mdata *src) {
	int result = 0;

	/* elements of different types can't be combined */
	if (src->type != dst->type) {
		return -1;
	}

	switch (src->type) {
	case M_DATA_TYPE_COUNT:      result = mdata_Count_append(dst, src);      break;
	case M_DATA_TYPE_VISITED:    result = mdata_Visited_append(dst, src);    break;
	case M_DATA_TYPE_VISIT:      result = mdata_Visit_append(dst, src);      break;
	case M_DATA_TYPE_WEBHIST:    result = mdata_WebHist_append(dst, src);    break;
	case M_DATA_TYPE_RECORD:     result = mdata_Record_append(dst, src);     break;
	case M_DATA_TYPE_STATE:      result = mdata_State_append(dst, src);      break;
	case M_DATA_TYPE_MATCH:      result = mdata_Match_append(dst, src);      break;
	case M_DATA_TYPE_SUBLIST:    result = mdata_SubList_append(dst, src);    break;
	case M_DATA_TYPE_SPLIT:      result = mdata_Split_append(dst, src);      break;
	case M_DATA_TYPE_BROKENLINK: result = mdata_BrokenLink_append(dst, src); break;
	case M_DATA_TYPE_TRAFFIC:    result = mdata_Traffic_append(dst, src);    break;
	case M_DATA_TYPE_NETMASK:    result = mdata_Netmask_append(dst, src);    break;
	case M_DATA_TYPE_UNSET:
		fprintf(stderr, "%s.%d: can't %s unset datatype\n", __FILE__, __LINE__, "append");
		break;
	default:
		fprintf(stderr, "%s.%d: can't %s datatype '%d'\n", __FILE__, __LINE__, "append", src->type);
		break;
	}

	return result;
}

/**
 * copy an element
 * 
 * dispatches to the type specific mdata_*_copy function
 * 
 * prints a warning to stderr if the datatype is unset or not handled
 * 
 * @param src the element to be copied
 * @return a copy of the provided element, or NULL if the datatype is
 *         unset or not handled
 */

mdata *mdata_copy(mdata *src) {
	switch (src->type) {
	case M_DATA_TYPE_COUNT:      return mdata_Count_copy(src);
	case M_DATA_TYPE_VISITED:    return mdata_Visited_copy(src);
	case M_DATA_TYPE_WEBHIST:    return mdata_WebHist_copy(src);
	case M_DATA_TYPE_RECORD:     return mdata_Record_copy(src);
	case M_DATA_TYPE_STATE:      return mdata_State_copy(src);
	case M_DATA_TYPE_MATCH:      return mdata_Match_copy(src);
	case M_DATA_TYPE_SUBLIST:    return mdata_SubList_copy(src);
	case M_DATA_TYPE_SPLIT:      return mdata_Split_copy(src);
	case M_DATA_TYPE_BROKENLINK: return mdata_BrokenLink_copy(src);
	case M_DATA_TYPE_TRAFFIC:    return mdata_Traffic_copy(src);
	case M_DATA_TYPE_NETMASK:    return mdata_Netmask_copy(src);
	case M_DATA_TYPE_UNSET:
		fprintf(stderr, "%s.%d: can't %s unset datatype\n", __FILE__, __LINE__, "copy");
		break;
	default:
		fprintf(stderr, "%s.%d: can't %s datatype '%d'\n", __FILE__, __LINE__, "copy", src->type);
		break;
	}

	/* nothing could be copied */
	return NULL;
}

/**
 * feed one XML event (opening tag, closing tag or text) into a 'mdata'
 * 
 * The state stack decides who handles the event: if m->st_depth equals
 * m->st_depth_max the event is handled by this function itself, otherwise
 * m->st_depth is advanced and the event is passed on to the ..._from_xml
 * function of the datatype whose id is stored in m->st[].
 * 
 * Handled here:
 *  - M_TAG_BEGIN: the datatype is looked up from 'value', pushed onto the
 *    state stack, and the url-decoded 'key' attribute becomes data->key
 *  - M_TAG_END: the stack entry is cleared and m->st_depth_max decremented
 *  - M_TAG_TEXT: ignored
 * 
 * prints error messages if an unhandled datatype or tagtype should be read
 * 
 * @param m the state stack of the parser
 * @param tagtype kind of the event (M_TAG_BEGIN, M_TAG_END or M_TAG_TEXT)
 * @param data the element which is filled
 * @param type passed through unchanged to the ..._from_xml functions
 * @param value passed to mdata_get_datatype_from_key() for M_TAG_BEGIN;
 *        presumably the tag name resp. the text of the event - TODO confirm
 * @param attrs attributes of the tag; only the first name/value pair
 *        (attrs[0], attrs[1]) is looked at and it has to be 'key'
 * @return 0 - success, -1 error
 * 
 */
int mdata_read(mstate_stack *m, int tagtype, mdata *data, int type, const xmlChar *value, const xmlChar **attrs) {
	int handled = 0, datatype = 0;
	/* 'i' is only used by the debug loop below */
	int i;
#if M_DEBUG_SAX_ENTRY
	M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
		 "|--> '%s' - %s - %d\n", 
		 value, tagtype == 1 ? "open" : (tagtype == 2 ? "close" : (tagtype == 3 ? "text" : "unknown")), type);
	M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
		 "o->");
	for (i = 0; i < m->st_depth_max + 2; i++) {
		fprintf(stderr, " %d (%p)", m->st[i].id, m->st[i].data);
	}
	fprintf(stderr, "\n");
#endif
	switch(tagtype) {
	case M_TAG_BEGIN:
		if (m->st_depth != m->st_depth_max) {
#if M_DEBUG_SAX_DISPATCH
			M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
				 m->st_depth, 
				 m->st_depth_max,
				 value);
#endif
			/* not our level: step down first, then take the datatype stored
			 * for that level.
			 * NOTE(review): M_TAG_END and M_TAG_TEXT below read the id
			 * before stepping down - confirm the asymmetry is intended */
			m->st_depth++;
			datatype = m->st[m->st_depth].id;
		} else {
			int dt;
			
			if ((dt = mdata_get_datatype_from_key(value)) == M_DATA_TYPE_UNSET) {
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "no datatype found for '%s'\n",
						 value);
				return -1;
			} else {
				/* open a new level and remember its datatype.
				 * NOTE(review): the index is not checked against the size of
				 * m->st[] here - confirm the caller bounds the nesting */
				m->st_depth_max++;
				m->st_depth++;
				m->st[m->st_depth].id = dt;
				
				/* check if we have a key */
				if (attrs && attrs[0] && 0 == strcmp("key", attrs[0])
				    && attrs[1]) {
					/* uri-decode the key */
					char *decoded_key = url_decode(attrs[1]);
					/* we don't need the encoded anymore */
					
					/* replace a key left over from an earlier event */
					if (data->key) free(data->key);
					
					data->key = decoded_key;
					
				} else {
					/* NOTE(review): the stack counters were already advanced
					 * above and are not rolled back on this error path */
					M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "no key set for '%s'\n",
						 value);
					return -1;
				}
#if 0
				M_DEBUG2(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "setting datatype for '%s' to %d\n", 
					 value, dt);
#endif
				handled = 1;
			}
		}
		break;
	case M_TAG_END:
		if (m->st_depth != m->st_depth_max) {
#if M_DEBUG_SAX_DISPATCH
			M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
				 m->st_depth, 
				 m->st_depth_max,
				 value);
#endif
			/* not our level: take the datatype of the current level, then step down */
			datatype = m->st[m->st_depth].id;
			m->st_depth++;
		} else {
			/* our own closing tag: drop the level opened by M_TAG_BEGIN;
			 * m->st_depth itself is left untouched here */
			m->st[m->st_depth].id = 0;
			m->st_depth_max--;
			
			handled = 1;
		}
		break;
	case M_TAG_TEXT:
		if (m->st_depth != m->st_depth_max) {
#if M_DEBUG_SAX_DISPATCH
			M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
				 m->st_depth, 
				 m->st_depth_max,
				 value);
#endif
			/* not our level: take the datatype of the current level, then step down */
			datatype = m->st[m->st_depth].id;
			m->st_depth++;
		} else {
			/* text directly at our own level is ignored */
			handled = 1;
		}
		break;
	default:
		M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "can't handle tagtype '%d'\n", 
			 tagtype);
		return -1;
	}
#if 0	
	fprintf(stderr, "o-<");
	for (i = 0; i < m->st_depth_max + 2; i++) {
		fprintf(stderr, " %d", m->st[i].id);
	}
	fprintf(stderr, "\n");
#endif
	/* the event belongs to a deeper level: hand it to the reader of the
	 * datatype found on the stack; any non-zero result is reported as -1 */
	if (!handled) {
#if 0
		M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "datatype '%d' - level %d - %d\n", 
			 datatype, m->st_depth, data->type);
#endif
		switch(datatype) {
		case M_DATA_TYPE_COUNT:
			if (mdata_Count_from_xml(m, tagtype, data, type, value, attrs))
				return -1;
			break;
		case M_DATA_TYPE_VISITED:
			if (mdata_Visited_from_xml(m, tagtype, data, type, value, attrs))
				return -1;
			break;
		case M_DATA_TYPE_BROKENLINK:
			if (mdata_BrokenLink_from_xml(m, tagtype, data, type, value, attrs)) 
				return -1;
			break;
		case M_DATA_TYPE_VISIT:
			if (mdata_Visit_from_xml(m, tagtype, data, type, value, attrs)) 
				return -1;
			break;
		case M_DATA_TYPE_WEBHIST:
			if (mdata_WebHist_from_xml(m, tagtype, data, type, value, attrs)) 
				return -1;
			break;
		case M_DATA_TYPE_SUBLIST:
			if (mdata_SubList_from_xml(m, tagtype, data, type, value, attrs)) 
				return -1;
			break;
		case M_DATA_TYPE_UNSET:
			M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "can't %s unset datatype\n", 
				 "read");
			return -1;
			break;
		default:
			M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "can't %s xml for field '%s' - datatype: %d\n", 
				 "read", value,
				 datatype);
			return -1;
		}
	}
	
	return 0;
}

/**
 * get the count of the countable elements
 * 
 * countable are the count, visited, brokenlink and sublist datatypes;
 * for every other datatype a warning is printed to stderr
 * 
 * @param data the element which is queried (may be NULL)
 * @return the count value or 0 if data is NULL or not countable
 */

int mdata_get_count(mdata *data) {
	if (data == NULL) return 0;

	switch(data->type) {
		case M_DATA_TYPE_COUNT:
			return data->data.count.count;
		case M_DATA_TYPE_VISITED:
			return data->data.visited.count;
		case M_DATA_TYPE_BROKENLINK:
			return data->data.brokenlink.count;
		case M_DATA_TYPE_SUBLIST:
			return data->data.sublist.count;
		case M_DATA_TYPE_UNSET:
			/* an element fresh from mdata_init() is unset AND has a NULL key;
			 * passing NULL to '%s' is undefined, so print a placeholder */
			fprintf(stderr, "%s.%d: can't %s from unset datatype (%s)\n", __FILE__, __LINE__, "get count", data->key ? data->key : "(null)");
			break;
		default:
			fprintf(stderr, "%s.%d: can't %s from datatype '%d'\n", __FILE__, __LINE__, "get count", data->type);
			break;
	}
	return 0;
}

/**
 * get the vcount of the elements with vcount (currently 'visited' only)
 * 
 * for every other datatype a warning is printed to stderr
 * 
 * @param data the element which is queried (may be NULL)
 * @return the vcount value or 0 if data is NULL or has no vcount
 */

double mdata_get_vcount(const mdata *data) {
	if (data == NULL) return 0;

	switch(data->type) {
		case M_DATA_TYPE_VISITED:
			return data->data.visited.vcount;
		case M_DATA_TYPE_UNSET:
			/* an element fresh from mdata_init() is unset AND has a NULL key;
			 * passing NULL to '%s' is undefined, so print a placeholder */
			fprintf(stderr, "%s.%d: can't %s from unset datatype (%s)\n", __FILE__, __LINE__, "get vcount", data->key ? data->key : "(null)");
			break;
		default:
			fprintf(stderr, "%s.%d: can't %s from datatype '%d'\n", __FILE__, __LINE__, "get vcount", data->type);
			break;
	}
	return 0;
}

/**
 * set the count of countable elements
 * 
 * countable are the count, visited, brokenlink and sublist datatypes;
 * for every other datatype a warning is printed to stderr
 * 
 * @param data the element which is modified (may be NULL)
 * @param count the new count value
 * @return -1 error (data is NULL or not countable), 0 ok
 */

int mdata_set_count(mdata *data, int count) {
	int rc = 0;

	if (!data) {
		return -1;
	}

	switch (data->type) {
	case M_DATA_TYPE_COUNT:
		data->data.count.count = count;
		break;
	case M_DATA_TYPE_VISITED:
		data->data.visited.count = count;
		break;
	case M_DATA_TYPE_BROKENLINK:
		data->data.brokenlink.count = count;
		break;
	case M_DATA_TYPE_SUBLIST:
		data->data.sublist.count = count;
		break;
	case M_DATA_TYPE_UNSET:
		fprintf(stderr, "%s.%d: can't %s for unset datatype\n", __FILE__, __LINE__, "set count");
		rc = -1;
		break;
	default:
		fprintf(stderr, "%s.%d: can't %s for datatype '%d'\n", __FILE__, __LINE__, "set count", data->type);
		rc = -1;
		break;
	}

	return rc;
}

/**
 * store the content of one XML event in an internal structure
 * 
 * What 'dest' points to is selected by 'type':
 *  - M_DATA_FIELDTYPE_LONG:   accessed as 'int *', set from a text event
 *  - M_DATA_FIELDTYPE_DOUBLE: accessed as 'double *', set from a text event
 *  - M_DATA_FIELDTYPE_STRING: accessed as 'char **'; the old string is
 *    freed and replaced by a malloc()ed copy of the text
 *  - M_DATA_FIELDTYPE_LIST:   accessed as 'mlist **'; the events are
 *    forwarded to mdata_read() and every finished element that has a key
 *    is inserted into the list
 *  - M_DATA_FIELDTYPE_HASH:   not supported, always fails
 * 
 * @param m the state stack of the parser (only used for lists)
 * @param tagtype kind of the event (M_TAG_BEGIN, M_TAG_END or M_TAG_TEXT)
 * @param dest destination, interpreted according to 'type'
 * @param type type of the destination (M_DATA_FIELDTYPE_*)
 * @param value the text of the event; for lists it is handed on to mdata_read()
 * @param attrs attributes of the tag, handed on to mdata_read()
 * @return 0 on success, -1 on error (unknown type, tagtype that is not
 *         handled for a scalar type, failed allocation, mdata_read() failure)
 */

int mdata_insert_value(mstate_stack *m, int tagtype, void *dest, int type, const xmlChar *value, const xmlChar **attrs) {
#if M_DEBUG_SAX_ENTRY
	int i;

	M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
		 "|--> '%s' - %s - %d\n", 
		 value, tagtype == 1 ? "open" : (tagtype == 2 ? "close" : (tagtype == 3 ? "text" : "unknown")), type);
	M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
		 "o->");
	for (i = 0; i < m->st_depth_max + 2; i++) {
		fprintf(stderr, " %d (%p)", m->st[i].id, m->st[i].data);
	}
	fprintf(stderr, "\n");
#endif
	switch (type) {
	case M_DATA_FIELDTYPE_LONG:
		if (tagtype != M_TAG_TEXT) {
			M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "unhandled tagtype: %d\n",
				 tagtype);
			return -1;
		}
		/* despite the name of the field type the destination is an 'int' */
		*(int *)dest = (int)strtol((const char *)value, NULL, 10);
		break;

	case M_DATA_FIELDTYPE_DOUBLE:
		if (tagtype != M_TAG_TEXT) {
			M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "unhandled tagtype: %d\n",
				 tagtype);
			return -1;
		}
		*(double *)dest = strtod((const char *)value, NULL);
		break;

	case M_DATA_FIELDTYPE_STRING: {
		char **slot = (char **)dest;
		char *copy;
		size_t size;

		if (tagtype != M_TAG_TEXT) {
			M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "unhandled tagtype: %d\n",
				 tagtype);
			return -1;
		}

		/* drop the previous value; free(NULL) is a no-op */
		free(*slot);
		*slot = NULL;

		size = strlen((const char *)value) + 1;
		copy = malloc(size);
		if (copy == NULL) {
			/* the destination stays NULL, never copy into a NULL buffer */
			fprintf(stderr, "%s.%d: insert failed: out of memory\n", __FILE__, __LINE__);
			return -1;
		}
		memcpy(copy, value, size);

		*slot = copy;
		break;
	}

	case M_DATA_FIELDTYPE_HASH:
		/* reading a hash is not implemented for the event based reader */
		M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "unhandled datatype: %d\n",
				 type);
		return -1;

	case M_DATA_FIELDTYPE_LIST: {
		mlist *list = *(mlist **) dest;

		switch (tagtype) {
		case M_TAG_BEGIN: {
			mdata *data;

			if (m->st_depth != m->st_depth_max) {
#if M_DEBUG_SAX_DISPATCH
				M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
					 m->st_depth, 
					 m->st_depth_max,
					 value);
#endif
				/* a level below us is already open: step down and reuse the
				 * element stored there, creating it on first use */
				m->st_depth++;
				data = m->st[m->st_depth].data;
				if (data == NULL) {
					m->st[m->st_depth].data = data = mdata_init();
				}
			} else {
				/* open the level of the list itself and attach a fresh element */
				m->st_depth_max++;
				m->st_depth++;
				m->st[m->st_depth].id = type;
				m->st[m->st_depth].data = data = mdata_init();
				assert(data);
			}

			if (mdata_read(m, tagtype, data, type, value, attrs)) {
				M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "mdata_read failed\n");
				return -1;
			}
			break;
		}
		case M_TAG_END:	{
			/* both values are taken BEFORE the depth is changed below */
			mdata *data = m->st[m->st_depth].data;
			int std = m->st_depth;

			if (m->st_depth != m->st_depth_max) {
#if M_DEBUG_SAX_DISPATCH
				M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
					 m->st_depth, 
					 m->st_depth_max,
					 value);
#endif
				m->st_depth++;
				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}
				/* mdata_read() closed levels down to ours: the element is
				 * complete, hand it to the list or throw it away */
				if (m->st_depth_max == std) {
					if (data && data->key) {
						mlist_insert(list, data);
					} else if (data) {
						M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
							 "have to call mdata_free\n");
						mdata_free(data);
					}
					m->st[std].data = NULL;
				}
			} else {
				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}

				/* close the level of the list itself */
				m->st[m->st_depth].id = 0;
				m->st_depth_max--;

				if (data && data->key) {
					M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "inserting data = %p, %s, %s\n",
						 data, value, data->key);
					mlist_insert(list, data);
				} else if (data) {
					mdata_free(data);
				}
				m->st[m->st_depth].data = NULL;
			}
			break;
		}
		case M_TAG_TEXT: {
			if (m->st_depth != m->st_depth_max) {
				mdata *data = m->st[m->st_depth].data;
				m->st_depth++;

				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}
			} else {
				/* text directly inside the list element is ignored */
			}
			break;
		}
		default:
			/* reported only, the call still returns 0 */
			M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "unhandled tagtype: %d - %s - %d\n",
				 tagtype,
				 value,
				 type);
			break;
		}
		break;
	}

	default: 
		fprintf(stderr, "%s.%d: insert failed: unknown type %d -> %s\n", 
			__FILE__, __LINE__, 
			type,
			value ? (char *)value : "(NULL)");
		return -1;
	}
	return 0;
}

/**
 * merge sort of an index array over an array of elements
 * 
 * 'a' holds indexes into 'md'; on return a[l..r] is ordered according to
 * 'sortby' and 'sortdir'. 'md' itself is not modified. For an unknown
 * 'sortby' value a[] is left unchanged.
 * 
 * For M_SORTBY_KEY the keys are compared with strcmp(), so every
 * referenced element needs a non-NULL key.
 * 
 * @param md the elements which are compared
 * @param a the index array which is sorted
 * @param b scratch array, written at the same positions l..r as 'a'
 * @param l first position of the range (inclusive)
 * @param r last position of the range (inclusive)
 * @param sortby M_SORTBY_KEY, M_SORTBY_COUNT or M_SORTBY_VCOUNT
 * @param sortdir M_SORTDIR_ASC for ascending order, any other value for
 *        descending order
 */
void mdata_array_msort(mdata **md, int *a, int *b, int l, int r, int sortby, int sortdir) {
	int i, j, k, m;

	if (r <= l) return;

	/* same midpoint as (r + l) / 2 for valid positions, but no overflow */
	m = l + (r - l) / 2;
	mdata_array_msort(md, a, b, l, m, sortby, sortdir);
	mdata_array_msort(md, a, b, m+1, r, sortby, sortdir);

	/* copy the left half in order and the right half reversed into b, so
	 * that i walks up from l and j walks down from r during the merge */
	for (i = m + 1; i > l; i--) b[i-1] = a[i-1];
	for (j = m; j < r; j++) b[r+m-j] = a[j+1];

	for (k = l; k <= r; k++) {
		/* <0, 0, >0: md[b[i]] is smaller than, equal to, larger than md[b[j]] */
		int cmp;

		switch (sortby) {
		case M_SORTBY_KEY:
			cmp = strcmp(md[b[i]]->key, md[b[j]]->key);
			break;
		case M_SORTBY_COUNT: {
			int ci = mdata_get_count(md[b[i]]);
			int cj = mdata_get_count(md[b[j]]);

			cmp = (ci > cj) - (ci < cj);
			break;
		}
		case M_SORTBY_VCOUNT: {
			double vi = mdata_get_vcount(md[b[i]]);
			double vj = mdata_get_vcount(md[b[j]]);

			cmp = (vi > vj) - (vi < vj);
			break;
		}
		default:
			/* unknown criterion: keep a[k] as it is and go on with the
			 * next position of the for-loop */
			continue;
		}

		if (sortdir == M_SORTDIR_ASC) {
			a[k] = (cmp < 0) ? b[i++] : b[j--];
		} else {
			a[k] = (cmp > 0) ? b[i++] : b[j--];
		}
	}
}

/**
 * check if an element is marked as grouped
 * 
 * only the count, visited and brokenlink datatypes carry the flag; for
 * every other datatype a warning is printed to stderr
 * 
 * @param data the element which is queried (must not be NULL)
 * @return non-zero if the element is in the state M_DATA_STATE_GROUPED,
 *         0 otherwise
 */
int mdata_is_grouped(mdata *data) {
	if (data->type == M_DATA_TYPE_COUNT) {
		return (data->data.count.grouped == M_DATA_STATE_GROUPED);
	}
	if (data->type == M_DATA_TYPE_VISITED) {
		return (data->data.visited.grouped == M_DATA_STATE_GROUPED);
	}
	if (data->type == M_DATA_TYPE_BROKENLINK) {
		return (data->data.brokenlink.grouped == M_DATA_STATE_GROUPED);
	}

	fprintf(stderr, "%s.%d: unhandled datatype for grouped: %d\n", __FILE__, __LINE__, data->type);
	return 0;
}

/**
 * show an element by calling the show function of its datatype
 * 
 * only wired up for the brokenlink datatype; for every other datatype a
 * warning is printed to stderr
 * 
 * @param data the element which is shown (must not be NULL)
 * @return 0 if the element was shown, -1 if the datatype is not handled
 */
int mdata_show(const mdata *data) {
	if (data->type != M_DATA_TYPE_BROKENLINK) {
		fprintf(stderr, "%s.%d: unhandled datatype for show: %d\n", __FILE__, __LINE__, data->type);
		return -1;
	}

	mdata_BrokenLink_show(data);
	return 0;
}
