/*--------------------------------------------------------------------
 *    The GMT-system:	@(#)blockmode.c	1.14  09/21/99
 *
 *	Copyright (c) 1991-1999 by P. Wessel and W. H. F. Smith
 *	See COPYING file for copying and redistribution conditions.
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; version 2 of the License.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	Contact info: www.soest.hawaii.edu/gmt
 *--------------------------------------------------------------------*/

/*
   blockmode.c
   Takes lon, lat, data, [weight] on GMT_stdin or file and writes out one value
   per cell, where cellular region is bounded by West East South North and
   cell dimensions are delta_x, delta_y.
      
   Author: 	Walter H. F. Smith
   Date:	28 October, 1998
   Version:	1st version based on GMT3.1b blockmedian.c, but with changes
   		to struct DATA and to sort comparison routines to facilitate
   		mode coding.
*/

#include "gmt.h"

/* One retained input record.  main() fills one of these for every point
   that falls inside the region and sorts the array so that records of the
   same cell become contiguous. */
struct DATA {
	double	a[4];	/* a[0] = x, a[1] = y, a[2] = z, a[3] = w  */
        int     i;	/* cell number, stored by main() as iy * n_x + ix */
}       *data;		/* global record array; allocated, grown and freed in main() */


/* Forward declarations.  The three compare_* routines are qsort()
   comparators over struct DATA; weighted_mode() estimates the mode of
   component a[k] over n consecutive records. */
int	compare_x(const void *point_1, const void *point_2);
int	compare_y(const void *point_1, const void *point_2);
int	compare_index_z(const void *point_1, const void *point_2);
double	weighted_mode (struct DATA *d, double wsum, int n, int k);

main (int argc, char **argv)
{	
	
	BOOLEAN	error, weighted, offset, report, nofile = TRUE, done = FALSE, first = TRUE, go_quickly = FALSE;
	/* Default value for go_quickly = FALSE for backward compatibility with 3.0  */
	BOOLEAN skip;
	
	FILE *fp = NULL;
	
	double	west, east, south, north, delta_x, delta_y, del_off;
	double	*in, out[4], idx, idy, i_n_in_cell;
	
	int	i, n_x, n_y, ix, iy, index, first_in_cell, first_in_new_cell, fno, n_files = 0, n_args, n_req;
	int	n_lost, n_read, n_pitched, n_cells_filled, n_alloc, x, y, n_expected_fields, n_fields, n_out;
	int	n_in_cell;
	
	char	modifier, buffer[BUFSIZ], format[BUFSIZ];

	argc = GMT_begin (argc, argv);
	
	west = east = south = north = delta_x = delta_y = 0.0;
	del_off = 0.5;
	error = weighted = offset = report = FALSE;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] == '-') {
			switch (argv[i][1]) {
              
				/* Common parameters */
                      
				case 'H':
				case 'R':
				case 'V':
				case ':':
				case '\0':
                                      error += GMT_get_common_args (argv[i], &west, &east, &south, &north);
                                      break;
                              
				/* Supplemental parameters */
                              
				case 'b':
					error += GMT_io_selection (&argv[i][2]);
					break;
					
				case 'I':
					GMT_getinc (&argv[i][2], &delta_x, &delta_y);
					break;
				case 'N':
					offset = TRUE;
					break;
				case 'Q':
					go_quickly = TRUE;
					break;
				case 'W':
					if ( (modifier = argv[i][2]) == 'i' || modifier == 'I') {
						weighted = TRUE;
						report = FALSE;
					}
					else if (modifier == 'O' || modifier == 'o') {
						report = TRUE;
						weighted = FALSE;
					}
					else
						weighted = report = TRUE;
					break;
					
				default:
					error = TRUE;
                                        GMT_default_error (argv[i][1]);
					break;
			}
		}
		else
			n_files++;
	}

	if (argc == 1 || GMT_quick) {
		fprintf (stderr, "blockmode %s - Block averaging by mode estimates\n\n", GMT_VERSION);
		fprintf (stderr, "usage: blockmode [infile(s)] -I<xinc[m]>[/<yinc>[m]] -R<west/east/south/north>\n");
		fprintf (stderr, "\t[-H[<nrec>]] [-N] [-Q] [-V] [-W[i][o] ] [-:] [-bi[s][<n>]] [-bo[s]]\n\n");
		
		if (GMT_quick) exit (EXIT_FAILURE);
		
		fprintf (stderr, "\t-I sets Increment of the grid; enter xinc, optionally xinc/yinc.\n");
		fprintf (stderr, "\t   Default is yinc = xinc.  Append an m [or s] to xinc or yinc to indicate minutes [or seconds],\n");
		fprintf (stderr, "\t   e.g., -I10m/5m grids longitude every 10 minutes, latitude every 5 minutes.\n");
		GMT_explain_option ('R');
		fprintf (stderr, "\n\tOPTIONS:\n");
		GMT_explain_option ('H');
		fprintf (stderr, "\t-N Offsets registration so block edges are on gridlines (pixel reg.).  [Default: grid reg.]\n");
		fprintf (stderr, "\t-Q Quicker; get mode z and mean x,y.  [Default gets mode x, mode y, mode z.]\n");
		GMT_explain_option ('V');
		fprintf (stderr, "\t-W sets Weight options.  -WI reads Weighted Input (4 cols: x,y,z,w) but writes only (x,y,z) Output.\n");
		fprintf (stderr, "\t   -WO reads unWeighted Input (3 cols: x,y,z) but reports sum (x,y,z,w) Output.\n");
		fprintf (stderr, "\t   -W with no modifier has both weighted Input and Output; Default is no weights used.\n");
		GMT_explain_option (':');
		GMT_explain_option ('i');
		GMT_explain_option ('n');
		fprintf (stderr, "\t   Default is 3 (or 4 if -W is set).\n");
		GMT_explain_option ('o');
		GMT_explain_option ('.');
		exit (EXIT_FAILURE);
	}
	
	if (!project_info.region_supplied) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR:  Must specify -R option\n", GMT_program);
		error++;
	}
	if (delta_x <= 0.0 || delta_y <= 0.0) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR -I option.  Must specify positive increment(s)\n", GMT_program);
		error = TRUE;
	}
	if (GMT_io.binary[0] && gmtdefs.io_header) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR.  Binary input data cannot have header -H\n", GMT_program);
		error++;
	}
	n_req = (weighted) ? 4 : 3;
	if (GMT_io.binary[0] && GMT_io.ncol[0] == 0) GMT_io.ncol[0] = n_req;
	if (GMT_io.binary[0] && n_req > GMT_io.ncol[0]) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR.  binary input data must have at least %d columns\n", GMT_program, n_req);
		error++;
	}
	
	if (error) exit (EXIT_FAILURE);

	GMT_put_history (argc, argv);	/* Update .gmtcommands */

	if (GMT_io.binary[0] && gmtdefs.verbose) {
		char *type[2] = {"double", "single"};
		fprintf (stderr, "%s: Expects %d-column %s-precision binary data\n", GMT_program, GMT_io.ncol[0], type[GMT_io.single_precision[0]]);
	}

#ifdef SET_IO_MODE
	GMT_setmode (1);
#endif

	idx = 1.0 / delta_x;
	idy = 1.0 / delta_y;
	n_x = irint ((east - west) * idx) + 1;
	n_y = irint ((north - south) * idy) + 1;
	
	if (offset) {
		n_x--;
		n_y--;
		del_off = 0.0;
	}
	
	if (gmtdefs.verbose) {
		sprintf (format, "%%s: W: %s E: %s S: %s N: %s nx: %%d ny: %%d\n\0", gmtdefs.d_format, gmtdefs.d_format, gmtdefs.d_format, gmtdefs.d_format);
		fprintf (stderr, format, GMT_program, west, east, south, north, n_x, n_y);
	}

	n_read = n_pitched = 0;
        n_alloc = GMT_CHUNK;
        data = (struct DATA *) GMT_memory (VNULL, (size_t)n_alloc, sizeof(struct DATA), GMT_program);
	
	/* Read the input data  */
	
	x = (gmtdefs.xy_toggle) ? 1 : 0;        y = 1 - x;              /* Set up which columns have x and y */

	n_expected_fields = (GMT_io.binary[0]) ? GMT_io.ncol[0] : BUFSIZ;

	if (n_files > 0)
		nofile = FALSE;
	else
		n_files = 1;
	n_args = (argc > 1) ? argc : 2;
	
	for (fno = 1; !done && fno < n_args; fno++) {	/* Loop over input files, if any */
		if (!nofile && argv[fno][0] == '-') continue;
		
		if (nofile) {	/* Just read standard input */
			fp = GMT_stdin;
			done = TRUE;
#ifdef SET_IO_MODE
			GMT_setmode (0);
#endif
		}
		else if ((fp = GMT_fopen (argv[fno], GMT_io.r_mode)) == NULL) {
			fprintf (stderr, "%s: Cannot open file %s\n", GMT_program, argv[fno]);
			continue;
		}

		if (!nofile && gmtdefs.verbose) fprintf (stderr, "%s: Working on file %s\n", GMT_program, argv[fno]);
		
		if (gmtdefs.io_header) {
			for (i = 0; i < gmtdefs.n_header_recs; i++) {
				fgets (buffer, BUFSIZ, fp);
				buffer[strlen(buffer)-1] = 0;
				if (first) (report && !(weighted)) ? printf ("%s weights\n", buffer) : printf ("%s\n", buffer);
			}
			first = FALSE;
		}

		n_fields = GMT_input (fp,  &n_expected_fields, &in);
				
		while (! (GMT_io.status & GMT_IO_EOF)) {	/* Not yet EOF */
	
			skip = FALSE;
			
			if (GMT_io.status & GMT_IO_MISMATCH) {
				fprintf (stderr, "%s: Mismatch between actual (%d) and expected (%d) fields near line %d\n", GMT_program, n_fields,  n_expected_fields, n_read);
				exit (EXIT_FAILURE);
			}

			if (GMT_is_dnan (in[2])) skip = TRUE;	/* Skip when z = NaN */

			if (!skip) {	/* Check if point is inside area */
				n_read++;
		
				ix = (int)floor(((in[x] - west) * idx) + del_off);
				if ( ix < 0 || ix >= n_x ) skip = TRUE;
				iy = (int)floor(((in[y] - south) * idy) + del_off);
				if ( iy < 0 || iy >= n_y ) skip = TRUE;
			}
		
			if (!skip) {
				index = iy * n_x + ix;
		
				data[n_pitched].i = index;
				data[n_pitched].a[0] = in[x];
				data[n_pitched].a[1] = in[y];
				data[n_pitched].a[2] = in[2];
				data[n_pitched].a[3] = (weighted) ? in[3] : 1.0;
		
				n_pitched++;
				if (n_pitched == n_alloc) {
					n_alloc += GMT_CHUNK;
					data = (struct DATA *) GMT_memory ((void *)data, (size_t)n_alloc, sizeof(struct DATA), GMT_program);
				}
			}
			
			n_fields = GMT_input (fp, &n_expected_fields, &in);
		}
		if (fp != GMT_stdin) GMT_fclose(fp);
		
	}
	
	data = (struct DATA *) GMT_memory ((void *)data, (size_t)n_pitched, sizeof(struct DATA), GMT_program);
	n_lost = n_read - n_pitched;
	if (gmtdefs.verbose) fprintf(stderr,"%s: N read: %d N used: %d N outside_area: %d\n", GMT_program, 
		n_read,n_pitched,n_lost);


	/* Ready to go. */
	
	n_out = (report) ? 4 : 3;
	
	/* Sort on index and Z value */
	
	qsort((void *)data, (size_t)n_pitched, sizeof (struct DATA), compare_index_z);
	
	/* Find n_in_cell and write appropriate output  */

	first_in_cell = 0;
	n_cells_filled = 0;
	while (first_in_cell < n_pitched) {
		out[3] = data[first_in_cell].a[3];
		out[x] = data[first_in_cell].a[0];
		out[y] = data[first_in_cell].a[1];
		first_in_new_cell = first_in_cell + 1;
		while ( (first_in_new_cell < n_pitched) && (data[first_in_new_cell].i == data[first_in_cell].i) ) {
			out[3] += data[first_in_new_cell].a[3];
			out[x] += data[first_in_new_cell].a[0];
			out[y] += data[first_in_new_cell].a[1];
			first_in_new_cell++;
		}
		n_in_cell = first_in_new_cell - first_in_cell;
		if (n_in_cell > 2) {
			/* data are already sorted on z; get z mode  */
			out[2] = weighted_mode (&data[first_in_cell], out[3], n_in_cell, 2);
			if (go_quickly) {
				i_n_in_cell = 1.0 / n_in_cell;
				out[0] *= i_n_in_cell;
				out[1] *= i_n_in_cell;
			}
			else {
				qsort((void *)&data[first_in_cell], (size_t)n_in_cell, sizeof (struct DATA), compare_x);
				out[x] = weighted_mode (&data[first_in_cell], out[3], n_in_cell, 0);
				
				qsort((void *)&data[first_in_cell], (size_t)n_in_cell, sizeof (struct DATA), compare_y);
				out[y] = weighted_mode (&data[first_in_cell], out[3], n_in_cell, 1);
			}
		}
		else if (n_in_cell == 2) {
			if (data[first_in_cell].a[3] > data[first_in_cell+1].a[3]) {
				out[2] = data[first_in_cell].a[2];
				if (go_quickly) {
					out[0] *= 0.5;
					out[1] *= 0.5;
				}
				else {
					out[x] = data[first_in_cell].a[0];
					out[y] = data[first_in_cell].a[1];
				}
			}
			else if (data[first_in_cell].a[3] < data[first_in_cell+1].a[3]) {
				out[2] = data[first_in_cell+1].a[2];
				if (go_quickly) {
					out[0] *= 0.5;
					out[1] *= 0.5;
				}
				else {
					out[x] = data[first_in_cell+1].a[0];
					out[y] = data[first_in_cell+1].a[1];
				}
			}
			else {
				out[0] *= 0.5;
				out[1] *= 0.5;
				out[2] *= 0.5;
			}
		}
		/* else n_in_cell == 1, which means out[] is fine as is.  */		
		
		GMT_output (GMT_stdout, n_out, out);
		
		n_cells_filled++;
		first_in_cell = first_in_new_cell;
	}

	if (gmtdefs.verbose) fprintf(stderr,"%s: N_cells_filled: %d\n", GMT_program, n_cells_filled);

	GMT_free ((void *)data);
	
        GMT_end (argc, argv);
}

double	weighted_mode (struct DATA *d, double wsum, int n, int k)
{
	/* Estimate the mode of component a[k] over the n records
	   d[0] ... d[n-1] by finding a maximum in the estimated
	   pdf of the weighted data.  The records are expected to
	   be in ascending order of a[k] (the callers in this file
	   qsort on that component first), and wsum is expected to
	   be the sum of their weights a[3].

	   Estimate the pdf as the finite difference of the
	   cumulative frequency distribution over points from i
	   to j.  This has the form top/bottom, where top is the
	   sum of the weights from i to j, and bottom is
	   (d[j].a[k] - d[i].a[k]).  Strategy is to start with
	   i=0, j=n-1, and then move i or j toward the middle
	   while j-i > n/2 and bottom > 0.  At end while, the
	   midpoint of the range from i to j is the mode estimate.
	   Choose to move either i or j depending on which one
	   will cause the greatest increase in the pdf estimate.
	   If a tie, alternate between j and i to avoid bias.

	   If dropping an end point would leave a window of zero
	   width (all remaining values identical) that repeated
	   value is returned at once.

	   Strictly, the pdf estimated this way would need to be
	   scaled by (1/wsum), but this is constant so we don't
	   use it here, as we are seeking a relative maximum.

	   Written assuming n > 2; n must be at least 1.  */

	double	top, topj, topi, bottomj, bottomi, pj, pi;
	int	i, j, nh;
	int	tie_moves_i = 0;	/* whose turn it is on a tie: 0 = drop j, 1 = drop i */

	i = 0;
	j = n - 1;
	nh = n / 2;
	top = wsum;

	while (j-i > nh) {
		/* Weight sum and width of the window if we drop point i, or point j */
		topi = top - d[i].a[3];
		topj = top - d[j].a[3];
		bottomi = d[j].a[k] - d[i+1].a[k];
		bottomj = d[j-1].a[k] - d[i].a[k];

		if (bottomj == 0.0) {
			return (d[j-1].a[k]);
		}
		else if (bottomi == 0.0) {
			return (d[i+1].a[k]);
		}
		else {
			pi = topi/bottomi;
			pj = topj/bottomj;
			if (pi > pj) {
				i++;
				top = topi;
			}
			else if (pi < pj) {
				j--;
				top = topj;
			}
			else {	/* tie: take turns so that neither end is favoured */
				if (tie_moves_i) {
					i++;
					top = topi;
				}
				else {
					j--;
					top = topj;
				}
				tie_moves_i = !tie_moves_i;
			}
		}
	}
	return(0.5*(d[j].a[k] + d[i].a[k]));
}

int	compare_index_z (const void *point_1, const void *point_2)
{
	int	index_1, index_2;
	double	data_1, data_2;
	struct DATA *p1, *p2;
	
	p1 = (struct DATA *)point_1;
	p2 = (struct DATA *)point_2;
	index_1 = p1->i;
	index_2 = p2->i;

	if (index_1 < index_2)
		return (-1);
	else if (index_1 > index_2)
		return (1);
	else {
		data_1 = p1->a[2];
		data_2 = p2->a[2];
		if (data_1 < data_2)
			return (-1);
		else if (data_1 > data_2)
			return (1);
		else
			return (0);
	}
}
int	compare_x(const void *point_1, const void *point_2)
{
	int	index_1, index_2;
	double	x_1, x_2;
	struct DATA *p1, *p2;
	
	p1 = (struct DATA *)point_1;
	p2 = (struct DATA *)point_2;
	index_1 = p1->i;
	index_2 = p2->i;

	if (index_1 < index_2)
		return (-1);
	else if (index_1 > index_2)
		return (1);
	else {
		x_1 = p1->a[0];
		x_2 = p2->a[0];
		if (x_1 < x_2)
			return (-1);
		else if (x_1 > x_2)
			return (1);
		else
			return (0);
	}
}

int	compare_y(const void *point_1, const void *point_2)
{
	int	index_1, index_2;
	double	y_1, y_2;
	struct DATA *p1, *p2;
	
	p1 = (struct DATA *)point_1;
	p2 = (struct DATA *)point_2;
	index_1 = p1->i;
	index_2 = p2->i;

	if (index_1 < index_2)
		return (-1);
	else if (index_1 > index_2)
		return (1);
	else {
		y_1 = p1->a[1];
		y_2 = p2->a[1];
		if (y_1 < y_2)
			return (-1);
		else if (y_1 > y_2)
			return (1);
		else
			return (0);
	}
}
