/*
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
**-----------------------------------------------------------------
** Changes in expandstar and parseterm to fix the wildcard * problem.
** G. Hill, ghill@library.berkeley.edu  3/11/97
**
** Changes in notresultlist, parseterm, and fixnot to fix the NOT problem
** G. Hill, ghill@library.berkeley.edu 3/13/97
**
** Changes in search, parseterm, fixnot, operate, getfileinfo
** to support METADATA
** G. Hill 3/18/97 ghill@library.berkeley.edu
**
** Change in search to allow for search with a list including
** also some empty indexes.
** G. Hill after a suggestion by J. Winstead 12/18/97
**
** Created countResults for number of hits in search
** G. Hill 12/18/97
**
**
** Change in search to allow maxhits to return N number
** of results for each index specified
** D. Norris after suggestion by D. Chrisment 08/29/99
**
** Created resultmaxhits as a global, renewable maxhits
** D. Norris 08/29/99
**
** added word length arg to Stem() call for strcat overflow checking in stemmer.c
** added safestrcpy() macro to avoid corruption from strcpy overflow
** SRE 11/17/99
**
** 10/10/99 & 11/23/99 - Bill Moseley (merged by SRE)
**   - Changed to stem words *before* expanding with expandstar
**     so can find words in the index
**   - Moved META tag check before expandstar so META names don't get
**     expanded!
**
** fixed cast to int problems pointed out by "gcc -Wall"
** SRE 2/22/00
**
** fixed search() for case where stopword is followed by rule:
**   stopword was removed, rule was left, no matches ever found
** added "# Stopwords removed:" to output header so caller can
**   trap actions of IGNORE_STOPWORDS_IN_QUERY
** SRE 2/25/00
**
** 04/00 - Jose Ruiz
** Added code for phrase search
**     - New function phraseresultlists
**     - New function expandphrase
**
** 04/00 - Jose Ruiz
** Added freeresult function for freeing results memory
** Also added changes to orresultlists andresultlists notresultlist
**  for freeing memory
**
** 04/00 - Jose Ruiz
** Now use bighash instead of hash for better performance in
** orresultlist (a* or b*). Also changed hash.c
**
** 04/00 - Jose Ruiz
** Function getfileinfo rewrite
**     - Now use a hash approach for faster searching
**     - Solves the long timed searches (a* or b* or c*)
**
** 04/00 - Jose Ruiz
** Ordering of result rewrite
** Now builtin C function qsort is used for faster ordering of results
** This is useful when lots of results are found
** For example: not (axf) -> This gives you all the documents!!
**
** 06/00 - Jose Ruiz
** Rewrite of andresultlists and phraseresultlists for better performance
** New function notresultlists for better performance
**
*/

#include "swish.h"
#include "search.h"
#include "index.h"
#include "file.h"
#include "list.h"
#include "string.h"
#include "merge.h"
#include "hash.h"
#include "mem.h"
#include "docprop.h"
#include "stemmer.h"
#include "soundex.h"

/* Global, renewable copy of maxhits (added DN 08/29/99).
** search() resets it to maxhits before calling printsortedresults()
** for each index. */
int resultmaxhits;

/* 04/00 Jose Ruiz */
/* Simple routine for comparing pointers to integers in order to
** get an ascending sort with qsort.
** s1 and s2 each point at one int. Returns a negative value, zero or a
** positive value when *s1 is less than, equal to or greater than *s2.
** The values are compared instead of subtracted: a subtraction such as
** INT_MIN - 1 overflows and would report the wrong ordering. */
int icomp(const void *s1,const void *s2)
{
	int a = *(const int *)s1;
	int b = *(const int *)s2;

	return((a > b) - (a < b));
}

/* 04/00 Jose Ruiz */
/* Simple routine for comparing pointers to integers in order to
** get an ascending sort with qsort.
** Identical to icomp but uses two integers per array element: s1 and s2
** each point at a pair of ints. The first ints decide the order; the
** second ints are only looked at when the first ones are equal.
** The values are compared instead of subtracted so that pairs of ints
** that are far apart cannot overflow and report the wrong ordering. */
int icomp2(const void *s1,const void *s2)
{
	const int *p1 = (const int *)s1;
	const int *p2 = (const int *)s2;

	if (p1[0] != p2[0])
		return((p1[0] > p2[0]) - (p1[0] < p2[0]));
	return((p1[1] > p2[1]) - (p1[1] < p2[1]));
}

/* The main search function.
** Parentheses are stripped out, things made lowercase,
** extra blanks removed, etc.
**
** words      the query string; scanning stops at the first '\0' or '\n'
** indexlist  list of index file names (the ->line of every entry is
**            handed to openIndexFileForRead())
** structure  passed through unchanged to the result sorting routines
**
** The function works in three steps:
**   1. read the header of every index file and merge the header values
**      (wordchars, beginchars, ...) into the global settings,
**   2. split the query into the global searchwordlist,
**   3. for every index file: parse the query, sort and print the results.
** Output goes to stdout. Fatal conditions print an "err:" line and
** terminate the program with exit(0).
*/

void search(words, indexlist, structure)
char *words;
struct swline *indexlist;
int structure;
{
	int i, j, metaName, indexYes, totalResults;
	float num;
	char word[MAXWORDLEN];
	struct result *resultlist;
	struct result *sortresultlist;
	struct swline *tmplist,*tmplist2;
	FILE *fp;
	int isSortByProperty;
	/* int resultmaxhits;*/  /* Added DN 08/29/99  */
#ifdef DEBUG
	struct swline *newp2;
#endif
#ifdef IGNORE_STOPWORDS_IN_QUERY
	struct swline *pointer1, *pointer2, *pointer3;
#endif
	/* 06/00 Jose Ruiz
	** Added to handle several index file headers */
	char *wordchars1,*beginchars1,*endchars1,*ignorelastchar1,*ignorefirstchar1,*indexn1,*indexp1,*indexa1,*indexd1;
	char *wordcharsM,*begincharsM,*endcharsM,*ignorelastcharM,*ignorefirstcharM;
	char *filenames;
	int applyStemmingRules1,applySoundexRules1,minwordlimit1,maxwordlimit1;
	int merge;
	
	merge=0;
	filenames=NULL;
	sortresultlist=NULL;
	j=0;
	
	isSortByProperty = 0;
	
	searchwordlist = NULL;
	metaName = 1;
	indexYes = 0;
	wordchars1=beginchars1=endchars1=ignorelastchar1=ignorefirstchar1=indexn1=indexp1=indexa1=indexd1=NULL;
	applyStemmingRules1=applySoundexRules1=minwordlimit1=maxwordlimit1=totalwords=totalfiles=0;


	/* First of all, read the header default values from all index files */
	/* With this, we read wordchars, stripchars, ... */
	/* Also merge them */
	for (tmplist=indexlist;tmplist;) {
		commonerror = bigrank = 0;
		if ((fp = openIndexFileForRead(tmplist->line)) == NULL) {
			printf("# Name: unknown index\n");
			printf("err: could not open index file\n.\n");
			exit(0);
		}
		if (!isokindexheader(fp)) {
			printf("err: the index file format is unknown\n.\n");
			exit(0);
		}
		readheader(fp);
		fclose(fp);
		if(merge) {
			/* The "...1" copies hold the values accumulated from the
			** previous index files; combine them with the header
			** that was just read. */
			if(strcmp(wordchars1,wordchars)) {
				wordcharsM=mergestrings(wordchars1,wordchars);
				wordchars=SafeStrCopy(wordchars,wordcharsM,&lenwordchars);
				efree(wordcharsM);
			}
			if(strcmp(beginchars1,beginchars)) {
				begincharsM=mergestrings(beginchars1,beginchars);
				beginchars=SafeStrCopy(beginchars,begincharsM,&lenbeginchars);
				efree(begincharsM);
			}
			if(strcmp(endchars1,endchars)) {
				endcharsM=mergestrings(endchars1,endchars);
				endchars=SafeStrCopy(endchars,endcharsM,&lenendchars);
				efree(endcharsM);
			}
			if(strcmp(ignorelastchar1,ignorelastchar)) {
				ignorelastcharM=mergestrings(ignorelastchar1,ignorelastchar);
				ignorelastchar=SafeStrCopy(ignorelastchar,ignorelastcharM,&lenignorelastchar);
				efree(ignorelastcharM);
			}
			if(strcmp(ignorefirstchar1,ignorefirstchar)) {
				ignorefirstcharM=mergestrings(ignorefirstchar1,ignorefirstchar);
				ignorefirstchar=SafeStrCopy(ignorefirstchar,ignorefirstcharM,&lenignorefirstchar);
				efree(ignorefirstcharM);
			}
			applyStemmingRules=applyStemmingRules1 && applyStemmingRules;
			applySoundexRules=applySoundexRules1 && applySoundexRules;
			if(minwordlimit1<minwordlimit) minwordlimit=minwordlimit1;
			if(maxwordlimit1<maxwordlimit) maxwordlimit=maxwordlimit1;
			if(strcmp(indexp1,indexp)) indexp=SafeStrCopy(indexp,"(several)",&lenindexp);
			if(strcmp(indexa1,indexa)) indexa=SafeStrCopy(indexa,"(several)",&lenindexa);
			if(strcmp(indexd1,indexd)) indexd=SafeStrCopy(indexd,"(several)",&lenindexd);
			/* The result belongs to indexn; it used to be stored in
			** indexd, which clobbered the description pointer. */
			if(strcmp(indexn1,indexn)) indexn=SafeStrCopy(indexn,"(several)",&lenindexn);

			efree(wordchars1);
			efree(beginchars1);
			efree(endchars1);
			efree(ignorelastchar1);
			efree(ignorefirstchar1);
			efree(indexn1);
			efree(indexd1);
			efree(indexa1);
			efree(indexp1);
		}
		tmplist=tmplist->next;
		if(tmplist) {  /* If there are more index files we need
				 ** to preserve header values */
		
			wordchars1=estrdup(wordchars);sortstring(wordchars1);
			beginchars1=estrdup(beginchars);sortstring(beginchars1);
			endchars1=estrdup(endchars);sortstring(endchars1);
			ignorelastchar1=estrdup(ignorelastchar);sortstring(ignorelastchar1);
			ignorefirstchar1=estrdup(ignorefirstchar);sortstring(ignorefirstchar1);
			indexn1=estrdup(indexn);
			indexp1=estrdup(indexp);
			indexa1=estrdup(indexa);
			indexd1=estrdup(indexd);
			applyStemmingRules1=applyStemmingRules;
			applySoundexRules1=applySoundexRules;
			minwordlimit1=minwordlimit;
			maxwordlimit1=maxwordlimit;
			merge=1;
		}
		totalwords+=totalwordsheader;
		totalfiles+=totalfilesheader;
		if(!filenames) 
			filenames=estrdup(savedasheader);
		else {
			/* Room for both names, the separating blank and the NUL.
			** Append in place: sprintf() with the destination also used
			** as a source argument is undefined behaviour. */
			filenames=erealloc(filenames,strlen(filenames)+strlen(savedasheader)+2);
			strcat(filenames," ");
			strcat(filenames,savedasheader);
		}
	}
	printheader(stdout, filenames, totalwords, totalfiles, 0);
	efree(filenames);
	
	/* Make lookuptables for char processing */
	makelookuptable(wordchars,wordcharslookuptable);
	makelookuptable(beginchars,begincharslookuptable);
	makelookuptable(endchars,endcharslookuptable);
	makelookuptable(ignorefirstchar,ignorefirstcharlookuptable);
	makelookuptable(ignorelastchar,ignorelastcharlookuptable);

	/* Split the query into searchwordlist. "word" collects the characters
	** of the current word, j is the number of characters collected. */
	for (i = j = 0; words[i] != '\0' && words[i] != '\n'; i++) 
	{
		/* 06/00 Jose ruiz
		** Following line modified to extract words according
		** to wordchars as suggested by Bill Moseley
		*/
		if (isspace((int)words[i]) || words[i] == '(' || words[i] == ')' || words[i] == '=' || words[i] == PHRASE_DELIMITER_CHAR || !(words[i]=='*' || iswordchar(words[i]))) /* cast to int, 2/22/00 */
		{
			if (words[i] == '=')
			{
				if (j != 0)
				{
					if (words[i-1] != '\\')
					{ 
						word[j] = '\0';
						searchwordlist = (struct swline *) addswline(searchwordlist, (char *) convertentities(word));
						j = 0;
						searchwordlist = (struct swline *) addswline(searchwordlist, "=");
					}
					else
					{
						/* Needs to erase the '\' */
						j--;
						word[j] = tolower(words[i]);
						j++;
					}
				}
				else
				{
					searchwordlist = (struct swline *) addswline(searchwordlist, "=");
				}
			}
			else
			{
				if (j) 
				{
					word[j] = '\0';
					/* Convert chars ignored in words to spaces  */
					stripIgnoreLastChars(word);
					stripIgnoreFirstChars(word);
					if(strlen(word))
						searchwordlist = (struct swline *) addswline(searchwordlist, (char *) convertentities(word));
					j = 0;
				}
				if (words[i] == '(') 
				{
					searchwordlist = (struct swline *) addswline(searchwordlist, "(");
				}
				if (words[i] == ')') 
				{
					searchwordlist = (struct swline *) addswline(searchwordlist, ")");
				}
				if (words[i] == PHRASE_DELIMITER_CHAR) 
				{
					searchwordlist = (struct swline *) addswline(searchwordlist, PHRASE_DELIMITER_STRING);
				}
			}
		}
		else 
		{
			/* Keep one slot free for the terminating NUL; characters of an
			** overlong word beyond the buffer size are dropped instead of
			** being written past the end of word[]. */
			if (j < MAXWORDLEN - 1)
			{
				word[j] = tolower(words[i]);
				j++;
			}
		}
	}
	if (j) 
	{
		word[j] = '\0';
		/* Convert chars ignored in words to spaces  */
		stripIgnoreLastChars(word);
		stripIgnoreFirstChars(word);
		if(strlen(word))
			searchwordlist = (struct swline *) addswline(searchwordlist, (char *) convertentities(word));
	}
	
	printf("%s\n", INDEXHEADER);
	if (words[0] == '\0') 
	{
		printf("err: no search words specified\n.\n");
		exit(0);
	}

	/* Run the query against every index file in turn */
	while (indexlist != NULL) {
		
		commonerror = bigrank = 0;
		
		if ((fp = openIndexFileForRead(indexlist->line)) == NULL) {
			printf("# Name: unknown index\n");
			printf("err: could not open index file\n.\n");
			exit(0);
		}
		
		if (!totalfiles) {
			/* Nothing to search: close the file before moving on so the
			** handle is not leaked. */
			fclose(fp);
			indexlist = indexlist->next;
			continue;
		}
		else
		{ indexYes = 1; /*There is a non-empty index */ }

		readoffsets(fp);
		readhashoffsets(fp);
		readstopwords(fp);
		readfileoffsets(fp);
		readMetaNames(fp);
	
#ifdef IGNORE_STOPWORDS_IN_QUERY
		/* Added JM 1/10/98. */
		/* completely re-written 2/25/00 - SRE - "ted and steve" --> "and steve" if "ted" is stopword --> no matches! */

		/* walk the list, looking for rules & stopwords to splice out */
		/* remove a rule ONLY if it's the first thing on the line */
		/*   (as when exposed by removing stopword that comes before it) */

		/* loop on FIRST word: quit when neither stopword nor rule (except NOT rule) or metaname (last one as suggested by Adrian Mugnolo) */
		pointer1 = searchwordlist;
		while (pointer1 != NULL) {
			pointer2 = pointer1->next;
				/* 05/00 Jose Ruiz
				** NOT rule is legal at the beginning */
			if(isnotrule(pointer1->line) || isMetaName(pointer2)) break;
			if(!isstopword(pointer1->line) && !isrule(pointer1->line)) break;
			searchwordlist = pointer2; /* move the head of the list */
			printf("# Removed stopword: %s\n",pointer1->line);
				 /* Free line also !! Jose Ruiz 04/00 */
			efree(pointer1->line);
			efree(pointer1); /* toss the first point */
				 /* Free line also !! Jose Ruiz 04/00 */
			pointer1 = pointer2; /* reset for the loop */
		}
		if (pointer1 == NULL) {
			/* This query contained only stopwords! */
			printf("err: all search words too common to be useful\n.\n");
			exit(0);
		}

		/* loop on REMAINING words: ditch stopwords but keep rules (unless two rules in a row?) */
		pointer2 = pointer1->next;
		while (pointer2 != NULL) {
			/* Added Patch from Adrian Mugnolo */
			if((isstopword(pointer2->line) && !isrule(pointer2->line) && !isMetaName(pointer2->next))    /* non-rule stopwords */
			|| (    isrule(pointer1->line) &&  isrule(pointer2->line))) { /* two rules together */
			
				printf("# Removed stopword: %s\n",pointer2->line);    /* keep 1st of 2 rule */
				pointer1->next = pointer2->next;
				pointer3 = pointer2->next;
					/* Jose Ruiz 04/00
					** Fix memory problem
					*/
				efree(pointer2->line);
				efree(pointer2);
				pointer2 = pointer3;
			}
			else {
				pointer1 = pointer1->next;
				pointer2 = pointer2->next;
			}
			/* Jose Ruiz 04/00
			** Removed!! If pointer2 was previously freed 
			** we must not reassign it contents here
			** pointer2 = pointer2->next;
			*/
		}
#endif /* IGNORE_STOPWORDS_IN_QUERY */
	
		printf("# Search words:");
		tmplist = searchwordlist;
		while (tmplist != NULL) {
			printf(" %s", tmplist->line);
			tmplist = tmplist->next;
		}
		putchar('\n');
		
		resultlist = NULL;
		tmplist = searchwordlist;
			/* Expand phrase search: _kim harlow_ becomes (kim PHRASE_WORD harlow) */
		tmplist = (struct swline *) expandphrase(tmplist,PHRASE_DELIMITER_CHAR);
#ifdef DEBUG
		newp2 = tmplist;
		while (newp2 != NULL) {
			printf("%s ", newp2->line);
			newp2 = newp2->next;
		}
		putchar('\n');
#endif

		tmplist = (struct swline *) fixnot(tmplist); 
        
		/* Move this ahead of expandstar so can check for meta names and not expand them - moseley */
		initSearchResultProperties();
/*
** Jose Ruiz 04/00
** Removed. Faster implementation in getfileinfo
**		searchwordlist = (struct swline *) expandstar(tmplist, fp);
*/
		/* parseterm() consumes the global searchwordlist while parsing;
		** tmplist keeps the head so it can be restored afterwards. */
		searchwordlist = tmplist;

#ifdef DEBUG
		newp2 = searchwordlist;
		while (newp2 != NULL) {
			printf("%s ", newp2->line);
			newp2 = newp2->next;
		}
		putchar('\n');
#endif
		resultlist = (struct result *) parseterm(fp, 0, metaName);

/* 
** 04/00 Jose Ruiz - Get properties first before sorting. In this way
** we can sort the results by metaName
*/
		resultlist = (struct result *) getproperties(resultlist,fp);
		isSortByProperty = (int) isSortProp();
		if (isSortByProperty) sortresultlist = (struct result *) sortresultsbyproperty(resultlist,structure,fp);

/* 
04/00 Jose Ruiz - Sort results by rank
*/
		if (!isSortByProperty) sortresultlist = (struct result *) sortresultsbyrank(resultlist,structure,fp);

		if (sortresultlist == NULL) {
			if (commonerror)
				printf("err: a word is too common\n");
			else
				printf("err: no results\n");
		}
		else {
			/* Scale factor handed to printsortedresults():
			** 1000 divided by bigrank when bigrank is non-zero */
			if (bigrank)
				num = 1000.0f / (float) bigrank;
			else
				num = 1000.0f;
			totalResults = countResults(sortresultlist);
			printf("# Number of hits: %d\n",totalResults);
			resultmaxhits = maxhits; /* Reset resultmaxhits to maxhits for this index. DN 08/29/99  */
			printsortedresults(sortresultlist, num, fp);
				/* free all results */
			freeresultlist(resultlist);
		}
		
		/* keep file open during printsortedresults() so that 
		* doc properties can be retrieved */
		fclose(fp);

		searchwordlist = tmplist;
		indexlist = indexlist->next;
		
	}
		/* Free memory when finished !! */
	tmplist = searchwordlist;
	while (tmplist) {
		tmplist2 = tmplist->next;
		efree(tmplist->line);
		efree(tmplist);
		tmplist = tmplist2;
	}
		/* Free fileoffsets */
	freefilehashlist();
		/* Free stopword hash table */
	freestophash();

	if (!indexYes)
	{
		printf("err: the index file(s) is empty\n.\n");
		exit(0);
	}

	printf(".\n");
}


/* This puts parentheses in the right places around not structures
** so the parser can do its thing correctly.
** It does it both for 'not' and '='; the '=' is used for the METADATA (GH)
**
** sp is the list of query tokens. A new list is built with addswline()
** and returned; the nodes of sp are not freed here, although the ->line
** of a NOT token may be replaced in place (see the second loop below).
*/

struct swline *fixnot(sp)
struct swline *sp;
{
	int openparen, hasnot;
	int openMeta, hasMeta;
	int isfirstnot=0,metapar;
	struct swline *tmpp, *newp;
#ifdef DEBUG
	struct swline *newp2;
#endif
	/* 06/00 Jose Ruiz - Check if first word is NOT_RULE */
	/* Change remaining NOT by AND_NOT_RULE */
	/* Pass 1: skip leading "(" tokens; isfirstnot is set when a NOT rule
	** shows up before any other kind of token. */
	for(tmpp = sp;tmpp;tmpp=tmpp->next) {
		if (tmpp->line[0]=='(') continue;
		else if ( isnotrule(tmpp->line) ) {
			isfirstnot=1;
		} else break;
	}
	/* Pass 2: every NOT rule is rewritten to AND_NOT_WORD, except the
	** first one encountered when isfirstnot was set by pass 1. */
	for(tmpp = sp;tmpp;tmpp=tmpp->next) {
		if ( isnotrule(tmpp->line)) {
			if(!isfirstnot) {
				efree(tmpp->line);
				tmpp->line=estrdup(AND_NOT_WORD);
			}else isfirstnot=0;
		}
	}

	tmpp = sp;
	newp = NULL;
	
	/* hasnot / hasMeta: a NOT rule / a metaName has been seen and its
	** operand has not been closed yet.
	** openparen / openMeta: nesting depth of parentheses inside that
	** operand. */
	openparen = 0;
	openMeta = 0;
	hasMeta = 0;
	hasnot = 0;
	while (tmpp != NULL) {
		/* Track parenthesis depth while inside a NOT or metaName operand */
		if ( ((tmpp->line)[0] == '(') && hasnot)
			openparen++;
		else if ( ((tmpp->line)[0] == '(') && hasMeta) 
			openMeta++;
		else if ( ((tmpp->line)[0] == ')') && hasnot)
			openparen--;
		else if ( ((tmpp->line)[0] == ')') && hasMeta)
			openMeta--;
		if (isMetaName(tmpp->next)) {
			/* If it is a metaName add the name and = and skip to next */
			/* Emits "( name =" and advances past both the name and the
			** token that follows it. */
			hasMeta = 1;
			newp = (struct swline *) addswline(newp, "(");
			newp = (struct swline *) addswline(newp, tmpp->line);
			newp = (struct swline *) addswline(newp, "=");
			tmpp = tmpp->next;
			tmpp = tmpp->next;
			continue;
		}
		if ( isnotrule(tmpp->line) ) {
			/* Open a group in front of the NOT rule; the rule itself is
			** appended at the bottom of the loop. */
			hasnot = 1;
			newp = (struct swline *) addswline(newp, "("); 
		}
		else if (hasnot && !openparen) {
			/* The NOT operand is complete (no parenthesis left open):
			** copy the token and close the group. */
			hasnot = 0;
			newp = (struct swline *) addswline(newp, tmpp->line);
			newp = (struct swline *) addswline(newp, ")");
			tmpp = tmpp->next;
			continue;
		}
		else if (hasMeta && !openMeta) {
			hasMeta = 0;
				/* 06/00 Jose Ruiz
				** Fix to consider parentheses in the
				** content of a MetaName */
			if (tmpp->line[0] == '(') {
				/* Copy the whole parenthesized group; metapar is the
				** current nesting depth. The loop stops on the matching
				** ")" (tmpp is left on it) or at the end of the list. */
				metapar=1;
				newp = (struct swline *) addswline(newp, tmpp->line);
				tmpp=tmpp->next;
				while (metapar && tmpp) {
					if (tmpp->line[0]=='(') metapar++;
					else if (tmpp->line[0]==')') metapar--;
					newp = (struct swline *) addswline(newp, tmpp->line);
					if(metapar) tmpp=tmpp->next;
				}
				/* List ended before the group was closed: return what
				** has been built so far. */
				if(!tmpp) return(newp);
			} else
				newp = (struct swline *) addswline(newp, tmpp->line);
			/* Close the group opened in front of the metaName */
			newp = (struct swline *) addswline(newp, ")");
			tmpp = tmpp->next;
			continue;
		}
		/* Default: copy the token unchanged */
		newp = (struct swline *) addswline(newp, tmpp->line);
		if (isMetaName(tmpp)) {
			hasMeta = 1;
			newp = (struct swline *) addswline(newp, "(");
		}
		tmpp = tmpp->next;
	}
	
#ifdef DEBUG
	/* Dump the rewritten token list */
	newp2 = newp;
	while (newp2 != NULL) {
		printf("%s ", newp2->line);
		newp2 = newp2->next;
	}
	putchar('\n');
#endif
	
	return newp;
}

/* expandstar removed - Jose Ruiz 04/00 */

/* Expands phrase search. Berkeley University becomes Berkeley PHRASE_WORD University
**
** sp         list of query tokens
** delimiter  first character of the tokens that open and close a phrase
**
** Returns a new list built with addswline(): an opening delimiter becomes
** "(", a closing one becomes ")", and PHRASE_WORD is inserted between
** consecutive words inside a phrase. Tokens outside a phrase are copied
** unchanged.
*/
struct swline *expandphrase( struct swline *sp, char delimiter)
{
	struct swline *cur;
	struct swline *out = NULL;
	/* 0 while outside a phrase; inside a phrase it is one more than the
	** number of words already copied from that phrase */
	int phrasepos = 0;

	for (cur = sp; cur != NULL; cur = cur->next) {
		if ((cur->line)[0] == delimiter) {
			if (phrasepos) {
				/* closing delimiter */
				phrasepos = 0;
				out = (struct swline *) addswline(out,")");
			} else {
				/* opening delimiter */
				phrasepos = 1;
				out = (struct swline *) addswline(out,"(");
			}
			continue;
		}

		if (phrasepos) {
			/* every word but the first of a phrase is preceded by PHRASE_WORD */
			if (phrasepos > 1)
				out = (struct swline *) addswline(out,PHRASE_WORD);
			phrasepos++;
		}
		out = (struct swline *) addswline(out,cur->line);
	}
	return out;
}

/*  getmatchword removed. Obsolete. Jose Ruiz 04/00 */

/* Returns the value part of a header line whose beginning has already
** been matched against "header": the text after the header keyword and
** the single separator character that follows it.
** If the line ends right after the keyword, the empty string at the end
** of the line is returned instead of stepping over the terminating NUL
** into stale buffer contents.
*/
static char *headervalue(char *line, const char *header)
{
	char *value = line + strlen(header);

	if (*value != '\0') value++;
	return value;
}

/* Reads the header of an index file and stores the information found
** there (wordchars, beginchars, etc) in the corresponding global settings.
**
** The first line is skipped. After that every line starting with '#' is
** examined; reading stops at a NUL byte or at the first line that does
** not start with '#'. The file position is reset to the beginning of the
** file before returning.
*/

void readheader(fp)
FILE *fp;
{
	int c;
	char line[MAXSTRLEN];
	
	fgets(line, MAXSTRLEN, fp);
	while (1) {
		/* Peek at the first character of the next line */
		if(!(c = fgetc(fp))) break;
		ungetc(c, fp);
		if (c == '#') {
			fgets(line, MAXSTRLEN, fp);
			/* Drop the trailing newline, if any */
			if(line[strlen(line)-1]=='\n') line[strlen(line)-1]='\0';
			if(!strncmp(line,WORDCHARSHEADER,strlen(WORDCHARSHEADER))) {
				wordchars = SafeStrCopy(wordchars,headervalue(line,WORDCHARSHEADER),&lenwordchars);
				sortstring(wordchars);
			} else if(!strncmp(line,BEGINCHARSHEADER,strlen(BEGINCHARSHEADER))) {
				beginchars = SafeStrCopy(beginchars,headervalue(line,BEGINCHARSHEADER),&lenbeginchars);
				sortstring(beginchars);
			} else if(!strncmp(line,ENDCHARSHEADER,strlen(ENDCHARSHEADER))){  
				endchars = SafeStrCopy(endchars,headervalue(line,ENDCHARSHEADER),&lenendchars);
				sortstring(endchars);
			} else if(!strncmp(line,IGNOREFIRSTCHARHEADER,strlen(IGNOREFIRSTCHARHEADER)))  {
				ignorefirstchar = SafeStrCopy(ignorefirstchar,headervalue(line,IGNOREFIRSTCHARHEADER),&lenignorefirstchar);
				sortstring(ignorefirstchar);
			} else if(!strncmp(line,IGNORELASTCHARHEADER,strlen(IGNORELASTCHARHEADER)))  {
				ignorelastchar = SafeStrCopy(ignorelastchar,headervalue(line,IGNORELASTCHARHEADER),&lenignorelastchar);
				sortstring(ignorelastchar);
			} else if(!strncmp(line,STEMMINGHEADER,strlen(STEMMINGHEADER)))  
				applyStemmingRules = atoi(headervalue(line,STEMMINGHEADER));
			else if(!strncmp(line,SOUNDEXHEADER,strlen(SOUNDEXHEADER)))  
				applySoundexRules = atoi(headervalue(line,SOUNDEXHEADER));
			else if(!strncmp(line,MINWORDLIMHEADER,strlen(MINWORDLIMHEADER)))  
				minwordlimit = atoi(headervalue(line,MINWORDLIMHEADER));
			else if(!strncmp(line,MAXWORDLIMHEADER,strlen(MAXWORDLIMHEADER)))  
				maxwordlimit = atoi(headervalue(line,MAXWORDLIMHEADER));
			else if(!strncmp(line,SAVEDASHEADER,strlen(SAVEDASHEADER)))  
				savedasheader = SafeStrCopy(savedasheader,headervalue(line,SAVEDASHEADER),&lensavedasheader);
			else if(!strncmp(line,NAMEHEADER,strlen(NAMEHEADER)))  
				indexn = SafeStrCopy(indexn,headervalue(line,NAMEHEADER),&lenindexn);
			else if(!strncmp(line,DESCRIPTIONHEADER,strlen(DESCRIPTIONHEADER)))  
				indexd = SafeStrCopy(indexd,headervalue(line,DESCRIPTIONHEADER),&lenindexd);
			else if(!strncmp(line,POINTERHEADER,strlen(POINTERHEADER)))  
				indexp = SafeStrCopy(indexp,headervalue(line,POINTERHEADER),&lenindexp);
			else if(!strncmp(line,MAINTAINEDBYHEADER,strlen(MAINTAINEDBYHEADER)))  
				indexa = SafeStrCopy(indexa,headervalue(line,MAINTAINEDBYHEADER),&lenindexa);
			else if(!strncmp(line,INDEXEDONHEADER,strlen(INDEXEDONHEADER)))  
				indexedon = SafeStrCopy(indexedon,headervalue(line,INDEXEDONHEADER),&lenindexedon);
			else if(!strncmp(line,COUNTSHEADER,strlen(COUNTSHEADER)))  {
				/* Both counters stay 0 when the line cannot be parsed */
				totalwordsheader=totalfilesheader=0;
				sscanf(line+strlen(COUNTSHEADER),"%d words, %d files",&totalwordsheader,&totalfilesheader);
			}
			continue;
		}
		else
			break;
	}
	fseek(fp, 0, 0);
}

/* Reads the offsets in the index file so word lookup is faster.
**
** 06/00 - Jose Ruiz: the header is delimited by a null char.
** The file is rewound, everything up to and including the first NUL byte
** is skipped, and MAXCHARS values are read with readlong() into the
** global offsets[] table. One more character is then consumed.
*/

void readoffsets(fp)
FILE *fp;
{
	int c, i;

	fseek(fp, 0, 0);
	/* Jump header. EOF has to be tested as well: it is non-zero, so a
	** file without a NUL byte would otherwise never leave this loop. */
	do {
		c = fgetc(fp);
	} while (c != 0 && c != EOF);

	for (i=0;i<MAXCHARS;i++) 
		offsets[i] = readlong(fp);
	fgetc(fp); /* Jump '\n' */
}

/* Jose Ruiz 04/00
** Reads the hash index: SEARCHHASHSIZE values are read with readlong()
** into the global hashoffsets[] table, then one more character is consumed.
** The file pointer is set by readoffsets, so this has to be called
** right after it.
*/
void readhashoffsets(fp)
FILE *fp;
{
	int slot = 0;

	while (slot < SEARCHHASHSIZE) {
		hashoffsets[slot] = readlong(fp);
		slot++;
	}
	fgetc(fp); /* Jump '\n' */
}

/* Reads the stopwords in the index file.
**
** Seeks to offsets[STOPWORDPOS] and reads length-prefixed words until a
** length of 0 is found; every word is handed to addstophash().
** The word buffer is static and is reused (and grown) across calls.
*/

void readstopwords(fp)
FILE *fp;
{
	int len;
	static int lenword=0;
	static char *word=NULL;
	
	if(!lenword) word = (char *) emalloc((lenword=MAXWORDLEN) + 1);
	fseek(fp, offsets[STOPWORDPOS], 0);
	uncompress(len,fp);
	while (len) {
		if(len>=lenword) {
			/* Grow to fit this word plus some slack. This used to be
			** "*=", which multiplied the size instead of setting it. */
			lenword=len + 200;
			word = (char *) erealloc(word,lenword+1);
		}
		fread(word,len,1,fp);
		word[len]='\0';
		addstophash(word);
		uncompress(len,fp);
	}
}

/* Reads the metaNames from the index
**
** Seeks to offsets[METANAMEPOS] and reads entries until a name length of
** 0 is found. Every entry is a length-prefixed name followed by its
** style, and is registered in metaEntryList with addMetaEntry().
*/

void readMetaNames(fp)
FILE *fp;
{
	int namelen;
	int bufsize,style;
	char *name;

	bufsize = MAXWORDLEN;
	name=(char *)emalloc(MAXWORDLEN +1);
	
	fseek(fp, offsets[METANAMEPOS], 0);
	uncompress(namelen,fp);
	while ( namelen != 0 )
	{
		/* make sure the name and its terminating NUL fit */
		if(namelen>=bufsize) {
			bufsize=namelen+200;
			name = (char *) erealloc(name,bufsize+1);
		}
		fread(name,namelen,1,fp);
		name[namelen]='\0';

			/* the meta name style:
			 * 0   -> normal meta name [default]
			 * 1   -> doc property name
			 * 2   -> both
			 * It was saved as Style+1
			 */
		style=0;
		uncompress(style,fp);
		style--;

			/* add the meta tag, possibly twice */
		switch (style) {
		case 0:
			addMetaEntry(&metaEntryList, name, 0); /* as metaName */
			break;
		case 1:
			addMetaEntry(&metaEntryList, name, 1);	/* as docProp */
			break;
		case 2:
			addMetaEntry(&metaEntryList, name, 0); /* as metaName */
			addMetaEntry(&metaEntryList, name, 1);	/* as docProp */
			break;
		default:
			break;
		}

		uncompress(namelen,fp);
	}
	efree(name);
}

/* Reads the file offset table in the index file.
*/

void readfileoffsets(fp)
FILE *fp;
{
	int j;
	long num;
	
	j = 0;
	fseek(fp, offsets[FILEOFFSETPOS], 0);
	num = 1L;
	while (num) {
		if((num = readlong(fp))) addtofilehashlist(j++, num);
	}
}

/* The recursive parsing function.
** This was a headache to make but ended up being surprisingly easy. :)
** parseone tells the function to only operate on one word or term.
**
** fp        open index file, handed on to the lookup routines
** parseone  non-zero: return after the first plain word has been processed
** metaName  metaName identifier handed on to operate() for plain words
**
** The tokens are taken from the global searchwordlist, which is advanced
** as they are consumed (also by the recursive calls). Returns the result
** list built from the consumed tokens.
*/

struct result *parseterm(fp, parseone, metaName)
FILE *fp;
int parseone;
int metaName;
{
	int rulenum;
	char *word;
	int lenword;
	struct result *rp, *newrp;
	/*
	 * The andLevel is used to help keep the ranking function honest
	 * when it ANDs the results of the latest search term with
	 * the results so far (rp).  The idea is that if you AND three
	 * words together you ultimately want to resulting rank to
	 * be the average of all three individual work ranks. By keeping
	 * a running total of the number of terms already ANDed, the
	 * next AND operation can properly scale the average-rank-so-far
	 * and recompute the new average properly (see andresultlists()).
	 * This implementation is a little weak in that it will not average
	 * across terms that are in parenthesis. (It treats an () expression
	 * as one term, and weights it as "one".)
	 */
	int andLevel = 0;	/* number of terms ANDed so far */

	/* word is a private copy of the current token, (re)sized by SafeStrCopy() */
	word = NULL;
	lenword = 0;

	rp = NULL;
	
	/* The first term is combined with the (empty) result list using OR */
	rulenum = OR_RULE;
	while (searchwordlist != NULL) {
		word = SafeStrCopy(word, searchwordlist->line,&lenword);
		
		/* No explicit rule was given between two terms: use the default one */
		if (rulenum == NO_RULE)
			rulenum = DEFAULT_RULE;
		if (isunaryrule(word)) {
			/* Unary rule: parse exactly one following term and negate it */
			searchwordlist = searchwordlist->next;
			rp = (struct result *) parseterm(fp, 1, metaName);
			rp = (struct result *) notresultlist(rp, fp);
			/* Wild goose chase */
			rulenum = NO_RULE;
			continue;
		}
		else if (isbooleanrule(word)) {
			/* Binary rule: remember it for the next term */
			rulenum = getrulenum(word);
			searchwordlist = searchwordlist->next;
			continue;
		}
		
		if (rulenum != AND_RULE)
			andLevel = 0;	/* reset */
		else if (rulenum == AND_RULE)
			andLevel++;
		
		if (word[0] == '(') {
			
			/* Sub-expression: parse it recursively and combine its result
			** with rp according to the pending rule */
			searchwordlist = searchwordlist->next;
			newrp = (struct result *) parseterm(fp, 0, metaName);
			
			if (rulenum == AND_RULE)
				rp = (struct result *)
				andresultlists(rp, newrp, andLevel);
			else if (rulenum == OR_RULE)
				rp = (struct result *)
				orresultlists(rp, newrp);
			else if (rulenum == PHRASE_RULE)
				rp = (struct result *)
				phraseresultlists(rp, newrp,1);
			else if (rulenum == AND_NOT_RULE)
				rp = (struct result *)
				notresultlists(rp, newrp);

			/* The recursive call may have consumed the whole list */
			if (searchwordlist == NULL)
				break;
			
			rulenum = NO_RULE;
			continue;
			
		}
		else if (word[0] == ')') {
			/* End of the current sub-expression: return to the caller */
			searchwordlist = searchwordlist->next;
			break;
		}
		
		/* Check if the next word is '=' */
		if ( isMetaName(searchwordlist->next) ) {
			metaName = getMetaName(word);
			/* getMetaName() returning 1 is treated as "unknown metaName" */
			if (metaName == 1){
				printf ("err: The metaName %s doesn't exist in  user configfile\n", word);
				exit(0);
			}
			/* Skip both the metaName and the '=' */
			searchwordlist = searchwordlist->next->next;
			/* 11/00 jmruiz -> Fixes not and parentheses bug
                        ** reported by Bill Moseley */
			/* A parenthesized value is parsed as a full sub-expression,
			** anything else as a single term */
			if (searchwordlist && searchwordlist->line[0] == '(') 
			{
				searchwordlist = searchwordlist->next;
				parseone=0;
			} else
				parseone=1;
			newrp = (struct result *) parseterm(fp, parseone, metaName);
			if (rulenum == AND_RULE)
				rp = (struct result *) andresultlists(rp, newrp, andLevel);
			else if (rulenum == OR_RULE)
				rp = (struct result *) orresultlists(rp, newrp);
			else if (rulenum == PHRASE_RULE)
				rp = (struct result *) phraseresultlists(rp, newrp,1);
			else if (rulenum == AND_NOT_RULE)
				rp = (struct result *)notresultlists(rp, newrp);
			
			if (searchwordlist == NULL)
				break;
			
			/* Back to the value search() passes as initial metaName */
			rulenum = NO_RULE;
			metaName = 1;
			continue;
		}
	
		/* Plain word: look it up and combine it with the results so far */
		rp = (struct result *) operate(rp, rulenum, word, 
					       fp, metaName,
						   andLevel);
		
		if (parseone) {
			searchwordlist = searchwordlist->next;
			break;
		}
		rulenum = NO_RULE;
		
		searchwordlist = searchwordlist->next;
	}
	
	/* lenword is non-zero only once SafeStrCopy() has been called */
	if(lenword) efree(word);
	return rp;
}

/* Removes a trailing '*' from word unless the '*' is the only character.
** Returns 1 when a star was removed, 0 otherwise. An empty word is left
** untouched (the length is checked before the last character is read).
*/
static int stripstar(char *word)
{
	int len = (int) strlen(word);

	if (len > 1 && word[len-1] == '*') {
		word[len-1] = '\0';
		return 1;
	}
	return 0;
}

/* Puts back the '*' removed by stripstar(), subject to the same length
** check against MAXWORDLEN that was applied before.
*/
static void restorestar(char *word)
{
	if ((strlen(word)-1)<MAXWORDLEN) strcat(word,"*");
}

/* Looks up a word in the index file -
** it calls getfileinfo(), which does the real searching.
**
** rp        results collected so far
** rulenum   rule used to combine the new results with rp
** word      search term; it is modified in place when stemming or
**           soundex rules are active
** fp        open index file
** metaName  metaName identifier handed on to getfileinfo()
** andLevel  number of terms ANDed so far, handed on to andresultlists()
**
** Returns the combined result list. NULL is returned when rulenum is
** none of the rules handled below.
*/

struct result *operate(rp, rulenum, word, fp, metaName, andLevel)
struct result *rp;
int rulenum;
char *word;
FILE *fp;
int metaName;
int andLevel;
{
	int i, found;
	struct result *newrp, *returnrp;

	newrp = returnrp = NULL;

	if (applyStemmingRules)
	{
		/* apply stemming algorithm to the search term; a trailing
		** wildcard is taken off first and put back afterwards */
		int hadstar = stripstar(word);

		Stem(word, MAXWORDLEN); /* CAREFUL! word length is assumed */
		if (hadstar) restorestar(word);
	}
	if (applySoundexRules)
	{
		/* apply soundex algorithm to the search term; a trailing
		** wildcard is taken off first and put back afterwards */
		int hadstar = stripstar(word);

		soundex(word);   /* Need to fix word length ? */
		if (hadstar) restorestar(word);
	}

	if (isstopword(word) && !isrule(word)) 
	{
		/* A stopword ORed to existing results changes nothing;
		** otherwise flag the "word too common" condition */
		if (rulenum == OR_RULE && rp != NULL)
			return rp;
		else
			commonerror = 1;
	}
	
	/* The word has to start with one of the characters in indexchars */
	for (i = found = 0; indexchars[i] != '\0'; i++)
	{
		if (word[0] == indexchars[i]) 
		{
			found = 1;
			break;
		}
	}
	if (!found) 
	{
		if (rulenum == AND_RULE || rulenum == PHRASE_RULE)
			return NULL;
		else if (rulenum == OR_RULE)
			return rp;
	}
	
	if (rulenum == AND_RULE) {
		newrp = (struct result *) getfileinfo(word, fp, metaName);
		returnrp = (struct result *) andresultlists(rp, newrp, andLevel);
	} else if (rulenum == OR_RULE) {
		newrp = (struct result *) getfileinfo(word, fp, metaName);
		returnrp = (struct result *) orresultlists(rp, newrp);
	} else if (rulenum == NOT_RULE) {
		newrp = (struct result *) getfileinfo(word, fp, metaName);
		returnrp = (struct result *) notresultlist(newrp, fp);
	} else if (rulenum == PHRASE_RULE) {
		newrp = (struct result *) getfileinfo(word, fp, metaName);
		returnrp = (struct result *) phraseresultlists(rp, newrp, 1);
	} else if (rulenum == AND_NOT_RULE) {
		newrp = (struct result *) getfileinfo(word, fp, metaName);
		returnrp = (struct result *) notresultlists(rp, newrp);
	}
	return returnrp;
}

/* Looks up a file name in the index file.
**
** filenum  file number; it is run through decodefilenum() and
**          getfilenum() to obtain the position of the entry
** fp       open index file
** propPos  when not NULL, receives the file position right after the
**          entry (as reported by ftell())
**
** Returns a line of the form: filename "title" size\n
** The returned string lives in a static buffer that is reused by the
** next call.
*/

char *lookupfile(filenum, fp, propPos)
     int filenum;
     FILE *fp;
     long *propPos;
{
	/* work buffers, allocated on first use and kept between calls */
	static char *namebuf=NULL;
	static int namecap=0;
	static char *titlebuf=NULL;
	static int titlecap=0;
	static char *outline=NULL;
	static int outcap=0;
	int namelen,titlelen,size;
	long entrypos;

	if(!namecap) {
		namecap=MAXSTRLEN;
		namebuf=emalloc(namecap+1);
	}
	if(!titlecap) {
		titlecap=MAXSTRLEN;
		titlebuf=emalloc(titlecap+1);
	}
	if(!outcap) {
		outcap=MAXSTRLEN;
		outline = emalloc(outcap + 1);
	}

	entrypos = getfilenum(decodefilenum(filenum) - 1);
	fseek(fp, 0, 0);  /* Do I have a buggy gcc ? */
	fseek(fp, entrypos, 0);

	/* filename: length followed by the characters */
	uncompress(namelen,fp);
	if(namelen>=namecap) {
		namecap = namelen +200;
		namebuf = erealloc(namebuf,namecap+1);
	}
	fread(namebuf,namelen,1,fp);
	namebuf[namelen] = '\0';

	/* title: length followed by the characters */
	uncompress(titlelen,fp);
	if(titlelen>=titlecap) {
		titlecap = titlelen +200;
		titlebuf = erealloc(titlebuf,titlecap+1);
	}
	fread(titlebuf,titlelen,1,fp);
	titlebuf[titlelen] = '\0';

	/* size: the stored value is decremented by one before use */
	uncompress(size,fp);
	size--;

	/* make room for both strings plus the fixed parts of the line */
	if ((namelen + titlelen + 100) >= outcap) {
		outcap = namelen + titlelen + 200;
		outline = (char *) erealloc(outline, outcap + 1);
	}
	sprintf(outline,"%s \"%s\" %d\n",namebuf,titlebuf,size);
	
	if (propPos != NULL)
		*propPos = ftell(fp);
	
	return outline;
}

/* Finds a word and returns its corresponding file and rank information list.
** If not found, NULL is returned.
**
** word     - search term; if it contains a '*', only the characters in
**            front of the first '*' are used, as a prefix to match
** fp       - open index file
** metaName - only entries stored under this metaname id are collected
**
** A term that starts with '*' makes the function print an error and
** terminate the program with exit(0).
*/
/* Jose Ruiz
** New implementation based on hashing for direct access. Faster!!
** Also solves stars. Faster!! It can even find "and", "or"
** when looking for "an*" or "o*" if they are not stop words
*/
struct result *getfileinfo(word, fp, metaName)
char *word;
FILE *fp;
int metaName;
{
	int i, j, x, rank, filenum, structure, frequency,  *position, tries, found, len, curmetaname;
	/* Buffer for words read from the index; kept between calls and
	** grown when a longer word is met */
	static int filewordlen = 0;
	static char *fileword = NULL;
	struct result *rp, *rp2, *tmp;
	int res, wordlen;
	unsigned hashval;
	long offset,nextposmetaname;
	char *p;

	x=j=rank=filenum=structure=frequency=tries=len=curmetaname=0;
	position=NULL;
	nextposmetaname=0L;

        if(!filewordlen) fileword = (char *) emalloc((filewordlen=MAXWORDLEN) + 1);

	rp = rp2 = NULL;
		/* First: Look for star */
	if(!(p=strchr(word,'*'))) {
			/* If there is not a star use the hash approach ... */
		res = 1;
		tries = 0;
			/* Get hash file offset */
		hashval = searchhash(word);
		if(!(offset=hashoffsets[hashval])) return(NULL); /* Not found */
			/* Search for word: follow the chain of entries
			** sharing this hash value */
		while (res) {
			/* tries is just to see how hash works and store hash tries */
			tries++;
			/* Position in file */
			fseek(fp,offset,0);
			/* Get word */
			uncompress(wordlen,fp);
			if(wordlen > filewordlen) {
				filewordlen = wordlen + 100;
				fileword = (char *) erealloc(fileword,filewordlen + 1);
			}
			fread(fileword,1,wordlen,fp);
			fileword[wordlen]='\0';
			offset = readlong(fp);  /* Next hash */
			if(!(res=strcmp(word,fileword))) break;  /* Found !! */
			else if (!offset) return NULL; /* No more entries if NULL*/
		}
	}
	else 
	{	/* There is a star. So use the sequential approach */
		if(p == word) 
		{
			printf("err: At least one non * must be specified in a term\n.\n");
			exit(0);
		}
		/* Number of characters in front of the star */
		len = p - word;
		/* Start reading at the offset recorded for the first
		** character of the term */
		for (i = found = 0; indexchars[i] != '\0'; i++) {
			if (word[0] == indexchars[i]) {
				if(!offsets[i]) return NULL;
				fseek(fp, offsets[i], 0);
				found = 1;
				break;
			}
		}
		if (!found)
			return NULL;

		/* Reaching the position stored in offsets[STOPWORDPOS]
		** means there are no more words to read */
	        if (offsets[STOPWORDPOS] == ftell(fp))
			return NULL;

		/* Look for first occurrence */
	        uncompress(wordlen,fp); 
		if(wordlen > filewordlen) {
			filewordlen = wordlen + 100;
			fileword = (char *) erealloc(fileword,filewordlen + 1);
		}
	        while (wordlen) {
			fread(fileword,1,wordlen,fp);
			fileword[wordlen]='\0';
			readlong(fp);    /* jump hash offset */
			if(!(res=strncmp(word,fileword,len))) break; /*Found!!*/
			/* NOTE(review): giving up as soon as the term compares
			** lower presumably relies on the words being stored in
			** ascending order - confirm against the indexer */
			if (res < 0) return NULL;  /* Not found */
				/* Jump till next value: skip every metaname
				** block of this word by following the stored
				** "next block" positions until a 0 is read */
			uncompress(x,fp);
			while(x){
				nextposmetaname=readlong(fp);
				fseek(fp,nextposmetaname,0);
				uncompress(x,fp);
			}
			if (offsets[STOPWORDPOS] == ftell(fp))
				return NULL;  /* not found */
	        	uncompress(wordlen,fp);  /* Next word */
			if(wordlen > filewordlen) {
				filewordlen = wordlen + 100;
				fileword = (char *) erealloc(fileword,filewordlen + 1);
			}
		}
	}
		/* If code is here we have found the word !! */
	do {
		/* Now look for a correct Metaname: each block starts with
		** a metaname id followed by the position of the next block;
		** stop at the first id that is not lower than metaName */
		uncompress(curmetaname,fp); 
		while(curmetaname) {
			nextposmetaname=readlong(fp);
			if(curmetaname>=metaName) break;
			fseek(fp,nextposmetaname,0);
			uncompress(curmetaname,fp); 
		}
		if(curmetaname==metaName) found=1;
		else found=0;
		if(found) {
			do {   /* Read on all items */
				uncompress(filenum,fp);
				uncompress(rank,fp);
				uncompress(structure,fp);
				uncompress(frequency,fp);
				/* One position is stored per occurrence;
				** the array is handed over to the new
				** result node */
				position=(int *)emalloc(frequency*sizeof(int));
				for(j=0;j<frequency;j++) {
					uncompress(x,fp);
					position[j] = x;
				}
				rp = (struct result *) addtoresultlist(rp, filenum, rank, structure,frequency,position);
				if (verbose == 4)
				{
					/* dump diagnostic info */
					char* pos;
					char* fileinfo;
					long curFilePos;
					/* lookupfile() moves the stream, so the
					** read position is saved and put back */
					curFilePos = ftell(fp);	/* save */
					fileinfo = lookupfile(filenum, fp, NULL);
					pos = strchr(fileinfo, '\"');	/* after file name */
					if (pos)
						*(pos-1) = '\0';	/* truncate */
					printf("# diag\tFILE: %s\tWORD: %s\tRANK: %d\tFREQUENCY: %d\t HASH ITEM: %d\n", fileinfo, word, rank, frequency, tries);
					if (pos)
						*(pos-1) = ' ';	/* restore */
					fseek(fp, curFilePos, 0); /* restore */
				}
			/* The block ends where the next metaname block starts */
			} while(ftell(fp)!=nextposmetaname);
		}
		if(!p) break;   /* direct access -> break */
		else {
			/* Jump to next word */
			/* No more data for this word but we
			are in sequential search because of
			the star (p is not null) */
			/* So, look for next occurrence */
			if(!found) fseek(fp,nextposmetaname,0);
			/* Skip the remaining metaname blocks of this word */
			uncompress(curmetaname,fp);
			while(curmetaname) {
				nextposmetaname=readlong(fp);
				fseek(fp,nextposmetaname,0);
				uncompress(curmetaname,fp);
			}
			if (offsets[STOPWORDPOS] == ftell(fp)) 
				break; /* no more data */
	        	uncompress(wordlen,fp);
			if(wordlen > filewordlen) {
				filewordlen = wordlen + 100;
				fileword = (char *) erealloc(fileword,filewordlen + 1);
			}
			fread(fileword,1,wordlen,fp);
			fileword[wordlen] = '\0';
			readlong(fp);    /* jump hash offset */
			/* Keep going only while the next word still starts
			** with the requested prefix */
			res=strncmp(word,fileword,len);
			if (res) break;  /* No more data */
		}
	} while(1);
	if (p) {
			/* Finally, if we are in a sequential search
			merge all results: every node is passed through
			mergeresulthashlist() and the list is then rebuilt
			from the buckets of resulthashlist */
		initresulthashlist();
		rp2 = NULL;
	 	while (rp != NULL) {
			/* Saved first: orresultlists notes that the node
			** can be freed inside mergeresulthashlist */
			tmp = rp->next;
	                mergeresulthashlist(rp);
	        	rp=tmp;
	  	}
		for (i = 0; i < BIGHASHSIZE; i++) {
			rp = resulthashlist[i];
	                while (rp != NULL) {
				rp2 = (struct result *) addtoresultlist(rp2,
	                	rp->filenum, rp->rank, rp->structure,
			        rp->frequency, rp->position);
				tmp = rp->next;
				/* Do not free position in freeresult
				It was added to rp2 !! */
				rp->position = NULL;
				freeresult(rp);
				rp = tmp;
			}
		}
		rp =rp2;
	}
	return rp;
}

/* Is a word a rule?
** Returns 1 when word equals AND_WORD, OR_WORD, NOT_WORD, PHRASE_WORD
** or AND_NOT_WORD, and 0 for anything else.
*/

int isrule(word)
char *word;
{
	if (strcmp(word, AND_WORD) == 0)
		return 1;
	if (strcmp(word, OR_WORD) == 0)
		return 1;
	if (strcmp(word, NOT_WORD) == 0)
		return 1;
	if (strcmp(word, PHRASE_WORD) == 0)
		return 1;
	if (strcmp(word, AND_NOT_WORD) == 0)
		return 1;
	return 0;
}

/* Is a word the NOT rule?
** Returns 1 when word equals NOT_WORD, 0 otherwise.
*/
int isnotrule(word)
char *word;
{
	return strcmp(word, NOT_WORD) == 0;
}


/* Is a word a boolean rule?
** Returns 1 when word equals AND_WORD, OR_WORD, PHRASE_WORD or
** AND_NOT_WORD, and 0 otherwise (NOT_WORD is not counted here).
*/

int isbooleanrule(word)
char *word;
{
	return (strcmp(word, AND_WORD) == 0
		|| strcmp(word, OR_WORD) == 0
		|| strcmp(word, PHRASE_WORD) == 0
		|| strcmp(word, AND_NOT_WORD) == 0);
}

/* Is a word a unary rule?
** NOT_WORD is the only word tested: returns 1 for it, 0 otherwise.
*/

int isunaryrule(word)
char *word;
{
	return strcmp(word, NOT_WORD) ? 0 : 1;
}

/* Return the number for a rule.
** Maps AND_WORD, OR_WORD, NOT_WORD, PHRASE_WORD and AND_NOT_WORD to
** their *_RULE constants; any other word gives NO_RULE.
*/

int getrulenum(word)
char *word;
{
	if (strcmp(word, AND_WORD) == 0)
		return AND_RULE;
	if (strcmp(word, OR_WORD) == 0)
		return OR_RULE;
	if (strcmp(word, NOT_WORD) == 0)
		return NOT_RULE;
	if (strcmp(word, PHRASE_WORD) == 0)
		return PHRASE_RULE;
	if (strcmp(word, AND_NOT_WORD) == 0)
		return AND_NOT_RULE;

	return NO_RULE;
}

/* Takes two lists of results from searches and ANDs them together.
**
** r1, r2   - result lists to intersect (either may be NULL)
** andLevel - number of terms already ANDed into r1; values below 1
**            are treated as 1
**
** Returns a new list with one node for every file number present in
** both lists, or NULL when there is none.  Both input lists are
** consumed: all their nodes are freed before returning, including the
** case where one of the lists is empty (that case used to leak the
** other list).
*/

struct result *andresultlists(r1, r2, andLevel)
     struct result *r1;
     struct result *r2;
     int andLevel;
{
	struct result *head1, *head2, *next, *newnode;
	int *allpositions;
	int newRank, total;

	newnode = NULL;

	if (r1 != NULL && r2 != NULL) {
		if (andLevel < 1)
			andLevel = 1;
		/* Jose Ruiz 06/00
		** Sort r1 and r2 by filenum for better performance */
		r1 = sortresultsbyfilenum(r1);
		r2 = sortresultsbyfilenum(r2);
	}
	/* Remember the heads so that both lists can be freed below */
	head1 = r1;
	head2 = r2;

	/* Walk both sorted lists in step; not entered if either is empty */
	while (r1 != NULL && r2 != NULL) {
		if (r1->filenum < r2->filenum) {
			r1 = r1->next;
			continue;
		}
		if (r1->filenum > r2->filenum) {
			r2 = r2->next;
			continue;
		}
		/*
		 * Computing the new rank is interesting because
		 * we want to weight each of the words that was
		 * previously ANDed equally along with the new word.
		 * We compute a running average using andLevel and
		 * simply scale up the old average (in r1->rank)
		 * and recompute a new, equally weighted average.
		 */
		newRank = ((r1->rank * andLevel) + r2->rank) / (andLevel + 1);
		/*
		 * Storing all positions could be useful in the future.
		 * Nothing is allocated when neither node has positions:
		 * addtoresultlist ignores the array when frequency is 0,
		 * so a zero-size block would never be freed.
		 */
		total = r1->frequency + r2->frequency;
		allpositions = NULL;
		if (total > 0) {
			allpositions = (int *) emalloc(total * sizeof(int));
			CopyPositions(allpositions, 0, r1->position, 0, r1->frequency);
			CopyPositions(allpositions, r1->frequency, r2->position, 0, r2->frequency);
		}
		newnode = (struct result *) addtoresultlist(newnode, r1->filenum, newRank, r1->structure & r2->structure, total, allpositions);
		r1 = r1->next;
		r2 = r2->next;
	}

	/* Jose Ruiz 04/00 Free memory no longer needed */
	for (; head1 != NULL; head1 = next) {
		next = head1->next;
		freeresult(head1);
	}
	for (; head2 != NULL; head2 = next) {
		next = head2->next;
		freeresult(head2);
	}
	return newnode;
}

/* Takes two lists of results from searches and ORs them together.
**
** If one list is NULL the other one is returned as it is.  Otherwise
** every node of both lists is passed to mergeresulthashlist() and a
** new list is built from the buckets of resulthashlist; the bucket
** nodes are freed once their data has been moved to the new list.
*/

struct result *orresultlists(r1, r2)
struct result *r1;
struct result *r2;
{
	struct result *cur, *following, *merged;
	int bucket;

	if (r1 == NULL)
		return r2;
	if (r2 == NULL)
		return r1;

	initresulthashlist();

	/* The successor is read before the call because the node may be
	** freed inside mergeresulthashlist */
	for (cur = r1; cur != NULL; cur = following) {
		following = cur->next;
		mergeresulthashlist(cur);
	}
	for (cur = r2; cur != NULL; cur = following) {
		following = cur->next;
		mergeresulthashlist(cur);
	}

	merged = NULL;
	for (bucket = 0; bucket < BIGHASHSIZE; bucket++) {
		for (cur = resulthashlist[bucket]; cur != NULL; cur = following) {
			merged = (struct result *) addtoresultlist(merged,
				cur->filenum, cur->rank, cur->structure,
				cur->frequency, cur->position);
			following = cur->next;
			/* The position array now belongs to the new node,
			** so it must not be released by freeresult */
			cur->position = NULL;
			freeresult(cur);
		}
	}
	return merged;
}

/* This performs the NOT unary operation on a result list.
**
** Every file number found in rp is marked; the function then returns a
** new list holding all file numbers from 1 to totalfiles that were not
** marked (GH).  Each new node gets rank 1000, structure IN_ALL and no
** positions.  rp itself is only read, and fp is not used.
*/

struct result *notresultlist(rp, fp)
struct result *rp;
FILE *fp;
{
	struct result *unmarked;
	int filenum, lastfile;

	unmarked = NULL;

	initmarkentrylist();
	for (; rp != NULL; rp = rp->next) {
		marknum(rp->filenum);
	}

	lastfile = totalfiles;
	filenum = 1;
	while (filenum <= lastfile) {
		/* Patch from Bill Moseley Dec 21 1999: file numbers that
		** are multiples of 128 are no longer skipped here */
		if (!ismarked(filenum))
			unmarked = (struct result *) addtoresultlist(unmarked, filenum, 1000, IN_ALL, 0, NULL);
		filenum++;
	}

	return unmarked;
}

/* Takes two lists of results and keeps the files where a word of r2
** follows a word of r1 at the given distance.
**
** r1, r2   - result lists of the left and right term (either may be NULL)
** distance - required difference between a position in r2 and a
**            position in r1
**
** For each file present in both lists, every position p of r1 is
** checked for a position p + distance in r2.  Files with at least one
** match are added to the returned list; the matching r2 positions are
** stored in the new node and the rank is the mean of both ranks.
** Both input lists are consumed: all their nodes are freed before
** returning, including the case where one of the lists is empty (that
** case used to leak the other list).
*/
struct result *phraseresultlists(r1, r2, distance)
     struct result *r1;
     struct result *r2;
     int distance;
{
	struct result *head1, *head2, *next, *newnode;
	int i, j, found, newRank, *allpositions;

	newnode = NULL;

	if (r1 != NULL && r2 != NULL) {
		r1 = sortresultsbyfilenum(r1);
		r2 = sortresultsbyfilenum(r2);
	}
	/* Remember the heads so that both lists can be freed below */
	head1 = r1;
	head2 = r2;

	/* Walk both sorted lists in step; not entered if either is empty */
	while (r1 != NULL && r2 != NULL) {
		if (r1->filenum < r2->filenum) {
			r1 = r1->next;
			continue;
		}
		if (r1->filenum > r2->filenum) {
			r2 = r2->next;
			continue;
		}

		/* Same file: collect the r2 positions that sit exactly
		** "distance" after some r1 position */
		found = 0;
		allpositions = NULL;
		for (i = 0; i < r1->frequency; i++) {
			for (j = 0; j < r2->frequency; j++) {
				if ((r1->position[i] + distance) != r2->position[j])
					continue;
				found++;
				if (allpositions)
					allpositions = (int *) erealloc(allpositions, found * sizeof(int));
				else
					allpositions = (int *) emalloc(found * sizeof(int));
				allpositions[found - 1] = r2->position[j];
				break;	/* one match per r1 position */
			}
		}
		if (found) {
			/* To do: Compute newrank */
			newRank = (r1->rank + r2->rank) / 2;
			/*
			* Storing positions is necessary for further
			* operations
			*/
			newnode = (struct result *) addtoresultlist(newnode, r1->filenum, newRank, r1->structure & r2->structure, found, allpositions);
		}
		r1 = r1->next;
		r2 = r2->next;
	}

	/* free unused memory */
	for (; head1 != NULL; head1 = next) {
		next = head1->next;
		freeresult(head1);
	}
	for (; head2 != NULL; head2 = next) {
		next = head2->next;
		freeresult(head2);
	}
	return newnode;
}

/* Adds a file number and rank to a list of results.
**
** A new node is allocated and filled with the given values; fileInfo
** and next are cleared, and position is stored only when both
** frequency and position are non-zero.  Members not listed here are
** left as allocated.
**
** When rp is NULL the new node becomes the list and is returned.
** Otherwise the node is linked after the node created by the previous
** call (kept in a static pointer), not after the last node of rp, so a
** list has to be built with consecutive calls.  Returns the list head.
*/

struct result *addtoresultlist(rp, filenum, rank, structure, frequency, position)
struct result *rp;
int filenum;
int rank;
int structure;
int frequency;
int *position;
{
	static struct result *lastadded;
	struct result *node;

	node = (struct result *) emalloc(sizeof(struct result));

	node->filenum = filenum;
	node->fileInfo = NULL;
	node->rank = rank;
	node->structure = structure;
	node->frequency = frequency;
	node->position = (frequency && position) ? position : NULL;
	node->next = NULL;

	if (rp != NULL)
		lastadded->next = node;
	else
		rp = node;

	lastadded = node;

	return rp;
}

/* Puts one result in front of the chain of sorted results.
*/

/* Jose Ruiz 04/00
** Complete rewrite
** The sort itself is done before calling this function; here the
** result is only linked in front of sphead through nextsort, so the
** chain ends up in the reverse of the order of the calls.
** Also raises the global bigrank to the rank of r when it is larger
** and cuts the fileInfo text at its first newline.
** Returns r, the new head of the chain.
*/
struct result *addsortresult(sphead, r)
     struct result *sphead;
     struct result *r;
{
	char *newline;

	if (bigrank < r->rank)
		bigrank = r->rank;

	/* formatting search results is easier without the newline */
	newline = strchr(r->fileInfo, '\n');
	if (newline != NULL)
		*newline = '\0';

	/* sphead is NULL for the first result, which ends the chain */
	r->nextsort = sphead;
	return r;
}

/* Counts the number of files that are the result of a search,
** i.e. the nodes reachable from sp through the nextsort links.
*/

int countResults(sp)
struct result *sp;
{
	int count;

	for (count = 0; sp != NULL; sp = sp->nextsort)
		count++;

	return count;
}

/* Prints the final results of a search.
**
** sp  - head of the chain of sorted results (linked through nextsort)
** num - factor applied to every rank before printing
** fp  - not used
**
** The first beginhits results are skipped (the global is counted down),
** then results are printed while the global resultmaxhits is not 0; a
** positive resultmaxhits is decremented for every printed result.
** Printed ranks are limited to the range 1..1000.
**
** With useCustomOutputDelimiter set, fileInfo (name "title" size) is
** split in place into its three parts, printed with
** customOutputDelimiter between the fields and then restored exactly:
** the characters that were overwritten are saved and put back.  The
** previous code put a blank over the opening quote instead of the
** separator, restored nothing when the title was empty and wrote in
** front of the buffer when fileInfo started with a quote.
*/

void printsortedresults(sp, num, fp)
     struct result *sp;
     double num;
     FILE* fp;
{
	int rank;
	char *filename, *title, *fileSize;
	char *openQuote, *closeQuote, *sep;
	char sepChar;

	for (; sp != NULL; sp = sp->nextsort) {
		rank = (int) ((float) sp->rank * num);
		if (rank >= 999)
			rank = 1000;
		if (rank <= 0)
			rank = 1;

		/* Still inside the hits to skip */
		if (beginhits) {
			beginhits--;
			continue;
		}
		/* Nothing is skipped or printed any more from here on */
		if (!resultmaxhits)
			break;

		if (useCustomOutputDelimiter) {
			/* parse fileinfo into filename, title and size */
			filename = sp->fileInfo;
			title = "";
			fileSize = "0";
			sep = NULL;
			sepChar = ' ';
			closeQuote = NULL;

			openQuote = strchr(sp->fileInfo, '\"');
			if (openQuote != NULL) {
				if (openQuote == sp->fileInfo) {
					/* no character in front of the quote:
					** the file name is empty */
					filename = "";
				} else {
					/* end the file name at the separator
					** between filename and title */
					sep = openQuote - 1;
					sepChar = *sep;
					*sep = '\0';
				}
				title = openQuote + 1;	/* past double quote */
				closeQuote = strchr(title, '\"');	/* end of title */
				if (closeQuote != NULL) {
					*closeQuote = '\0';
					fileSize = closeQuote + 1;
					while (*fileSize == ' ')
						fileSize++;
				}
			}

			printf("%d%s%s%s%s%s%s", rank, customOutputDelimiter, filename, customOutputDelimiter, title, customOutputDelimiter, fileSize);

			/* restore fileinfo... */
			if (closeQuote != NULL)
				*closeQuote = '\"';
			if (sep != NULL)
				*sep = sepChar;
		} else {
			printf("%d %s", rank, sp->fileInfo);
		}
		printSearchResultProperties(sp->prop);
		printf("\n");
		if (resultmaxhits > 0) resultmaxhits--; /* Modified DN 08/29/99  */
	}
}

/* Does an index file have a readable format?
**
** Reads the first line of fp and compares it, without its trailing
** newline, with INDEXHEADER.  Returns 1 when they are equal and 0
** otherwise, also when no line can be read (empty file or read error).
** The stream is positioned at the start of the file on return.
*/

int isokindexheader(fp)
FILE *fp;
{
	char line[MAXSTRLEN];
	size_t len;
	int ok;

	ok = 0;
	fseek(fp, 0, SEEK_SET);
	/* fgets returns NULL on end-of-file or error and may then leave
	** the buffer unset, so the line is only examined on success */
	if (fgets(line, MAXSTRLEN, fp) != NULL) {
		len = strlen(line);
		if (len > 0 && line[len - 1] == '\n')
			line[len - 1] = '\0';
		ok = (strcmp(line, INDEXHEADER) == 0);
	}
	fseek(fp, 0, SEEK_SET);
	return ok;
}


/* Returns the value associated with the metaName if it exists:
** metaEntryList is searched for an entry whose metaName equals word
** and the index of the first such entry is returned.  When no entry
** matches, 1 is returned.
*/

int getMetaName(word)
char * word;
{
	struct metaEntry *entry;

	entry = metaEntryList;
	while (entry != NULL) {
		if (strcmp(entry->metaName, word) == 0)
			return entry->index;
		entry = entry->next;
	}
	return 1;
}

/* Checks if the next word is "=".
** Returns 1 when searchWord is not NULL and its line is exactly "=",
** and 0 in every other case.
*/

int isMetaName (searchWord)
struct swline* searchWord;
{
	return (searchWord != NULL && strcmp(searchWord->line, "=") == 0) ? 1 : 0;
}

/* Function to free all memory of a list of results: every node reached
** through the next links is released with freeresult() */
void freeresultlist(rp)
struct result *rp;
{
	struct result *following;

	for (; rp != NULL; rp = following) {
		/* read the link before the node goes away */
		following = rp->next;
		freeresult(rp);
	}
}

/* Function to free the memory of one result: its position array and
** fileInfo string (when set) and then the node itself.  A NULL rp is
** accepted and ignored. */
void freeresult(rp)
struct result *rp;
{
	if (rp == NULL)
		return;

	if (rp->position != NULL)
		efree(rp->position);
	if (rp->fileInfo != NULL)
		efree(rp->fileInfo);
	efree(rp);
}

/* 
04/00 Jose Ruiz - Sort results by rank
Uses an array and qsort for better performance

Only the results of rp whose structure has a bit in common with the
structure argument take part.  For each of them a record made of the
rank (an int) followed by the node pointer is stored in a temporary
array, which is sorted with qsort() and icomp.  The sorted nodes then
get their fileInfo (a copy of the lookupfile() line) and propPos filled
in and are chained with addsortresult(); as that function links every
node in front, the returned chain runs in the reverse of the qsort
order.  Returns NULL when rp is NULL or no result qualifies.
*/
struct result *sortresultsbyrank(rp, structure, fp)
struct result *rp;
int structure;
FILE *fp;
{ 
	struct result *node, *sorted;
	unsigned char *table, *slot;
	int count, slotsize, k;

	/* Very trivial case */
	if (rp == NULL)
		return NULL;

	/* Count the results that qualify */
	count = 0;
	for (node = rp; node != NULL; node = node->next) {
		if (node->structure & structure)
			count++;
	}
	/* Another very trivial case */
	if (count == 0)
		return NULL;

	/* One record per result: the key and the pointer to the data */
	slotsize = sizeof(int) + sizeof(void *);
	table = (void *) emalloc(slotsize * count);

	slot = table;
	for (node = rp; node != NULL; node = node->next) {
		if (!(node->structure & structure))
			continue;
		*(int *) slot = node->rank;
		memcpy((char *) (slot + sizeof(int)), (char *) &node, sizeof(struct result *));
		slot += slotsize;
	}

	/* Sort them */
	qsort(table, count, slotsize, &icomp);

	/* Build the list */
	sorted = NULL;
	for (k = 0, slot = table; k < count; k++, slot += slotsize) {
		memcpy((char *) &node, (char *) (slot + sizeof(int)), sizeof(struct result *));
		node->fileInfo = estrdup(lookupfile(node->filenum, fp, &node->propPos));
		sorted = (struct result *) addsortresult(sorted, node);
	}

	/* Free the memory of the array */
	efree(table);
	return sorted;
}

/* 
06/00 Jose Ruiz - Sort results by filenum
Uses an array and qsort for better performance
Used for faster "and" and "phrase" of results

The nodes of rp are relinked through their next members in the order
produced by qsort() with icomp on the file numbers; no node is created
or freed.  Returns the new head of the list (NULL for an empty list,
rp itself for a list of one).
*/
struct result *sortresultsbyfilenum(rp)
struct result *rp;
{ 
	struct result *node, *head, *tail;
	unsigned char *table, *slot;
	int count, slotsize, k;

	/* Very trivial case */
	if (rp == NULL)
		return NULL;

	/* Count the results */
	count = 0;
	for (node = rp; node != NULL; node = node->next)
		count++;
	/* Another very trivial case */
	if (count == 1)
		return rp;

	/* One record per result: the key and the pointer to the data */
	slotsize = sizeof(int) + sizeof(void *);
	table = (void *) emalloc(slotsize * count);

	slot = table;
	for (node = rp; node != NULL; node = node->next) {
		*(int *) slot = node->filenum;
		memcpy((char *) (slot + sizeof(int)), (char *) &node, sizeof(struct result *));
		slot += slotsize;
	}

	/* Sort them */
	qsort(table, count, slotsize, &icomp);

	/* Relink the nodes in sorted order */
	head = NULL;
	tail = NULL;
	for (k = 0, slot = table; k < count; k++, slot += slotsize) {
		memcpy((char *) &node, (char *) (slot + sizeof(int)), sizeof(struct result *));
		if (head == NULL)
			head = node;
		else
			tail->next = node;
		tail = node;
	}
	tail->next = NULL;

	/* Free the memory of the array */
	efree(table);
	return head;
}

/* Loads the properties of every result in a list.
** For each node reached through next, lookupfile() is called to get
** the position that follows the file's entry in the index, and that
** position is handed to getResultProperties() together with the
** node's prop member.  The text returned by lookupfile() is not used.
** Returns rp unchanged.
*/
struct result *getproperties(rp, fp)
struct result *rp;
FILE *fp;
{
	struct result *cur;
	long propPos;

	for (cur = rp; cur != NULL; cur = cur->next) {
		/* called only for the position it stores in propPos */
		lookupfile(cur->filenum, fp, &propPos);
		getResultProperties(propPos,cur->prop,fp);
	}
	return rp;
}

/* 06/00 Jose Ruiz
** Returns all results in r1 whose file number does not appear in r2.
**
** When r2 is NULL and r1 is not, r1 itself is returned untouched.
** In every other case the result is a newly built list (NULL when
** nothing is left) holding copies of the surviving r1 nodes, and both
** input lists are consumed: all their nodes are freed before
** returning, including the case where r1 is empty (that case used to
** leak r2).
*/
struct result *notresultlists(r1, r2)
     struct result *r1;
     struct result *r2;
{
	struct result *head1, *head2, *next, *newnode;
	int *allpositions;

	/* Nothing to take away */
	if (r1 != NULL && r2 == NULL)
		return r1;

	newnode = NULL;
	if (r1 != NULL) {
		r1 = sortresultsbyfilenum(r1);
		r2 = sortresultsbyfilenum(r2);
	}
	/* Jose Ruiz 04/00 -> Preserve r1 and r2 for further processing */
	head1 = r1;
	head2 = r2;

	while (r1 != NULL) {
		/* Pass over excluded files that sort before this one */
		while (r2 != NULL && r2->filenum < r1->filenum)
			r2 = r2->next;

		if (r2 != NULL && r2->filenum == r1->filenum) {
			/* The file is excluded: drop it */
			r1 = r1->next;
			r2 = r2->next;
			continue;
		}

		/*
		* Storing all positions could be useful in the future.
		* Nothing is allocated for a node without positions:
		* addtoresultlist ignores the array when frequency is 0,
		* so a zero-size block would never be freed.
		*/
		allpositions = NULL;
		if (r1->frequency > 0 && r1->position != NULL) {
			allpositions = (int *) emalloc((r1->frequency) * sizeof(int));
			CopyPositions(allpositions, 0, r1->position, 0, r1->frequency);
		}
		newnode = (struct result *) addtoresultlist(newnode, r1->filenum, r1->rank, r1->structure, r1->frequency, allpositions);
		r1 = r1->next;
	}

	/* Free memory no longer needed */
	for (; head1 != NULL; head1 = next) {
		next = head1->next;
		freeresult(head1);
	}
	for (; head2 != NULL; head2 = next) {
		next = head2->next;
		freeresult(head2);
	}
	return newnode;
}

