/* javalex.l -- Lexical analyser for a Java(tm)-like source language
 * Written by Charles Briscoe-Smith; refer to the file LEGAL for details.
 */

/*
How this scanner doesn't comply with chapter 3 of the Java language spec:

- Each input character is treated directly as a UnicodeInputCharacter,
  without doing the \uXXXX escaping specified in S3.3.  (Unicode escaping
  is done as a special case in character and string literals.)

- ASCII SUB is not treated specially at end of file.

- Documentation comments are not treated specially.

- Java letters are limited to [A-Za-z_$].  Java digits are limited
  to [0-9].  In other words, non-ASCII characters are treated as
  non-alphanumeric.

For bootstrapping purposes, this seems acceptable.

There has been no special effort to do error-checking in this scanner;
it assumes valid input.
*/

%{
/* Project headers.  NOTE(review): presumably types.h supplies s8,
 * javagrammar.y.h the token codes and yylval, xmalloc.h xmalloc() and
 * error.h error() and thiscanthappen -- confirm against those headers.
 */
#include "types.h"
#include "javagrammar.y.h"
#include "xmalloc.h"
#include "error.h"

/* NOTE(review): presumably asks flex not to emit the unused yyunput()
 * helper -- confirm against the flex version in use.
 */
#define YY_NO_UNPUT

/* Helpers defined in the user-code section at the end of this file.
 * stringlitlength() has no prototype here because it is defined before
 * its only use.
 */
static int char2num(char);
static void getnum(const char *, int, int, s8 *, int *);
static char * newstring(const char *, int);
static char * getstringlit(const char *, int);
static int getcharacterlit(const char **, int *);
static char * getfloat(const char *, int, int *);

/* The scanner entry point produced from the rules below. */
extern int yylex(void);
%}

	/* Line counter and file name, defined elsewhere; the rules below */
	/* increment input_line and print both in the scanning-error message. */
	extern int input_line;
	extern const char *input_file;

	/* Table-size directives from classic lex. */
	/* NOTE(review): flex is expected to accept and ignore them -- confirm. */
%e 1500
%p 3000

	/* Exclusive start conditions: LCOM is entered inside a // comment, */
	/* TCOM inside a slash-star comment, ERROR after an unmatchable character. */
%x LCOM
%x TCOM
%x ERROR

	/* Java letters, digits, and letters-or-digits (ASCII only). */
JLETTER	[A-Za-z_$]
JDIGIT	[0-9]
JLORD	[A-Za-z_$0-9]
	/* Hexadecimal digits only: the previous [0-9a-zA-Z] let escapes */
	/* such as \uzzzz through as if they were valid Unicode escapes. */
HEXDIG	[0-9a-fA-F]
	/* Body of an escape sequence after the backslash: a simple escape */
	/* letter, an octal escape of up to three digits, or u plus four hex digits. */
ESCLET	([btnfr"'\\]|[0-3]?[0-7]?[0-7]|u{HEXDIG}{HEXDIG}{HEXDIG}{HEXDIG})

%%

 /* Ignorable stuff: comments, white space and line terminators */

 /* LF, CR or CRLF, either between tokens or inside a traditional */
 /* comment: count the line and produce no token. */
<INITIAL,TCOM>\n	|
<INITIAL,TCOM>\r	|
<INITIAL,TCOM>\r\n	/* printf("Line terminator\n"); */ input_line++;

 /* Blanks, tabs and form feeds between tokens are discarded. */
[ \t\f]			/* printf("Whitespace\n"); */

 /* "//" enters LCOM, where every character is dropped until the first */
 /* line terminator; that terminator is counted and ends the comment. */
"//"			/* puts("Line comment"); */ BEGIN(LCOM);
<LCOM>[^\r\n]		/* ignore */
<LCOM>\n		|
<LCOM>\r		|
<LCOM>\r\n		input_line++; BEGIN(INITIAL);

 /* Slash-star enters TCOM; star-slash leaves it.  Other characters are */
 /* dropped one at a time; line terminators in TCOM are counted above. */
"/*"			/* puts("Trad/doc comment"); */ BEGIN(TCOM);
<TCOM>"*/"		BEGIN(INITIAL);
<TCOM>.			/* ignore */

 /* Everything else: identifiers, keywords, literals, separators, operators */

 /* Boolean literals carry their value (1 or 0) in yylval.integer. */
true			yylval.integer=1; return BOOLEANLITERAL;
false			yylval.integer=0; return BOOLEANLITERAL;

null			return NULLLITERAL;

 /* Keywords, one token each.  They are listed before the identifier rule; */
 /* NOTE(review): this relies on the scanner preferring the earliest rule */
 /* among equally long matches so that a keyword wins over IDENTIFIER. */
abstract		return ABSTRACT;
boolean			return BOOLEAN;
break			return BREAK;
byte			return BYTE;
case			return CASE;
catch			return CATCH;
char			return CHAR;
class			return CLASS;
const			return CONST;
continue		return CONTINUE;
default			return DEFAULT;
do			return DO;
double			return DOUBLE;
else			return ELSE;
extends			return EXTENDS;
final			return FINAL;
finally			return FINALLY;
float			return FLOAT;
for			return FOR;
goto			return GOTO;
if			return IF;
implements		return IMPLEMENTS;
import			return IMPORT;
instanceof		return INSTANCEOF;
int			return INT;
interface		return INTERFACE;
long			return LONG;
native			return NATIVE;
new			return NEW;
package			return PACKAGE;
private			return PRIVATE;
protected		return PROTECTED;
public			return PUBLIC;
return			return RETURN;
short			return SHORT;
static			return STATIC;
super			return SUPER;
switch			return SWITCH;
synchronized		return SYNCHRONIZED;
this			return THIS;
throw			return THROW;
throws			return THROWS;
transient		return TRANSIENT;
try			return TRY;
void			return VOID;
volatile		return VOLATILE;
while			return WHILE;

 /* Identifier: yylval.string receives a fresh copy made by newstring(), */
 /* which writes every '$' as the two characters "_4". */
{JLETTER}{JLORD}*	{ yylval.string=newstring(yytext, yyleng);
			  return IDENTIFIER;
			}

 /* Decimal integer literal, optional l/L suffix.  getnum() stores the */
 /* value in yylval.integer and sets longp when the suffix is present. */
(0|[1-9][0-9]*)[lL]?	{ int longp;
			  /*printf("decimal integer literal `%s'\n",yytext);*/
			  getnum(yytext, yyleng, 10, &yylval.integer, &longp);
			  return longp ? LONGINTEGERLITERAL : INTEGERLITERAL;
			}

 /* Hexadecimal integer literal; the two-character 0x/0X prefix is */
 /* skipped before the digits are handed to getnum(). */
0[xX][0-9a-fA-F]+[lL]?	{ int longp;
			  /* printf("hex integer literal `%s'\n", yytext); */
			  getnum(yytext+2,yyleng-2,16,&yylval.integer,&longp);
			  return longp ? LONGINTEGERLITERAL : INTEGERLITERAL;
			}

 /* Octal integer literal; the leading 0 is skipped. */
0[0-7]+[lL]?	{ int longp;
		  /* printf("octal integer literal `%s'\n", yytext); */
		  getnum(yytext+1, yyleng-1, 8, &yylval.integer, &longp);
		  return longp ? LONGINTEGERLITERAL : INTEGERLITERAL;
		}

 /* Floating-point literal in any of four shapes: digits with a point, */
 /* leading point, digits with exponent, or digits with an f/F/d/D suffix. */
 /* The value is kept as text: yylval.string is the copy built by */
 /* getfloat(), which also sets doublep (0 only for an f/F suffix). */
[0-9]+\.[0-9]*([eE][-+]?[0-9]+)?[fFdD]?		|
\.[0-9]+([eE][-+]?[0-9]+)?[fFdD]?		|
[0-9]+[eE][-+]?[0-9]+[fFdD]?			|
[0-9]+([eE][-+]?[0-9]+)?[fFdD]	{ int doublep;
				  yylval.string=getfloat(yytext, yyleng,
				                         &doublep);
				  return doublep?DOUBLELITERAL:FLOATLITERAL;
				}

 /* Character literal: one plain character or one escape between single */
 /* quotes.  txt/len describe the text between the quotes; the decoded */
 /* value goes into yylval.integer. */
'([^'\\\r\n]|\\{ESCLET})'	{ const char *txt=yytext+1;
				  int len=yyleng-2;
				  /*printf("character literal `%s'\n",yytext);*/
				  yylval.integer=getcharacterlit(&txt, &len);
				  return CHARACTERLITERAL;
				}

 /* String literal: getstringlit() is given the text between the double */
 /* quotes and returns the decoded buffer stored in yylval.string. */
\"([^\r\n\\"]|\\{ESCLET})*\"	{ yylval.string=getstringlit(yytext+1,yyleng-2);
				  /* printf("string literal `%s'\n", yytext); */
				  return STRINGLITERAL;
				}

 /* Single-character separators and operators are returned as their own */
 /* character code. */
[(){}[\];,.=><!~?:+\-*/&|^%]	return yytext[0];

 /* Two-character comparison, logical and increment/decrement operators. */
==		return EQUALEQUAL;
\<=		return LTHANEQUAL;
\>=		return GTHANEQUAL;
!=		return BANGEQUAL;
&&		return ANDAND;
\|\|		return PIPEPIPE;
\+\+		return PLUSPLUS;
--		return MINUSMINUS;

 /* Shift operators. */
\<<		return LLTHAN;
\>>		return GGTHAN;
\>>>		return GGGTHAN;

 /* Compound assignment operators. */
\+=		return PLUSEQUAL;
-=		return MINUSEQUAL;
\*=		return STAREQUAL;
\/=		return SLASHEQUAL;
&=		return ANDEQUAL;
\|=		return PIPEEQUAL;
\^=		return CARETEQUAL;
%=		return PERCENTEQUAL;
\<<=		return LLTHANEQUAL;
\>>=		return GGTHANEQUAL;
\>>>=		return GGGTHANEQUAL;

 /* Fallback for a character no other rule accepts: switch to ERROR and */
 /* push the character back with yyless(0) so the ERROR rule sees it. */
.			BEGIN(ERROR); yyless(0);
 /* In ERROR, take up to 20 characters from the offending position as */
 /* context, print the file name, line number and that text, then call */
 /* error().  NOTE(review): error() presumably does not return -- confirm. */
<ERROR>.{1,20}		{ printf("%s: Scanning error at line %d before `%s'\n",
			         input_file, input_line, yytext);
			  error("stop");
			}

%%

#include <string.h>

/* yywrap -- end-of-input hook for the generated scanner.
 *
 * Takes no arguments and unconditionally returns 1.
 * NOTE(review): for flex a non-zero result means "no further input";
 * confirm against the flex manual.
 *
 * When OLDFLEX is defined, four scanner-generated helpers are named in
 * a discarded expression; presumably this only keeps "defined but not
 * used" warnings quiet.
 */
int
yywrap(void)
{
#ifdef OLDFLEX
	(void) yyunput, (void) yy_push_state,
	(void) yy_pop_state, (void) yy_top_state;
#endif

	return 1;
}

/* char2num -- numeric value of one digit character.
 *
 * '0'..'9' give 0..9 and 'A'..'Z' give 10..35.  Every other character
 * is handled as if it were a lower-case letter, i.e. c-'a'+10, so
 * 'a'..'z' give 10..35.  No validation is performed.
 */
static int
char2num(char c)
{
	int origin;	/* character that maps to `bias' */
	int bias;	/* value of the first character in the range */

	if (c>='0' && c<='9') {
		origin='0';
		bias=0;
	} else {
		origin=(c>='A' && c<='Z') ? 'A' : 'a';
		bias=10;
	}

	return c-origin+bias;
}

/* getnum -- convert the text of an integer literal to its value.
 *
 * str/len  the digits of the literal, without any radix prefix but
 *          possibly ending in an 'l' or 'L' suffix
 * radix    number base the digits are written in
 * num      receives the value
 * longp    receives 1 if the suffix is present, 0 otherwise
 *
 * The digits are not validated; each one is converted by char2num().
 * The value is accumulated in an unsigned type so that literals which
 * fill the top bit (for example 0xFFFFFFFFFFFFFFFFL) wrap around
 * instead of overflowing a signed integer, which is undefined in C.
 * NOTE(review): assumes s8 is an integer type no wider than
 * unsigned long long -- confirm against types.h.
 */
static void
getnum(const char *str, int len, int radix, s8 *num, int *longp)
{
	const char *end=str+len-1;
	unsigned long long acc=0;

	if (*end=='l' || *end=='L') {
		*longp=1;
		end--;		/* the suffix is not a digit */
	} else {
		*longp=0;
	}

	while (str<=end) {
		acc=acc*(unsigned long long) radix
		    +(unsigned long long) char2num(*str++);
	}
	*num=(s8) acc;
}

/* newstring -- make a NUL-terminated copy of the first len characters
 * of str in memory obtained from xmalloc(), writing every '$' as the
 * two characters "_4".  All other characters are copied unchanged.
 * Nothing in this function frees the result.
 */
static char *
newstring(const char *str, int len)
{
	char *copy, *out;
	int size=0;
	int i;

	/* First pass: a '$' needs two bytes, anything else one. */
	for (i=0; i<len; i++) {
		size+=(str[i]=='$') ? 2 : 1;
	}

	copy=(char *) xmalloc(size+1);
	out=copy;

	/* Second pass: copy, expanding each '$'. */
	for (i=0; i<len; i++) {
		if (str[i]!='$') {
			*out++=str[i];
			continue;
		}
		*out++='_';
		*out++='4';
	}
	*out=0;

	return copy;
}

/* stringlitlength -- count the characters denoted by the body of a
 * string literal (the text between the quotes).
 *
 * str/len  the literal's body
 * returns  the number of characters, where each escape sequence
 *          counts as one
 *
 * After a backslash, 'u' is followed by four more characters that are
 * skipped unexamined; an octal escape takes up to three digits when it
 * starts with 0-3 and up to two otherwise; any other escape is the one
 * character after the backslash.
 */
static int
stringlitlength(const char *str, int len)
{
	int pos=0;
	int nchars;

	for (nchars=0; pos<len; nchars++) {
		char c=str[pos++];

		if (c!='\\')
			continue;

		c=str[pos++];		/* character after the backslash */
		if (c=='u') {
			pos+=4;
		} else if (c>='0' && c<='7') {
			/* further octal digits that may still follow */
			int more=(c<='3') ? 2 : 1;

			while (more-->0 && str[pos]>='0' && str[pos]<='7')
				pos++;
		}
	}

	return nchars;
}

/* getstringlit -- decode the body of a string literal.
 *
 * str/len  the text between the quotes, escapes still in source form
 * returns  a buffer from xmalloc() holding 2+2*N bytes, where N is the
 *          number of characters in the literal: first N, then each
 *          character, every value written as two bytes, high byte
 *          first.  The buffer is not NUL-terminated and nothing here
 *          frees it.
 *
 * Only the low 16 bits of the count are stored, so a literal of more
 * than 65535 characters is not representable in this format.
 */
static char *
getstringlit(const char *str, int len)
{
	int nchars=stringlitlength(str, len);
	char *newstr=(char *) xmalloc(nchars*2+2);
	char *scan=newstr;
	int i;

	/* A cast binds tighter than / and %, so the operand must be
	 * parenthesised: "(char) n/256" truncates n to a char first and
	 * then always yields 0 for the high byte.
	 */
	*scan++=(char) (nchars/256);
	*scan++=(char) (nchars%256);
	for (i=0; i<nchars; i++) {
		int ch=getcharacterlit(&str, &len);
		/* If ch is negative (a byte >= 0x80 read through a signed
		 * char), / gives 0 and the low byte of % is the original
		 * byte, so the character is still stored as 00 xx.
		 */
		*scan++=(char) (ch/256);
		*scan++=(char) (ch%256);
	}
	return newstr;
}

/* getcharacterlit -- decode one character from literal text.
 *
 * str  in/out: points at the next character or escape sequence; on
 *      return it has been advanced past what was consumed
 * len  in/out: decremented by the number of source characters consumed
 * returns the value of the character
 *
 * Recognised after a backslash: b t n f r " ' \, an octal escape (up to
 * three digits when it starts with 0-3, up to two otherwise) and u
 * followed by four digits.  The four digits after u are not validated
 * here: letters up to Z/z are given values as char2num() would give
 * them and anything else is skipped.
 *
 * A character that is not an escape is returned as an unsigned byte
 * (0..255).  Previously it was returned through a plain char, so on
 * platforms where char is signed a byte >= 0x80 came back negative.
 */
static int
getcharacterlit(const char **str, int *len)
{
	const char *p=*str;
	int rv;

	if (*p!='\\') {
		(*str)++;
		(*len)--;
		return (unsigned char) *p;
	}

	/* Step over the backslash. */
	p++;
	(*str)++;
	(*len)--;

	/* Single-letter escapes. */
	switch (*p) {
		case 'b':  (*str)++; (*len)--; return 0x8;
		case 't':  (*str)++; (*len)--; return 0x9;
		case 'n':  (*str)++; (*len)--; return 0xa;
		case 'f':  (*str)++; (*len)--; return 0xc;
		case 'r':  (*str)++; (*len)--; return 0xd;
		case '\"': (*str)++; (*len)--; return 0x22;
		case '\'': (*str)++; (*len)--; return 0x27;
		case '\\': (*str)++; (*len)--; return 0x5c;
		default:   break;
	}

	/* Octal escape: take as many digits as are present, up to three
	 * if the first is 0-3 (largest value \377), otherwise up to two.
	 */
	if (p[0]>='0' && p[0]<='7') {
		int maxdigits=(p[0]<='3') ? 3 : 2;
		int n=0;

		rv=0;
		while (n<maxdigits && p[n]>='0' && p[n]<='7') {
			rv=rv*8+(p[n]-'0');
			n++;
		}
		(*str)+=n;
		(*len)-=n;
		return rv;
	}

	/* Unicode escape: 'u' followed by four digits. */
	if (p[0]=='u') {
		int i;

		rv=0;
		for (i=1; i<=4; i++) {
			char c=p[i];

			if (c>='0' && c<='9')
				rv=rv*16+(c-'0');
			else if (c>='A' && c<='Z')
				rv=rv*16+(c-'A'+10);
			else if (c>='a' && c<='z')
				rv=rv*16+(c-'a'+10);
		}
		(*str)+=5;
		(*len)-=5;
		return rv;
	}

	/* No other escape is accepted by the scanner's patterns. */
	thiscanthappen;
	/* NOTE(review): thiscanthappen presumably does not return; this
	 * keeps the function from falling off its end if it ever does.
	 */
	return 0;
}

/* getfloat -- copy the text of a floating-point literal into a
 * normalised form.
 *
 * str/len  text of the literal as matched by the scanner
 * doublep  receives 0 if the literal ends in 'f' or 'F', 1 otherwise
 *          (a 'd'/'D' suffix or no suffix at all)
 * returns  a NUL-terminated string from xmalloc(): the leading digits,
 *          then always a '.', then the remaining text without its
 *          suffix, then an 'f' when *doublep is 0.  Nothing here frees
 *          it.
 *
 * len+2 bytes always suffice: at most one '.' is added, and the final
 * 'f' only replaces a suffix that was removed.
 */
static char *
getfloat(const char *str, int len, int *doublep)
{
	const char *last=str+len-1;
	char *result=(char *) xmalloc(len+2);
	char *out=result;
	const char *in=str;

	/* Classify the literal and drop any type suffix. */
	switch (*last) {
	case 'f': case 'F':
		*doublep=0;
		last--;
		break;
	case 'd': case 'D':
		*doublep=1;
		last--;
		break;
	default:
		*doublep=1;
		break;
	}

	/* Integer part. */
	for (; *in>='0' && *in<='9'; in++)
		*out++=*in;

	/* Always emit a point; consume the source's own if it has one. */
	*out++='.';
	if (*in=='.')
		in++;

	/* Fraction and exponent, up to the last non-suffix character. */
	while (in<=last)
		*out++=*in++;

	if (!*doublep)
		*out++='f';
	*out=0;

	return result;
}
