36 #include "interpreter.h"
39 #include "identifier.h"
47 static Lexer *currLexer = 0;
53 #include "lexer.lut.h"
55 extern YYLTYPE yylloc;
60 return Lexer::curr()->lex();
65 size8(128), size16(128), restrKeyword(false),
66 convertNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
71 current(0), next1(0), next2(0), next3(0),
72 strings(0), numStrings(0), stringsCapacity(0),
73 identifiers(0), numIdentifiers(0), identifiersCapacity(0)
76 buffer8 =
new char[size8];
77 buffer16 =
new UChar[size16];
91 currLexer =
new Lexer();
97 void Lexer::globalClear()
// Prepares the lexer for scanning a new piece of source text.
//
// The statements shown clear the restrKeyword and convertNextIdentifier
// flags and then prime the four-character lookahead window (current, next1,
// next2, next3) from code[0]..code[3]. Any slot whose index is not below
// `length` is set to -1, which the scanner uses as its end-of-input marker.
//
// NOTE(review): `code` and `length` are presumably set from `c` and `len`
// before the lookahead is primed -- confirm.
104 void Lexer::setCode(
const UChar *c,
unsigned int len)
107 restrKeyword =
false;
109 convertNextIdentifier =
false;
118 #ifndef KJS_PURE_ECMA
123 current = (length > 0) ? code[0].uc : -1;
124 next1 = (length > 1) ? code[1].uc : -1;
125 next2 = (length > 2) ? code[2].uc : -1;
126 next3 = (length > 3) ? code[3].uc : -1;
// Moves the lookahead window forward.
//
// The last slot, next3, is refilled from code[pos + 3]; when pos + 3 is not
// below `length` it becomes -1 (end of input).
//
// NOTE(review): `p` is presumably the number of characters to advance by
// -- confirm.
129 void Lexer::shift(
unsigned int p)
136 next3 = (pos + 3 < length) ? code[pos+3].uc : -1;
141 void Lexer::nextLine()
144 #ifndef KJS_PURE_ECMA
149 void Lexer::setDone(State s)
159 unsigned short stringType = 0;
168 if (stackToken >= 0) {
175 if (skipLF && current !=
'\n')
177 if (skipCR && current !=
'\r')
179 if (skipLF || skipCR)
186 bool cr = (current ==
'\r');
187 bool lf = (current ==
'\n');
192 bool isLineTerminator = cr || lf;
196 if (isWhiteSpace(current)) {
198 }
else if (current ==
'/' && next1 ==
'/') {
200 state = InSingleLineComment;
201 }
else if (current ==
'/' && next1 ==
'*') {
203 state = InMultiLineComment;
204 }
else if (current == -1) {
205 if (!terminator && !delimited) {
212 }
else if (isLineTerminator) {
219 }
else if (current ==
'"' || current ==
'\'') {
221 stringType = current;
222 }
else if (isIdentLetter(current)) {
224 state = InIdentifierOrKeyword;
225 }
else if (current ==
'\\') {
226 state = InIdentifierUnicodeEscapeStart;
227 }
else if (current ==
'0') {
230 }
else if (isDecimalDigit(current)) {
233 }
else if (current ==
'.' && isDecimalDigit(next1)) {
236 #ifndef KJS_PURE_ECMA
238 }
else if (current ==
'<' && next1 ==
'!' &&
239 next2 ==
'-' && next3 ==
'-') {
241 state = InSingleLineComment;
243 }
else if (bol && current ==
'-' && next1 ==
'-' && next2 ==
'>') {
245 state = InSingleLineComment;
248 token = matchPunctuator(current, next1, next2, next3);
258 if (current == stringType) {
261 }
else if (current == -1 || isLineTerminator) {
263 }
else if (current ==
'\\') {
264 state = InEscapeSequence;
270 case InEscapeSequence:
271 if (isOctalDigit(current)) {
272 if (current >=
'0' && current <=
'3' &&
273 isOctalDigit(next1) && isOctalDigit(next2)) {
274 record16(convertOctal(current, next1, next2));
277 }
else if (isOctalDigit(current) && isOctalDigit(next1)) {
278 record16(convertOctal(
'0', current, next1));
281 }
else if (isOctalDigit(current)) {
282 record16(convertOctal(
'0',
'0', current));
287 }
else if (current ==
'x')
289 else if (current ==
'u')
290 state = InUnicodeEscape;
292 if (isLineTerminator)
294 record16(singleEscape(current));
299 if (isHexDigit(current) && isHexDigit(next1)) {
301 record16(convertHex(current, next1));
303 }
else if (current == stringType) {
313 case InUnicodeEscape:
314 if (isHexDigit(current) && isHexDigit(next1) &&
315 isHexDigit(next2) && isHexDigit(next3)) {
316 record16(convertUnicode(current, next1, next2, next3));
319 }
else if (current == stringType) {
327 case InSingleLineComment:
328 if (isLineTerminator) {
336 }
else if (current == -1) {
340 case InMultiLineComment:
343 }
else if (isLineTerminator) {
345 }
else if (current ==
'*' && next1 ==
'/') {
350 case InIdentifierOrKeyword:
352 if (isIdentLetter(current) || isDecimalDigit(current))
354 else if (current ==
'\\')
355 state = InIdentifierUnicodeEscapeStart;
357 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword :
Identifier);
360 if (current ==
'x' || current ==
'X') {
363 }
else if (current ==
'.') {
366 }
else if (current ==
'e' || current ==
'E') {
368 state = InExponentIndicator;
369 }
else if (isOctalDigit(current)) {
372 }
else if (isDecimalDigit(current)) {
380 if (isHexDigit(current)) {
387 if (isOctalDigit(current)) {
390 else if (isDecimalDigit(current)) {
397 if (isDecimalDigit(current)) {
399 }
else if (current ==
'.') {
402 }
else if (current ==
'e' || current ==
'E') {
404 state = InExponentIndicator;
409 if (isDecimalDigit(current)) {
411 }
else if (current ==
'e' || current ==
'E') {
413 state = InExponentIndicator;
417 case InExponentIndicator:
418 if (current ==
'+' || current ==
'-') {
420 }
else if (isDecimalDigit(current)) {
427 if (isDecimalDigit(current)) {
432 case InIdentifierUnicodeEscapeStart:
434 state = InIdentifierUnicodeEscape;
438 case InIdentifierUnicodeEscape:
439 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
440 record16(convertUnicode(current, next1, next2, next3));
442 state = InIdentifier;
448 assert(!
"Unhandled state in switch statement");
454 #ifndef KJS_PURE_ECMA
455 if (state != Start && state != InSingleLineComment)
461 if ((state ==
Number || state == Octal || state == Hex)
462 && isIdentLetter(current))
466 buffer8[pos8] =
'\0';
469 fprintf(stderr,
"line: %d ", lineNo());
470 fprintf(stderr,
"yytext (%x): ", buffer8[0]);
471 fprintf(stderr,
"%s ", buffer8);
474 long double dval = 0;
476 dval = kjs_strtod(buffer8, 0L);
477 }
else if (state == Hex) {
479 if (buffer8[0] ==
'0' && (buffer8[1] ==
'x' || buffer8[1] ==
'X')) {
480 for (
const char *p = buffer8+2; *p; p++) {
481 if (!isHexDigit(*p)) {
485 dval = dval * 16 + convertHex(*p);
489 }
else if (state == Octal) {
491 if (buffer8[0] ==
'0') {
492 for (
const char *p = buffer8+1; *p; p++) {
493 if (*p < '0' || *p >
'7') {
497 dval = dval * 8 + *p -
'0';
512 case IdentifierOrKeyword:
513 printf(
"(Identifier)/(Keyword)\n");
516 printf(
"(String)\n");
519 printf(
"(Number)\n");
526 if (state !=
Identifier && state != IdentifierOrKeyword &&
527 convertNextIdentifier)
528 convertNextIdentifier =
false;
530 restrKeyword =
false;
532 kjsyylloc.first_line = yylineno;
533 kjsyylloc.last_line = yylineno;
540 if(token ==
'}' || token ==
';') {
544 case IdentifierOrKeyword:
545 if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
549 if (convertNextIdentifier) {
550 convertNextIdentifier =
false;
552 UString debugstr(buffer16, pos16); fprintf(stderr,
"Anonymous function hack: eating identifier %s\n",debugstr.ascii());
554 token = FUNCEXPRIDENT;
559 kjsyylval.ident = makeIdentifier(buffer16, pos16);
563 convertNextIdentifier =
false;
567 if ( token == FUNCTION &&
568 ( lastToken ==
'=' || lastToken ==
',' || lastToken ==
'(' ||
569 lastToken ==
':' || lastToken == RETURN ) )
570 convertNextIdentifier =
true;
572 if (token == CONTINUE || token == BREAK ||
573 token == RETURN || token == THROW)
577 kjsyylval.ustr = makeUString(buffer16, pos16);
581 kjsyylval.dval = dval;
588 assert(!
"unhandled numeration value in switch");
// Returns true when `c` is one of the white-space characters recognised by
// the lexer: space, horizontal tab, vertical tab (0x0b), form feed (0x0c)
// or no-break space (0xa0). Line terminators are not matched here.
595 bool Lexer::isWhiteSpace(
unsigned short c)
597 return (c ==
' ' || c ==
'\t' ||
598 c == 0x0b || c == 0x0c || c == 0xa0);
// Returns true when `c` may appear as a letter in an identifier.
//
// Accepted values are 'a'-'z', 'A'-'Z', the code-unit ranges
// 0x00c0-0x00d6, 0x00d8-0x00f6, 0x00f8-0x02af and 0x0388-0x1ffc, plus '$'
// and '_'. The gaps between the ranges leave out 0x00d7 and 0x00f7.
// Decimal digits are not accepted by this predicate.
//
// The expression has no inner parentheses: it relies on && binding tighter
// than ||, so each "lower && upper" pair forms one range test.
601 bool Lexer::isIdentLetter(
unsigned short c)
608 return (c >=
'a' && c <=
'z' ||
609 c >=
'A' && c <=
'Z' ||
611 c >= 0x00c0 && c <= 0x00d6 ||
613 c >= 0x00d8 && c <= 0x00f6 ||
615 c >= 0x00f8 && c <= 0x02af ||
617 c >= 0x0388 && c <= 0x1ffc ||
618 c ==
'$' || c ==
'_');
// Returns true when `c` is an ASCII decimal digit, '0' through '9'.
622 bool Lexer::isDecimalDigit(
unsigned short c)
624 return (c >=
'0' && c <=
'9');
// Returns true when `c` is an ASCII hexadecimal digit: '0'-'9', 'a'-'f' or
// 'A'-'F'. As written, the test relies on && binding tighter than ||.
627 bool Lexer::isHexDigit(
unsigned short c)
629 return (c >=
'0' && c <=
'9' ||
630 c >=
'a' && c <=
'f' ||
631 c >=
'A' && c <=
'F');
// Returns true when `c` is an ASCII octal digit, '0' through '7'.
634 bool Lexer::isOctalDigit(
unsigned short c)
636 return (c >=
'0' && c <=
'7');
// Matches a punctuator against up to four lookahead characters (c1..c4).
//
// Candidates are tested longest first: the four-character ">>>=", then the
// three-character forms "===", "!==", ">>>", "<<=" and ">>=", and only then
// the two-character forms. Because of this ordering a longer operator is
// never recognised as a prefix made of a shorter one (for example ">>=" is
// tested before ">>" and ">=").
//
// The "--" branch contains a return of AUTOMINUSMINUS, and the last
// statement shown returns c1 converted to int.
//
// NOTE(review): each branch presumably also consumes the matched characters
// and returns the corresponding token code -- confirm.
639 int Lexer::matchPunctuator(
unsigned short c1,
unsigned short c2,
640 unsigned short c3,
unsigned short c4)
642 if (c1 ==
'>' && c2 ==
'>' && c3 ==
'>' && c4 ==
'=') {
645 }
else if (c1 ==
'=' && c2 ==
'=' && c3 ==
'=') {
648 }
else if (c1 ==
'!' && c2 ==
'=' && c3 ==
'=') {
651 }
else if (c1 ==
'>' && c2 ==
'>' && c3 ==
'>') {
654 }
else if (c1 ==
'<' && c2 ==
'<' && c3 ==
'=') {
657 }
else if (c1 ==
'>' && c2 ==
'>' && c3 ==
'=') {
660 }
else if (c1 ==
'<' && c2 ==
'=') {
663 }
else if (c1 ==
'>' && c2 ==
'=') {
666 }
else if (c1 ==
'!' && c2 ==
'=') {
669 }
else if (c1 ==
'+' && c2 ==
'+') {
675 }
else if (c1 ==
'-' && c2 ==
'-') {
678 return AUTOMINUSMINUS;
681 }
else if (c1 ==
'=' && c2 ==
'=') {
684 }
else if (c1 ==
'+' && c2 ==
'=') {
687 }
else if (c1 ==
'-' && c2 ==
'=') {
690 }
else if (c1 ==
'*' && c2 ==
'=') {
693 }
else if (c1 ==
'/' && c2 ==
'=') {
696 }
else if (c1 ==
'&' && c2 ==
'=') {
699 }
else if (c1 ==
'^' && c2 ==
'=') {
702 }
else if (c1 ==
'%' && c2 ==
'=') {
705 }
else if (c1 ==
'|' && c2 ==
'=') {
708 }
else if (c1 ==
'<' && c2 ==
'<') {
711 }
else if (c1 ==
'>' && c2 ==
'>') {
714 }
else if (c1 ==
'&' && c2 ==
'&') {
717 }
else if (c1 ==
'|' && c2 ==
'|') {
748 return static_cast<int>(c1);
754 unsigned short Lexer::singleEscape(
unsigned short c)
const
// Converts three octal digit characters to the number they spell:
// (c1 - '0') * 64 + (c2 - '0') * 8 + (c3 - '0').
//
// The arguments are not validated here; the arithmetic is only meaningful
// when each one is a character in '0'..'7'.
780 unsigned short Lexer::convertOctal(
unsigned short c1,
unsigned short c2,
781 unsigned short c3)
const
783 return ((c1 -
'0') * 64 + (c2 -
'0') * 8 + c3 -
'0');
// Converts one hexadecimal digit character to its numeric value.
//
// '0'-'9' and 'a'-'f' are tested explicitly; a lowercase letter yields
// c - 'a' + 10. The final return computes c - 'A' + 10 with no range test
// shown in front of it, so any other input is treated as an uppercase
// hex letter.
//
// NOTE(review): callers presumably check isHexDigit() first -- confirm.
786 unsigned char Lexer::convertHex(
unsigned short c)
788 if (c >=
'0' && c <=
'9')
790 else if (c >=
'a' && c <=
'f')
791 return (c -
'a' + 10);
793 return (c -
'A' + 10);
// Converts two hexadecimal digit characters to one byte value: c1 supplies
// the high nibble (shifted left by 4) and c2 the low nibble.
796 unsigned char Lexer::convertHex(
unsigned short c1,
unsigned short c2)
798 return ((convertHex(c1) << 4) + convertHex(c2));
// Builds a UChar from four hexadecimal digit characters.
//
// c1/c2 are combined into one byte and c3/c4 into another (high nibble
// first in each pair); the two bytes are passed to the two-argument UChar
// constructor in that order.
//
// NOTE(review): the constructor presumably takes (high byte, low byte)
// -- confirm against UChar's declaration.
801 UChar Lexer::convertUnicode(
unsigned short c1,
unsigned short c2,
802 unsigned short c3,
unsigned short c4)
804 return UChar((convertHex(c1) << 4) + convertHex(c2),
805 (convertHex(c3) << 4) + convertHex(c4));
// Appends `c` to the 8-bit token buffer (buffer8).
//
// When pos8 has reached size8 - 1, a replacement array of 2 * size8 chars
// is allocated and the first size8 bytes of buffer8 are copied into it.
// The character is then stored at buffer8[pos8] and pos8 is incremented.
// `c` is narrowed to char by the cast, so only its low byte is kept.
//
// NOTE(review): after the copy the old buffer is presumably released and
// buffer8/size8 updated to the new array -- confirm.
808 void Lexer::record8(
unsigned short c)
813 if (pos8 >= size8 - 1) {
814 char *tmp =
new char[2 * size8];
815 memcpy(tmp, buffer8, size8 *
sizeof(
char));
821 buffer8[pos8++] = (char) c;
// Convenience overload: narrows `c` to unsigned short, wraps it in a UChar
// and forwards to record16(UChar).
824 void Lexer::record16(
int c)
828 record16(
UChar(
static_cast<unsigned short>(c)));
// Appends `c` to the 16-bit token buffer (buffer16).
//
// When pos16 has reached size16 - 1 the growth path runs; it copies
// size16 UChar elements from buffer16 into `tmp`. The character is then
// stored at buffer16[pos16] and pos16 is incremented.
//
// NOTE(review): `tmp` is presumably a larger, newly allocated array that
// replaces buffer16 afterwards, mirroring record8() -- confirm.
831 void Lexer::record16(
UChar c)
834 if (pos16 >= size16 - 1) {
836 memcpy(tmp, buffer16, size16 *
sizeof(
UChar));
842 buffer16[pos16++] = c;
// Scans a regular-expression literal and stores its two parts in `pattern`
// and `flags`, each built from the first pos16 elements of buffer16.
//
// While scanning the pattern:
//  - '\r', '\n' or end of input (-1) is tested first and handled separately;
//  - otherwise the character is handled as pattern content unless it is a
//    '/' that is neither escaped nor inside a bracket expression;
//  - inBrackets tracking reacts to '[' and ']' only when the previous
//    character was not an escape;
//  - lastWasEscape is true only for a backslash that was not itself
//    preceded by an escape.
// The flags are then gathered for as long as isIdentLetter(current) holds.
//
// NOTE(review): the line-terminator / end-of-input branch presumably makes
// the function return false -- confirm.
845 bool Lexer::scanRegExp()
848 bool lastWasEscape =
false;
849 bool inBrackets =
false;
852 if (current ==
'\r' || current ==
'\n' || current == -1)
854 else if (current !=
'/' || lastWasEscape ==
true || inBrackets ==
true)
857 if ( !lastWasEscape ) {
858 if ( current ==
'[' && !inBrackets )
860 if ( current ==
']' && inBrackets )
865 !lastWasEscape && (current ==
'\\');
868 pattern =
UString(buffer16, pos16);
876 while (isIdentLetter(current)) {
880 flags =
UString(buffer16, pos16);
// Cleans up the bookkeeping arrays once parsing has finished.
//
// Every entry identifiers[0..numIdentifiers) is deleted and
// identifiersCapacity is reset to 0; a second loop then walks
// strings[0..numStrings).
//
// NOTE(review): the second loop presumably deletes each strings[i], and the
// arrays and counters are presumably released/reset as well -- confirm.
886 void Lexer::doneParsing()
888 for (
unsigned i = 0; i < numIdentifiers; i++) {
889 delete identifiers[i];
894 identifiersCapacity = 0;
896 for (
unsigned i = 0; i < numStrings; i++) {
905 const int initialCapacity = 64;
906 const int growthFactor = 2;
910 if (numIdentifiers == identifiersCapacity) {
911 identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
916 identifiers[numIdentifiers++] = identifier;
// Registers a UString in the lexer's `strings` array.
//
// When the array is full (numStrings == stringsCapacity) the capacity
// becomes initialCapacity if it was 0, otherwise it is multiplied by
// growthFactor, and the array is resized with realloc. The realloc result is
// assigned straight back to `strings`; no null check is shown, so a failed
// reallocation would lose the previous array pointer.
// The entry is then stored at strings[numStrings] and numStrings is
// incremented.
//
// NOTE(review): `string` is presumably a UString newly created from
// `buffer`/`pos` and is presumably the return value -- confirm.
920 UString *Lexer::makeUString(
UChar *buffer,
unsigned int pos)
922 if (numStrings == stringsCapacity) {
923 stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
924 strings = (
UString **)realloc(strings,
sizeof(
UString *) * stringsCapacity);
928 strings[numStrings++] = string;
Represents an Identifier for a JavaScript object.
Represents a primitive Number value.
Represents a primitive String value.