| 1 | /* |
| 2 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) |
| 3 | * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
| 4 | * |
| 5 | * This library is free software; you can redistribute it and/or |
| 6 | * modify it under the terms of the GNU Library General Public |
| 7 | * License as published by the Free Software Foundation; either |
| 8 | * version 2 of the License, or (at your option) any later version. |
| 9 | * |
| 10 | * This library is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | * Library General Public License for more details. |
| 14 | * |
| 15 | * You should have received a copy of the GNU Library General Public License |
| 16 | * along with this library; see the file COPYING.LIB. If not, write to |
| 17 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 18 | * Boston, MA 02110-1301, USA. |
| 19 | * |
| 20 | */ |
| 21 | |
| 22 | #ifndef Lexer_h |
| 23 | #define Lexer_h |
| 24 | |
| 25 | #include "Lookup.h" |
| 26 | #include "ParserArena.h" |
| 27 | #include "SourceCode.h" |
| 28 | #include <wtf/ASCIICType.h> |
| 29 | #include <wtf/SegmentedVector.h> |
| 30 | #include <wtf/Vector.h> |
| 31 | #include <wtf/unicode/Unicode.h> |
| 32 | |
| 33 | namespace JSC { |
| 34 | |
| 35 | class RegExp; |
| 36 | |
| 37 | class Lexer : public Noncopyable { |
| 38 | public: |
| 39 | // Character manipulation functions. |
| 40 | static bool isWhiteSpace(int character); |
| 41 | static bool isLineTerminator(int character); |
| 42 | static unsigned char convertHex(int c1, int c2); |
| 43 | static UChar convertUnicode(int c1, int c2, int c3, int c4); |
| 44 | |
| 45 | // Functions to set up parsing. |
| 46 | void setCode(const SourceCode&, ParserArena&); |
| 47 | void setIsReparsing() { m_isReparsing = true; } |
| 48 | |
| 49 | // Functions for the parser itself. |
| 50 | int lex(void* lvalp, void* llocp); |
| 51 | int lineNumber() const { return m_lineNumber; } |
| 52 | bool prevTerminator() const { return m_terminator; } |
| 53 | SourceCode sourceCode(int openBrace, int closeBrace, int firstLine); |
| 54 | bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0); |
| 55 | bool skipRegExp(); |
| 56 | |
| 57 | // Functions for use after parsing. |
| 58 | bool sawError() const { return m_error; } |
| 59 | void clear(); |
| 60 | |
| 61 | private: |
| 62 | friend class JSGlobalData; |
| 63 | |
| 64 | Lexer(JSGlobalData*); |
| 65 | ~Lexer(); |
| 66 | |
| 67 | void shift1(); |
| 68 | void shift2(); |
| 69 | void shift3(); |
| 70 | void shift4(); |
| 71 | void shiftLineTerminator(); |
| 72 | |
| 73 | void record8(int); |
| 74 | void record16(int); |
| 75 | void record16(UChar); |
| 76 | |
| 77 | void copyCodeWithoutBOMs(); |
| 78 | |
| 79 | int currentOffset() const; |
| 80 | const UChar* currentCharacter() const; |
| 81 | |
| 82 | const Identifier* makeIdentifier(const UChar* characters, size_t length); |
| 83 | |
| 84 | bool lastTokenWasRestrKeyword() const; |
| 85 | |
| 86 | static const size_t initialReadBufferCapacity = 32; |
| 87 | |
| 88 | int m_lineNumber; |
| 89 | |
| 90 | Vector<char> m_buffer8; |
| 91 | Vector<UChar> m_buffer16; |
| 92 | bool m_terminator; |
| 93 | bool m_delimited; // encountered delimiter like "'" and "}" on last run |
| 94 | int m_lastToken; |
| 95 | |
| 96 | const SourceCode* m_source; |
| 97 | const UChar* m_code; |
| 98 | const UChar* m_codeStart; |
| 99 | const UChar* m_codeEnd; |
| 100 | bool m_isReparsing; |
| 101 | bool m_atLineStart; |
| 102 | bool m_error; |
| 103 | |
| 104 | // current and following unicode characters (int to allow for -1 for end-of-file marker) |
| 105 | int m_current; |
| 106 | int m_next1; |
| 107 | int m_next2; |
| 108 | int m_next3; |
| 109 | |
| 110 | IdentifierArena* m_arena; |
| 111 | |
| 112 | JSGlobalData* m_globalData; |
| 113 | |
| 114 | const HashTable m_keywordTable; |
| 115 | |
| 116 | Vector<UChar> m_codeWithoutBOMs; |
| 117 | }; |
| 118 | |
| 119 | inline bool Lexer::isWhiteSpace(int ch) |
| 120 | { |
| 121 | return isASCII(c: ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(c: ch); |
| 122 | } |
| 123 | |
| 124 | inline bool Lexer::isLineTerminator(int ch) |
| 125 | { |
| 126 | return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028; |
| 127 | } |
| 128 | |
| 129 | inline unsigned char Lexer::convertHex(int c1, int c2) |
| 130 | { |
| 131 | return (toASCIIHexValue(c: c1) << 4) | toASCIIHexValue(c: c2); |
| 132 | } |
| 133 | |
| 134 | inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) |
| 135 | { |
| 136 | return (convertHex(c1, c2) << 8) | convertHex(c1: c3, c2: c4); |
| 137 | } |
| 138 | |
| 139 | // A bridge for yacc from the C world to the C++ world. |
| 140 | inline int jscyylex(void* lvalp, void* llocp, void* globalData) |
| 141 | { |
| 142 | return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp); |
| 143 | } |
| 144 | |
| 145 | } // namespace JSC |
| 146 | |
| 147 | #endif // Lexer_h |
| 148 | |