| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- #pragma once
- #ifdef ENABLE_GLOBALIZATION
- namespace Js
- {
- class DelayLoadWindowsGlobalization;
- }
- #include "Windows.Globalization.h"
- #endif
- int CountNewlines(LPCOLESTR psz);
- class Parser;
- struct ParseContext;
- struct Token
- {
- private:
- union
- {
- struct
- {
- IdentPtr pid;
- const char * pchMin;
- int32 length;
- };
- int32 lw;
- struct
- {
- double dbl;
- // maybeInt will be true if the number did not contain 'e', 'E' , or '.'
- // notably important in asm.js where the '.' has semantic importance
- bool maybeInt;
- };
- UnifiedRegex::RegexPattern* pattern;
- struct
- {
- charcount_t ichMin;
- charcount_t ichLim;
- };
- } u;
- IdentPtr CreateIdentifier(HashTbl * hashTbl);
- public:
- Token() : tk(tkLim) {}
- tokens tk;
- BOOL IsIdentifier() const
- {
- return tk == tkID;
- }
- IdentPtr GetStr() const
- {
- Assert(tk == tkStrCon || tk == tkStrTmplBasic || tk == tkStrTmplBegin || tk == tkStrTmplMid || tk == tkStrTmplEnd);
- return u.pid;
- }
- IdentPtr GetIdentifier(HashTbl * hashTbl)
- {
- Assert(IsIdentifier() || IsReservedWord());
- if (u.pid)
- {
- return u.pid;
- }
- return CreateIdentifier(hashTbl);
- }
- int32 GetLong() const
- {
- Assert(tk == tkIntCon);
- return u.lw;
- }
- IdentPtr GetBigInt() const
- {
- Assert(tk == tkBigIntCon);
- return u.pid;
- }
- double GetDouble() const
- {
- Assert(tk == tkFltCon);
- return u.dbl;
- }
- bool GetDoubleMayBeInt() const
- {
- Assert(tk == tkFltCon);
- return u.maybeInt;
- }
- UnifiedRegex::RegexPattern * GetRegex()
- {
- Assert(tk == tkRegExp);
- return u.pattern;
- }
- // NOTE: THESE ROUTINES DEPEND ON THE ORDER THAT OPERATORS
- // ARE DECLARED IN kwd-xxx.h FILES.
- BOOL IsReservedWord() const
- {
- // Keywords and future reserved words (does not include operators)
- return tk < tkID;
- }
- BOOL IsKeyword() const;
- BOOL IsFutureReservedWord(const BOOL isStrictMode) const
- {
- // Reserved words that are not keywords
- return tk >= tkENUM && tk <= (isStrictMode ? tkSTATIC : tkENUM);
- }
- BOOL IsOperator() const
- {
- return tk >= tkComma && tk < tkLParen;
- }
- // UTF16 Scanner are only for syntax coloring. Only support
- // defer pid creation for UTF8
- void SetIdentifier(const char * pchMin, int32 len)
- {
- this->u.pid = nullptr;
- this->u.pchMin = pchMin;
- this->u.length = len;
- }
- void SetIdentifier(IdentPtr pid)
- {
- this->u.pid = pid;
- this->u.pchMin = nullptr;
- }
- void SetLong(int32 value)
- {
- this->u.lw = value;
- }
- void SetDouble(double dbl, bool maybeInt)
- {
- this->u.dbl = dbl;
- this->u.maybeInt = maybeInt;
- }
- void SetBigInt(IdentPtr pid)
- {
- this->u.pid = pid;
- this->u.pchMin = nullptr;
- }
- tokens SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser);
- };
- typedef BYTE UTF8Char;
- typedef UTF8Char* UTF8CharPtr;
- class NullTerminatedUnicodeEncodingPolicy
- {
- public:
- typedef OLECHAR EncodedChar;
- typedef const OLECHAR *EncodedCharPtr;
- protected:
- static const bool MultiUnitEncoding = false;
- static const size_t m_cMultiUnits = 0;
- static BOOL IsMultiUnitChar(OLECHAR ch) { return FALSE; }
- // See comment below regarding unused 'last' parameter
- static OLECHAR ReadFirst(EncodedCharPtr &p, EncodedCharPtr last) { return *p++; }
- template <bool bScan>
- static OLECHAR ReadRest(OLECHAR ch, EncodedCharPtr &p, EncodedCharPtr last) { return ch; }
- template <bool bScan>
- static OLECHAR ReadFull(EncodedCharPtr &p, EncodedCharPtr last) { return *p++; }
- static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last) { return *p; }
- static OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last) { return *p; }
- static OLECHAR ReadSurrogatePairUpper(const EncodedCharPtr&, const EncodedCharPtr& last)
- {
- AssertMsg(false, "method should not be called while scanning UTF16 string");
- return 0xfffe;
- }
- static void RestoreMultiUnits(size_t multiUnits) { }
- static size_t CharacterOffsetToUnitOffset(EncodedCharPtr start, EncodedCharPtr current, EncodedCharPtr last, charcount_t offset) { return offset; }
- static void ConvertToUnicode(__out_ecount_full(cch) LPOLESTR pch, charcount_t cch, EncodedCharPtr start, EncodedCharPtr end)
- {
- Unused(end);
- js_memcpy_s(pch, cch * sizeof(OLECHAR), start, cch * sizeof(OLECHAR));
- }
- public:
- void Clear() {}
- void SetIsUtf8(bool isUtf8) { }
- bool IsUtf8() const { return false; }
- };
- template <bool nullTerminated>
- class UTF8EncodingPolicyBase
- {
- public:
- typedef utf8char_t EncodedChar;
- typedef LPCUTF8 EncodedCharPtr;
- protected:
- static const bool MultiUnitEncoding = true;
- size_t m_cMultiUnits;
- utf8::DecodeOptions m_decodeOptions;
- UTF8EncodingPolicyBase() { Clear(); }
- static BOOL IsMultiUnitChar(OLECHAR ch) { return ch > 0x7f; }
- // Note when nullTerminated is false we still need to increment the character pointer because the scanner "puts back" this virtual null character by decrementing the pointer
- static OLECHAR ReadFirst(EncodedCharPtr &p, EncodedCharPtr last) { return (nullTerminated || p < last) ? static_cast<OLECHAR>(*p++) : (p++, 0); }
- // "bScan" indicates if this ReadFull is part of scanning. Pass true during scanning and ReadFull will update
- // related Scanner state. The caller is supposed to sync result "p" to Scanner's current position. Pass false
- // otherwise and this doesn't affect Scanner state.
- template <bool bScan>
- OLECHAR ReadFull(EncodedCharPtr &p, EncodedCharPtr last)
- {
- EncodedChar ch = (nullTerminated || p < last) ? *p++ : (p++, 0);
- return !IsMultiUnitChar(ch) ? static_cast<OLECHAR>(ch) : ReadRest<bScan>(ch, p, last);
- }
- OLECHAR ReadSurrogatePairUpper(EncodedCharPtr &p, EncodedCharPtr last)
- {
- EncodedChar ch = (nullTerminated || p < last) ? *p++ : (p++, 0);
- Assert(IsMultiUnitChar(ch));
- this->m_decodeOptions |= utf8::DecodeOptions::doSecondSurrogatePair;
- return ReadRest<true>(ch, p, last);
- }
- static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last) { return (nullTerminated || p < last) ? static_cast<OLECHAR>(*p) : 0; }
- OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last)
- {
- OLECHAR result = PeekFirst(p, last);
- if (IsMultiUnitChar(result))
- {
- result = ReadFull<false>(p, last);
- }
- return result;
- }
- // "bScan" indicates if this ReadRest is part of scanning. Pass true during scanning and ReadRest will update
- // related Scanner state. The caller is supposed to sync result "p" to Scanner's current position. Pass false
- // otherwise and this doesn't affect Scanner state.
- template <bool bScan>
- OLECHAR ReadRest(OLECHAR ch, EncodedCharPtr &p, EncodedCharPtr last)
- {
- EncodedCharPtr s;
- if (bScan)
- {
- s = p;
- }
- OLECHAR result = utf8::DecodeTail(ch, p, last, m_decodeOptions);
- if (bScan)
- {
- // If we are scanning, update m_cMultiUnits counter.
- m_cMultiUnits += p - s;
- }
- return result;
- }
- void RestoreMultiUnits(size_t multiUnits) { m_cMultiUnits = multiUnits; }
- size_t CharacterOffsetToUnitOffset(EncodedCharPtr start, EncodedCharPtr current, EncodedCharPtr last, charcount_t offset)
- {
- // Note: current may be before or after last. If last is the null terminator, current should be within [start, last].
- // But if we excluded HTMLCommentSuffix for the source, last is before "// -->\0". Scanner may stop at null
- // terminator past last, then current is after last.
- Assert(current >= start);
- size_t currentUnitOffset = current - start;
- Assert(currentUnitOffset > m_cMultiUnits);
- Assert(currentUnitOffset - m_cMultiUnits < LONG_MAX);
- charcount_t currentCharacterOffset = charcount_t(currentUnitOffset - m_cMultiUnits);
- // If the offset is the current character offset then just return the current unit offset.
- if (currentCharacterOffset == offset) return currentUnitOffset;
- // If we have not encountered any multi-unit characters and we are moving backward the
- // character index and unit index are 1:1 so just return offset
- if (m_cMultiUnits == 0 && offset <= currentCharacterOffset) return offset;
- // Use local decode options
- utf8::DecodeOptions decodeOptions = IsUtf8() ? utf8::doDefault : utf8::doAllowThreeByteSurrogates;
- if (offset > currentCharacterOffset)
- {
- // If we are looking for an offset past current, current must be within [start, last]. We don't expect seeking
- // scanner position past last.
- Assert(current <= last);
- // If offset > currentOffset we already know the current character offset. The unit offset is the
- // unit index of offset - currentOffset characters from current.
- charcount_t charsLeft = offset - currentCharacterOffset;
- return currentUnitOffset + utf8::CharacterIndexToByteIndex(current, last - current, charsLeft, decodeOptions);
- }
- // If all else fails calculate the index from the start of the buffer.
- return utf8::CharacterIndexToByteIndex(start, currentUnitOffset, offset, decodeOptions);
- }
- void ConvertToUnicode(__out_ecount_full(cch) LPOLESTR pch, charcount_t cch, EncodedCharPtr start, EncodedCharPtr end)
- {
- m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doSecondSurrogatePair);
- utf8::DecodeUnitsInto(pch, start, end, m_decodeOptions);
- }
- public:
- void Clear()
- {
- m_cMultiUnits = 0;
- m_decodeOptions = utf8::doAllowThreeByteSurrogates;
- }
- // If we get UTF8 source buffer, turn off doAllowThreeByteSurrogates but allow invalid WCHARs without replacing them with replacement 'g_chUnknown'.
- void SetIsUtf8(bool isUtf8)
- {
- if (isUtf8)
- {
- m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doAllowThreeByteSurrogates | utf8::doAllowInvalidWCHARs);
- }
- else
- {
- m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doAllowInvalidWCHARs | utf8::doAllowThreeByteSurrogates);
- }
- }
- bool IsUtf8() const { return (m_decodeOptions & utf8::doAllowThreeByteSurrogates) == 0; }
- };
- typedef UTF8EncodingPolicyBase<false> NotNullTerminatedUTF8EncodingPolicy;
- interface IScanner
- {
- virtual void GetErrorLineInfo(__out int32& ichMin, __out int32& ichLim, __out int32& line, __out int32& ichMinLine) = 0;
- virtual HRESULT SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine) = 0;
- };
- // Flags that can be provided to the Scan functions.
- // These can be bitwise OR'ed.
- enum ScanFlag
- {
- ScanFlagNone = 0,
- ScanFlagSuppressStrPid = 1, // Force strings to always have pid
- };
- typedef HRESULT (*CommentCallback)(void *data, OLECHAR firstChar, OLECHAR secondChar, bool containTypeDef, charcount_t min, charcount_t lim, bool adjacent, bool multiline, charcount_t startLine, charcount_t endLine);
- // Restore point defined using a relative offset rather than a pointer.
- struct RestorePoint
- {
- Field(charcount_t) m_ichMinTok;
- Field(charcount_t) m_ichMinLine;
- Field(size_t) m_cMinTokMultiUnits;
- Field(size_t) m_cMinLineMultiUnits;
- Field(charcount_t) m_line;
- Field(uint) functionIdIncrement;
- Field(size_t) lengthDecr;
- Field(BOOL) m_fHadEol;
- #ifdef DEBUG
- Field(size_t) m_cMultiUnits;
- #endif
- RestorePoint()
- : m_ichMinTok((charcount_t)-1),
- m_ichMinLine((charcount_t)-1),
- m_cMinTokMultiUnits((size_t)-1),
- m_cMinLineMultiUnits((size_t)-1),
- m_line((charcount_t)-1),
- functionIdIncrement(0),
- lengthDecr(0),
- m_fHadEol(FALSE)
- #ifdef DEBUG
- , m_cMultiUnits((size_t)-1)
- #endif
- {
- };
- };
- template <typename EncodingPolicy>
- class Scanner : public IScanner, public EncodingPolicy
- {
- friend Parser;
- typedef typename EncodingPolicy::EncodedChar EncodedChar;
- typedef typename EncodingPolicy::EncodedCharPtr EncodedCharPtr;
- public:
- Scanner(Parser* parser, Token *ptoken, Js::ScriptContext *scriptContext);
- ~Scanner(void);
- tokens Scan();
- tokens ScanNoKeywords();
- tokens ScanForcingPid();
- void SetText(EncodedCharPtr psz, size_t offset, size_t length, charcount_t characterOffset, bool isUtf8, ULONG grfscr, ULONG lineNumber = 0);
- #if ENABLE_BACKGROUND_PARSING
- void PrepareForBackgroundParse(Js::ScriptContext *scriptContext);
- #endif
- enum ScanState
- {
- ScanStateNormal = 0,
- ScanStateStringTemplateMiddleOrEnd = 1,
- };
- ScanState GetScanState() { return m_scanState; }
- void SetScanState(ScanState state) { m_scanState = state; }
- bool SetYieldIsKeywordRegion(bool fYieldIsKeywordRegion)
- {
- bool fPrevYieldIsKeywordRegion = m_fYieldIsKeywordRegion;
- m_fYieldIsKeywordRegion = fYieldIsKeywordRegion;
- return fPrevYieldIsKeywordRegion;
- }
- bool YieldIsKeywordRegion()
- {
- return m_fYieldIsKeywordRegion;
- }
- bool YieldIsKeyword()
- {
- return YieldIsKeywordRegion() || this->IsStrictMode();
- }
- bool SetAwaitIsKeywordRegion(bool fAwaitIsKeywordRegion)
- {
- bool fPrevAwaitIsKeywordRegion = m_fAwaitIsKeywordRegion;
- m_fAwaitIsKeywordRegion = fAwaitIsKeywordRegion;
- return fPrevAwaitIsKeywordRegion;
- }
- bool AwaitIsKeywordRegion()
- {
- return m_fAwaitIsKeywordRegion;
- }
- bool AwaitIsKeyword()
- {
- return AwaitIsKeywordRegion() || this->m_fIsModuleCode;
- }
- tokens TryRescanRegExp();
- tokens RescanRegExp();
- tokens RescanRegExpNoAST();
- tokens RescanRegExpTokenizer();
- BOOL FHadNewLine(void)
- {
- return m_fHadEol;
- }
- IdentPtr PidFromLong(int32 lw);
- IdentPtr PidFromDbl(double dbl);
- LPCOLESTR StringFromLong(int32 lw);
- LPCOLESTR StringFromDbl(double dbl);
- IdentPtr GetSecondaryBufferAsPid();
- BYTE SetDeferredParse(BOOL defer)
- {
- BYTE fOld = m_DeferredParseFlags;
- if (defer)
- {
- m_DeferredParseFlags |= ScanFlagSuppressStrPid;
- }
- else
- {
- m_DeferredParseFlags = ScanFlagNone;
- }
- return fOld;
- }
- void SetDeferredParseFlags(BYTE flags)
- {
- m_DeferredParseFlags = flags;
- }
- // the functions IsDoubleQuoteOnLastTkStrCon() and IsHexOrOctOnLastTKNumber() works only with a scanner without lookahead
- // Both functions are used to get more info on the last token for specific diffs necessary for JSON parsing.
- //Single quotes are not legal in JSON strings. Make distinction between single quote string constant and single quote string
- BOOL IsDoubleQuoteOnLastTkStrCon()
- {
- return m_doubleQuoteOnLastTkStrCon;
- }
- // True if all chars of last string constant are ascii
- BOOL IsEscapeOnLastTkStrCon()
- {
- return m_EscapeOnLastTkStrCon;
- }
- bool LastIdentifierHasEscape()
- {
- return m_lastIdentifierHasEscape;
- }
- bool IsOctOrLeadingZeroOnLastTKNumber()
- {
- return m_OctOrLeadingZeroOnLastTKNumber;
- }
- // Returns the character offset of the first token. The character offset is the offset the first character of the token would
- // have if the entire file was converted to Unicode (UTF16-LE).
- charcount_t IchMinTok(void) const
- {
- Assert(m_pchMinTok - m_pchBase >= 0);
- Assert(m_pchMinTok - m_pchBase <= LONG_MAX);
- Assert(static_cast<charcount_t>(m_pchMinTok - m_pchBase) >= m_cMinTokMultiUnits);
- return static_cast<charcount_t>(m_pchMinTok - m_pchBase - m_cMinTokMultiUnits);
- }
- // Returns the character offset of the character immediately following the token. The character offset is the offset the first
- // character of the token would have if the entire file was converted to Unicode (UTF16-LE).
- charcount_t IchLimTok(void) const
- {
- Assert(m_currentCharacter - m_pchBase >= 0);
- Assert(m_currentCharacter - m_pchBase <= LONG_MAX);
- Assert(static_cast<charcount_t>(m_currentCharacter - m_pchBase) >= this->m_cMultiUnits);
- return static_cast<charcount_t>(m_currentCharacter - m_pchBase - this->m_cMultiUnits);
- }
- void SetErrorPosition(charcount_t ichMinError, charcount_t ichLimError)
- {
- Assert(ichLimError > 0 || ichMinError == 0);
- m_ichMinError = ichMinError;
- m_ichLimError = ichLimError;
- }
- charcount_t IchMinError(void) const
- {
- return m_ichLimError ? m_ichMinError : IchMinTok();
- }
- charcount_t IchLimError(void) const
- {
- return m_ichLimError ? m_ichLimError : IchLimTok();
- }
- // Returns the encoded unit offset of first character of the token. For example, in a UTF-8 encoding this is the offset into
- // the UTF-8 buffer. In Unicode this is the same as IchMinTok().
- size_t IecpMinTok(void) const
- {
- return static_cast< size_t >(m_pchMinTok - m_pchBase);
- }
- // Returns the encoded unit offset of the character immediately following the token. For example, in a UTF-8 encoding this is
- // the offset into the UTF-8 buffer. In Unicode this is the same as IchLimTok().
- size_t IecpLimTok(void) const
- {
- return static_cast< size_t >(m_currentCharacter - m_pchBase);
- }
- size_t IecpLimTokPrevious() const
- {
- AssertMsg(m_iecpLimTokPrevious != (size_t)-1, "IecpLimTokPrevious() cannot be called before scanning a token");
- return m_iecpLimTokPrevious;
- }
- charcount_t IchLimTokPrevious() const
- {
- AssertMsg(m_ichLimTokPrevious != (charcount_t)-1, "IchLimTokPrevious() cannot be called before scanning a token");
- return m_ichLimTokPrevious;
- }
- IdentPtr PidAt(size_t iecpMin, size_t iecpLim);
- // Returns the character offset within the stream of the first character on the current line.
- charcount_t IchMinLine(void) const
- {
- Assert(m_pchMinLine - m_pchBase >= 0);
- Assert(m_pchMinLine - m_pchBase <= LONG_MAX);
- Assert(static_cast<charcount_t>(m_pchMinLine - m_pchBase) >= m_cMinLineMultiUnits);
- return static_cast<charcount_t>(m_pchMinLine - m_pchBase - m_cMinLineMultiUnits);
- }
- // Returns the current line number
- charcount_t LineCur(void) const { return m_line; }
- void SetCurrentCharacter(charcount_t offset, ULONG lineNumber = 0)
- {
- DebugOnly(m_iecpLimTokPrevious = (size_t)-1);
- DebugOnly(m_ichLimTokPrevious = (charcount_t)-1);
- size_t length = m_pchLast - m_pchBase;
- if (offset > length) offset = static_cast< charcount_t >(length);
- size_t ibOffset = this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, offset);
- m_currentCharacter = m_pchBase + ibOffset;
- Assert(ibOffset >= offset);
- this->RestoreMultiUnits(ibOffset - offset);
- m_line = lineNumber;
- }
- // IScanner methods
- virtual void GetErrorLineInfo(__out int32& ichMin, __out int32& ichLim, __out int32& line, __out int32& ichMinLine)
- {
- ichMin = this->IchMinError();
- ichLim = this->IchLimError();
- line = this->LineCur();
- ichMinLine = this->IchMinLine();
- if (m_ichLimError && m_ichMinError < (charcount_t)ichMinLine)
- {
- line = m_startLine;
- ichMinLine = UpdateLine(line, m_pchStartLine, m_pchLast, 0, ichMin);
- }
- }
- virtual HRESULT SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine);
- class TemporaryBuffer
- {
- friend Scanner<EncodingPolicy>;
- private:
- // Keep a reference to the scanner.
- // We will use it to signal an error if we fail to allocate the buffer.
- Scanner<EncodingPolicy>* m_pscanner;
- uint32 m_cchMax;
- uint32 m_ichCur;
- __field_ecount(m_cchMax) OLECHAR *m_prgch;
- byte m_rgbInit[256];
- public:
- TemporaryBuffer()
- {
- m_pscanner = nullptr;
- m_prgch = (OLECHAR*)m_rgbInit;
- m_cchMax = _countof(m_rgbInit) / sizeof(OLECHAR);
- m_ichCur = 0;
- }
-
- ~TemporaryBuffer()
- {
- if (m_prgch != (OLECHAR*)m_rgbInit)
- {
- free(m_prgch);
- }
- }
- void Reset()
- {
- m_ichCur = 0;
- }
- void Clear()
- {
- if (m_prgch != (OLECHAR*)m_rgbInit)
- {
- free(m_prgch);
- m_prgch = (OLECHAR*)m_rgbInit;
- m_cchMax = _countof(m_rgbInit) / sizeof(OLECHAR);
- }
- Reset();
- }
- void AppendCh(uint ch)
- {
- return AppendCh<true>(ch);
- }
- template<bool performAppend> void AppendCh(uint ch)
- {
- if (performAppend)
- {
- if (m_ichCur >= m_cchMax)
- {
- Grow();
- }
- Assert(m_ichCur < m_cchMax);
- __analysis_assume(m_ichCur < m_cchMax);
- m_prgch[m_ichCur++] = static_cast<OLECHAR>(ch);
- }
- }
- private:
- void Grow()
- {
- Assert(m_pscanner != nullptr);
- byte *prgbNew;
- byte *prgbOld = (byte *)m_prgch;
- ULONG cbNew;
- if (FAILED(ULongMult(m_cchMax, sizeof(OLECHAR) * 2, &cbNew)))
- {
- m_pscanner->Error(ERRnoMemory);
- }
- if (prgbOld == m_rgbInit)
- {
- if (nullptr == (prgbNew = static_cast<byte*>(malloc(cbNew))))
- m_pscanner->Error(ERRnoMemory);
- js_memcpy_s(prgbNew, cbNew, prgbOld, m_ichCur * sizeof(OLECHAR));
- }
- else if (nullptr == (prgbNew = static_cast<byte*>(realloc(prgbOld, cbNew))))
- {
- m_pscanner->Error(ERRnoMemory);
- }
- m_prgch = (OLECHAR*)prgbNew;
- m_cchMax = cbNew / sizeof(OLECHAR);
- }
- };
- tokens GetPrevious() { return m_tkPrevious; }
- void Capture(_Out_ RestorePoint* restorePoint);
- void SeekTo(const RestorePoint& restorePoint);
- void SeekToForcingPid(const RestorePoint& restorePoint);
- void Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr);
- void SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId);
- void Clear();
- HashTbl * GetHashTbl() { return &m_htbl; }
- private:
- Parser *m_parser;
- HashTbl m_htbl;
- Token *m_ptoken;
- EncodedCharPtr m_pchBase; // beginning of source
- EncodedCharPtr m_pchLast; // The end of source
- EncodedCharPtr m_pchMinLine; // beginning of current line
- EncodedCharPtr m_pchMinTok; // beginning of current token
- EncodedCharPtr m_currentCharacter; // current character
- EncodedCharPtr m_pchPrevLine; // beginning of previous line
- size_t m_cMinTokMultiUnits; // number of multi-unit characters previous to m_pchMinTok
- size_t m_cMinLineMultiUnits; // number of multi-unit characters previous to m_pchMinLine
- uint16 m_fStringTemplateDepth; // we should treat } as string template middle starting character (depth instead of flag)
- BOOL m_fHadEol;
- BOOL m_fIsModuleCode : 1;
- BOOL m_doubleQuoteOnLastTkStrCon :1;
- bool m_OctOrLeadingZeroOnLastTKNumber :1;
- bool m_EscapeOnLastTkStrCon:1;
- bool m_lastIdentifierHasEscape:1;
- BOOL m_fNextStringTemplateIsTagged:1; // the next string template scanned has a tag (must create raw strings)
- BYTE m_DeferredParseFlags:2; // suppressStrPid and suppressIdPid
- bool es6UnicodeMode; // True if ES6Unicode Extensions are enabled.
- bool m_fYieldIsKeywordRegion; // Whether to treat 'yield' as an identifier or keyword
- bool m_fAwaitIsKeywordRegion; // Whether to treat 'await' as an identifier or keyword
- // Temporary buffer.
- TemporaryBuffer m_tempChBuf;
- TemporaryBuffer m_tempChBufSecondary;
- charcount_t m_line;
- ScanState m_scanState;
- charcount_t m_ichMinError;
- charcount_t m_ichLimError;
- charcount_t m_startLine;
- EncodedCharPtr m_pchStartLine;
- Js::ScriptContext* m_scriptContext;
- const Js::CharClassifier *charClassifier;
- tokens m_tkPrevious;
- size_t m_iecpLimTokPrevious;
- charcount_t m_ichLimTokPrevious;
- void ClearStates();
- template <bool forcePid>
- void SeekAndScan(const RestorePoint& restorePoint);
- tokens ScanCore(bool identifyKwds);
- tokens ScanAhead();
- tokens ScanError(EncodedCharPtr pchCur, tokens errorToken)
- {
- m_currentCharacter = pchCur;
- return m_ptoken->tk = tkScanError;
- }
- __declspec(noreturn) void Error(HRESULT hr)
- {
- m_pchMinTok = m_currentCharacter;
- m_cMinTokMultiUnits = this->m_cMultiUnits;
- throw ParseExceptionObject(hr);
- }
- EncodedCharPtr PchBase(void) const
- {
- return m_pchBase;
- }
- EncodedCharPtr PchMinTok(void)
- {
- return m_pchMinTok;
- }
- template<bool stringTemplateMode, bool createRawString> tokens ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp);
- tokens ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp);
- tokens ScanStringTemplateBegin(EncodedCharPtr *pp);
- tokens ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp);
- void ScanNewLine(uint ch);
- void NotifyScannedNewLine();
- charcount_t LineLength(EncodedCharPtr first, EncodedCharPtr last, size_t* cb);
- tokens ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp);
- BOOL FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last);
- tokens ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar, EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp);
- tokens SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef);
- tokens ScanRegExpConstant(ArenaAllocator* alloc);
- tokens ScanRegExpConstantNoAST(ArenaAllocator* alloc);
- EncodedCharPtr FScanNumber(EncodedCharPtr p, double *pdbl, LikelyNumberType& likelyInt, size_t savedMultiUnits);
- IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar);
- IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last);
- uint32 UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last);
- void SaveSrcPos(void)
- {
- m_pchMinTok = m_currentCharacter;
- }
- OLECHAR PeekNextChar(void)
- {
- return this->PeekFull(m_currentCharacter, m_pchLast);
- }
- OLECHAR ReadNextChar(void)
- {
- return this->template ReadFull<true>(m_currentCharacter, m_pchLast);
- }
- EncodedCharPtr AdjustedLast() const
- {
- return m_pchLast;
- }
- size_t AdjustedLength() const
- {
- return AdjustedLast() - m_pchBase;
- }
- bool IsStrictMode() const
- {
- return this->m_parser != NULL && this->m_parser->IsStrictMode();
- }
- // This function expects the first character to be a 'u'
- // It will attempt to return a codepoint represented by a single escape point (either of the form \uXXXX or \u{any number of hex characters, s.t. value < 0x110000}
- bool TryReadEscape(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar = nullptr);
- template <bool bScan>
- bool TryReadCodePointRest(codepoint_t lower, EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar);
- template <bool bScan>
- inline bool TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar);
- inline BOOL IsIdContinueNext(EncodedCharPtr startingLocation, EncodedCharPtr endOfSource)
- {
- codepoint_t nextCodepoint;
- bool ignore;
- if (TryReadCodePoint<false>(startingLocation, endOfSource, &nextCodepoint, &ignore, &ignore))
- {
- return charClassifier->IsIdContinue(nextCodepoint);
- }
- return false;
- }
- charcount_t UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd);
- };
|