//------------------------------------------------------------------------------------------------------- // Copyright (C) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. //------------------------------------------------------------------------------------------------------- #include "ParserPch.h" /***************************************************************************** * * The following table speeds various tests of characters, such as whether * a given character can be part of an identifier, and so on. */ int CountNewlines(LPCOLESTR psz) { int cln = 0; while (0 != *psz) { switch (*psz++) { case _u('\xD'): if (*psz == _u('\xA')) { ++psz; } // fall-through case _u('\xA'): cln++; break; } } return cln; } BOOL Token::IsKeyword() const { // keywords (but not future reserved words) return (tk <= tkYIELD); } tokens Token::SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser) { Assert(parser); if(pattern) parser->RegisterRegexPattern(pattern); this->u.pattern = pattern; return tk = tkRegExp; } IdentPtr Token::CreateIdentifier(HashTbl * hashTbl) { Assert(this->u.pid == nullptr); if (this->u.pchMin) { Assert(IsIdentifier()); IdentPtr pid = hashTbl->PidHashNameLen(this->u.pchMin, this->u.pchMin + this->u.length, this->u.length); this->u.pid = pid; return pid; } Assert(IsReservedWord()); IdentPtr pid = hashTbl->PidFromTk(tk); this->u.pid = pid; return pid; } template Scanner::Scanner(Parser* parser, Token *ptoken, Js::ScriptContext* scriptContext) { Assert(ptoken); m_parser = parser; m_ptoken = ptoken; m_scriptContext = scriptContext; m_tempChBuf.m_pscanner = this; m_tempChBufSecondary.m_pscanner = this; this->charClassifier = scriptContext->GetCharClassifier(); this->es6UnicodeMode = scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled(); ClearStates(); } template Scanner::~Scanner(void) { } template void Scanner::ClearStates() { m_pchBase = nullptr; 
m_pchLast = nullptr; m_pchMinLine = nullptr; m_pchMinTok = nullptr; m_currentCharacter = nullptr; m_pchPrevLine = nullptr; m_cMinTokMultiUnits = 0; m_cMinLineMultiUnits = 0; m_fStringTemplateDepth = 0; m_fHadEol = FALSE; m_fIsModuleCode = FALSE; m_doubleQuoteOnLastTkStrCon = FALSE; m_OctOrLeadingZeroOnLastTKNumber = false; m_EscapeOnLastTkStrCon = false; m_fNextStringTemplateIsTagged = false; m_DeferredParseFlags = ScanFlagNone; m_fYieldIsKeywordRegion = false; m_fAwaitIsKeywordRegion = false; m_line = 0; m_scanState = ScanStateNormal; m_ichMinError = 0; m_ichLimError = 0; m_startLine = 0; m_pchStartLine = NULL; m_iecpLimTokPrevious = (size_t)-1; m_ichLimTokPrevious = (charcount_t)-1; } template void Scanner::Clear() { EncodingPolicy::Clear(); ClearStates(); this->m_tempChBuf.Clear(); this->m_tempChBufSecondary.Clear(); } /***************************************************************************** * * Initializes the scanner to prepare to scan the given source text. */ template void Scanner::SetText(EncodedCharPtr pszSrc, size_t offset, size_t length, charcount_t charOffset, bool isUtf8, ULONG grfscr, ULONG lineNumber) { // Save the start of the script and add the offset to get the point where we should start scanning. 
m_pchBase = pszSrc; m_pchLast = m_pchBase + offset + length; m_pchPrevLine = m_currentCharacter = m_pchMinLine = m_pchMinTok = pszSrc + offset; this->RestoreMultiUnits(offset - charOffset); // Absorb any byte order mark at the start if(offset == 0) { switch( this->PeekFull(m_currentCharacter, m_pchLast) ) { case 0xFFEE: // "Opposite" endian BOM // We do not support big-endian encodings // fall-through case 0xFEFF: // "Correct" BOM this->template ReadFull(m_currentCharacter, m_pchLast); break; } } m_line = lineNumber; m_startLine = lineNumber; m_pchStartLine = m_currentCharacter; m_ptoken->tk = tkNone; m_fIsModuleCode = (grfscr & fscrIsModuleCode) != 0; m_fHadEol = FALSE; m_DeferredParseFlags = ScanFlagNone; this->SetIsUtf8(isUtf8); } #if ENABLE_BACKGROUND_PARSING template void Scanner::PrepareForBackgroundParse(Js::ScriptContext *scriptContext) { scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0); scriptContext->GetThreadContext()->GetStandardChars((char16*)0); } #endif //----------------------------------------------------------------------------- // Number of code points from 'first' up to, but not including the next // newline character, embedded NUL, or 'last', depending on which comes first. // // This is used to determine a length of BSTR, which can't contain a NUL character. //----------------------------------------------------------------------------- template charcount_t Scanner::LineLength(EncodedCharPtr first, EncodedCharPtr last, size_t* cb) { Assert(cb != nullptr); charcount_t result = 0; EncodedCharPtr p = first; for (;;) { EncodedCharPtr prev = p; switch( this->template ReadFull(p, last) ) { case kchNWL: // _C_NWL case kchRET: case kchLS: case kchPS: case kchNUL: // _C_NUL // p is now advanced past the line terminator character. // We need to know the number of bytes making up the line, not including the line terminator character. 
// To avoid subtracting a variable number of bytes because the line terminator characters are different // number of bytes long (plus there may be multiple valid encodings for these characters) just keep // track of the first byte of the line terminator character in prev. Assert(prev >= first); *cb = prev - first; return result; } result++; } } template charcount_t Scanner::UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd) { EncodedCharPtr p = start; charcount_t ich = ichStart; int32 current = line; charcount_t lastStart = ichStart; while (ich < ichEnd) { ich++; switch (this->template ReadFull(p, last)) { case kchRET: if (this->PeekFull(p, last) == kchNWL) { ich++; this->template ReadFull(p, last); } // fall-through case kchNWL: case kchLS: case kchPS: current++; lastStart = ich; break; case kchNUL: goto done; } } done: line = current; return lastStart; } template bool Scanner::TryReadEscape(EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar) { Assert(outChar != nullptr); Assert(startingLocation <= endOfSource); EncodedCharPtr currentLocation = startingLocation; codepoint_t charToOutput = 0x0; // '\' is Assumed as there is only one caller // Read 'u' characters if (currentLocation >= endOfSource || this->ReadFirst(currentLocation, endOfSource) != 'u') { return false; } bool expectCurly = false; if (currentLocation < endOfSource && this->PeekFirst(currentLocation, endOfSource) == '{' && es6UnicodeMode) { expectCurly = true; // Move past the character this->ReadFirst(currentLocation, endOfSource); } uint i = 0; OLECHAR ch = 0; int hexValue = 0; uint maxHexDigits = (expectCurly ? 
MAXUINT32 : 4u); for(; i < maxHexDigits && currentLocation < endOfSource; i++) { if (!Js::NumberUtilities::FHexDigit(ch = this->ReadFirst(currentLocation, endOfSource), &hexValue)) { break; } charToOutput = charToOutput * 0x10 + hexValue; if (charToOutput > 0x10FFFF) { return false; } } //At least 4 characters have to be read if (i == 0 || (i != 4 && !expectCurly)) { return false; } Assert(expectCurly ? es6UnicodeMode : true); if (expectCurly && ch != '}') { return false; } *outChar = charToOutput; startingLocation = currentLocation; return true; } template template bool Scanner::TryReadCodePointRest(codepoint_t lower, EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar) { Assert(outChar != nullptr); Assert(outContainsMultiUnitChar != nullptr); Assert(es6UnicodeMode); Assert(Js::NumberUtilities::IsSurrogateLowerPart(lower)); EncodedCharPtr currentLocation = startingLocation; *outChar = lower; if (currentLocation < endOfSource) { size_t restorePoint = this->m_cMultiUnits; codepoint_t upper = this->template ReadFull(currentLocation, endOfSource); if (Js::NumberUtilities::IsSurrogateUpperPart(upper)) { *outChar = Js::NumberUtilities::SurrogatePairAsCodePoint(lower, upper); if (this->IsMultiUnitChar(static_cast(upper))) { *outContainsMultiUnitChar = true; } startingLocation = currentLocation; } else { this->RestoreMultiUnits(restorePoint); } } return true; } template template inline bool Scanner::TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar) { Assert(outChar != nullptr); Assert(outContainsMultiUnitChar != nullptr); if (startingLocation >= endOfSource) { return false; } codepoint_t ch = this->template ReadFull(startingLocation, endOfSource); if (FBigChar(ch)) { if (this->IsMultiUnitChar(static_cast(ch))) { *outContainsMultiUnitChar = true; } if (es6UnicodeMode && 
Js::NumberUtilities::IsSurrogateLowerPart(ch)) { return TryReadCodePointRest(ch, startingLocation, endOfSource, outChar, outContainsMultiUnitChar); } } else if (ch == '\\' && TryReadEscape(startingLocation, endOfSource, &ch)) { *hasEscape = true; } *outChar = ch; return true; } template tokens Scanner::ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp) { EncodedCharPtr p = *pp; EncodedCharPtr pchMin = p; // JS6 allows unicode characters in the form of \uxxxx escape sequences // to be part of the identifier. bool fHasEscape = false; bool fHasMultiChar = false; codepoint_t codePoint = INVALID_CODEPOINT; size_t multiUnitsBeforeLast = this->m_cMultiUnits; // Check if we started the id if (!TryReadCodePoint(p, m_pchLast, &codePoint, &fHasEscape, &fHasMultiChar)) { // If no chars. could be scanned as part of the identifier, return error. return tkScanError; } Assert(codePoint < 0x110000u); if (!charClassifier->IsIdStart(codePoint)) { // Put back the last character this->RestoreMultiUnits(multiUnitsBeforeLast); // If no chars. could be scanned as part of the identifier, return error. return tkScanError; } return ScanIdentifierContinue(identifyKwds, fHasEscape, fHasMultiChar, pchMin, p, pp); } template BOOL Scanner::FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last) { if (EncodingPolicy::MultiUnitEncoding) { while (p < last) { EncodedChar currentChar = *p; if (this->IsMultiUnitChar(currentChar)) { // multi unit character, we may not have reach the end yet return FALSE; } Assert(currentChar != '\\' || !charClassifier->IsIdContinueFast(currentChar)); if (!charClassifier->IsIdContinueFast(currentChar)) { // only reach the end of the identifier if it is not the start of an escape sequence return currentChar != '\\'; } p++; } // We have reach the end of the identifier. 
return TRUE; } // Not fast path for non multi unit encoding return false; } template tokens Scanner::ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar, EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp) { EncodedCharPtr last = m_pchLast; while (true) { // Fast path for utf8, non-multi unit char and not escape if (FastIdentifierContinue(p, last)) { break; } // Slow path that has to deal with multi unit encoding codepoint_t codePoint = INVALID_CODEPOINT; EncodedCharPtr pchBeforeLast = p; size_t multiUnitsBeforeLast = this->m_cMultiUnits; if (TryReadCodePoint(p, last, &codePoint, &fHasEscape, &fHasMultiChar)) { Assert(codePoint < 0x110000u); if (charClassifier->IsIdContinue(codePoint)) { continue; } } // Put back the last character p = pchBeforeLast; this->RestoreMultiUnits(multiUnitsBeforeLast); break; } Assert(p - pchMin > 0 && p - pchMin <= LONG_MAX); *pp = p; if (!identifyKwds) { return tkID; } // UTF16 Scanner are only for syntax coloring, so it shouldn't come here. if (EncodingPolicy::MultiUnitEncoding && !fHasMultiChar && !fHasEscape) { Assert(sizeof(EncodedChar) == 1); // If there are no escape, that the main scan loop would have found the keyword already // So we can just assume it is an ID DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p)); DebugOnly(tokens tk = Ident::TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode())); Assert(tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword())); m_ptoken->SetIdentifier(reinterpret_cast(pchMin), (int32)(p - pchMin)); return tkID; } IdentPtr pid = PidOfIdentiferAt(pchMin, p, fHasEscape, fHasMultiChar); m_ptoken->SetIdentifier(pid); if (!fHasEscape) { // If it doesn't have escape, then Scan() should have taken care of keywords (except // yield if m_fYieldIsKeyword is false, in which case yield is treated as an identifier, and except // await if m_fAwaitIsKeyword is false, in which case await is treated as an identifier). 
// We don't have to check if the name is reserved word and return it as an Identifier Assert(pid->Tk(IsStrictMode()) == tkID || (pid->Tk(IsStrictMode()) == tkYIELD && !this->YieldIsKeyword()) || (pid->Tk(IsStrictMode()) == tkAWAIT && !this->AwaitIsKeyword())); return tkID; } tokens tk = pid->Tk(IsStrictMode()); return tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword()) ? tkID : tkNone; } template IdentPtr Scanner::PidAt(size_t iecpMin, size_t iecpLim) { Assert(iecpMin < AdjustedLength() && iecpLim <= AdjustedLength() && iecpLim > iecpMin); return PidOfIdentiferAt(m_pchBase + iecpMin, m_pchBase + iecpLim); } template uint32 Scanner::UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last) { m_tempChBuf.Reset(); while( p < last ) { codepoint_t codePoint; bool hasEscape, isMultiChar; bool gotCodePoint = TryReadCodePoint(p, last, &codePoint, &hasEscape, &isMultiChar); Assert(gotCodePoint); Assert(codePoint < 0x110000); if (codePoint < 0x10000) { m_tempChBuf.AppendCh((OLECHAR)codePoint); } else { char16 lower, upper; Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &upper); m_tempChBuf.AppendCh(lower); m_tempChBuf.AppendCh(upper); } } return m_tempChBuf.m_ichCur; } template IdentPtr Scanner::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last) { int32 cch = UnescapeToTempBuf(p, last); return this->GetHashTbl()->PidHashNameLen(m_tempChBuf.m_prgch, cch); } template IdentPtr Scanner::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar) { // If there is an escape sequence in the JS6 identifier or it is a UTF8 // source then we have to convert it to the equivalent char so we use a // buffer for translation. 
if ((EncodingPolicy::MultiUnitEncoding && fHasMultiChar) || fHadEscape) { return PidOfIdentiferAt(p, last); } else if (EncodingPolicy::MultiUnitEncoding) { Assert(sizeof(EncodedChar) == 1); return this->GetHashTbl()->PidHashNameLen(reinterpret_cast(p), reinterpret_cast(last), (int32)(last - p)); } else { Assert(sizeof(EncodedChar) == 2); return this->GetHashTbl()->PidHashNameLen(reinterpret_cast< const char16 * >(p), (int32)(last - p)); } } template typename Scanner::EncodedCharPtr Scanner::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt) { EncodedCharPtr last = m_pchLast; EncodedCharPtr pchT = nullptr; bool baseSpecified = false; likelyInt = true; // Reset m_OctOrLeadingZeroOnLastTKNumber = false; auto baseSpecifierCheck = [&pchT, &pdbl, p, &baseSpecified]() { if (pchT == p + 2) { // An octal token '0' was followed by a base specifier: /0[xXoObB]/ // This literal can no longer be a double *pdbl = 0; // Advance the character pointer to the base specifier pchT = p + 1; // Set the flag so we know to offset the potential identifier search after the literal baseSpecified = true; } }; if ('0' == this->PeekFirst(p, last)) { switch(this->PeekFirst(p + 1, last)) { case '.': case 'e': case 'E': likelyInt = false; // Floating point goto LFloat; case 'x': case 'X': // Hex *pdbl = Js::NumberUtilities::DblFromHex(p + 2, &pchT); baseSpecifierCheck(); goto LIdCheck; case 'o': case 'O': // Octal *pdbl = Js::NumberUtilities::DblFromOctal(p + 2, &pchT); baseSpecifierCheck(); goto LIdCheck; case 'b': case 'B': // Binary *pdbl = Js::NumberUtilities::DblFromBinary(p + 2, &pchT); baseSpecifierCheck(); goto LIdCheck; default: // Octal *pdbl = Js::NumberUtilities::DblFromOctal(p, &pchT); Assert(pchT > p); #if !SOURCERELEASE // If an octal literal is malformed then it is in fact a decimal literal. #endif // !SOURCERELEASE if(*pdbl != 0 || pchT > p + 1) m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. 
Just '0' is ok switch (*pchT) { case '8': case '9': // case 'e': // case 'E': // case '.': m_OctOrLeadingZeroOnLastTKNumber = false; //08... or 09.... goto LFloat; } goto LIdCheck; } } else { LFloat: *pdbl = Js::NumberUtilities::StrToDbl(p, &pchT, likelyInt); Assert(pchT == p || !Js::NumberUtilities::IsNan(*pdbl)); // fall through to LIdCheck } LIdCheck: // https://tc39.github.io/ecma262/#sec-literals-numeric-literals // The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit. // For example : 3in is an error and not the two input elements 3 and in // If a base was speficied, use the first character denoting the constant. In this case, pchT is pointing to the base specifier. EncodedCharPtr startingLocation = baseSpecified ? pchT + 1 : pchT; codepoint_t outChar = *startingLocation; if (this->IsMultiUnitChar((OLECHAR)outChar)) { outChar = this->template ReadRest((OLECHAR)outChar, startingLocation, last); } if (this->charClassifier->IsIdStart(outChar)) { Error(ERRIdAfterLit); } // IsIdStart does not cover the unicode escape case. Try to read a unicode escape from the 'u' char. if (*pchT == '\\') { startingLocation++; // TryReadEscape expects us to point to the 'u', and since it is by reference we need to do it beforehand. 
if (TryReadEscape(startingLocation, m_pchLast, &outChar)) { Error(ERRIdAfterLit); } } if (Js::NumberUtilities::IsDigit(*startingLocation)) { Error(ERRbadNumber); } return pchT; } template tokens Scanner::TryRescanRegExp() { EncodedCharPtr current = m_currentCharacter; tokens result = RescanRegExp(); if (result == tkScanError) m_currentCharacter = current; return result; } template tokens Scanner::RescanRegExp() { #if DEBUG switch (m_ptoken->tk) { case tkDiv: Assert(m_currentCharacter == m_pchMinTok + 1); break; case tkAsgDiv: Assert(m_currentCharacter == m_pchMinTok + 2); break; default: AssertMsg(FALSE, "Who is calling RescanRegExp?"); break; } #endif //DEBUG m_currentCharacter = m_pchMinTok; if (*m_currentCharacter != '/') Error(ERRnoSlash); m_currentCharacter++; tokens tk = tkNone; { ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc); tk = ScanRegExpConstant(&alloc); } return tk; } template tokens Scanner::RescanRegExpNoAST() { #if DEBUG switch (m_ptoken->tk) { case tkDiv: Assert(m_currentCharacter == m_pchMinTok + 1); break; case tkAsgDiv: Assert(m_currentCharacter == m_pchMinTok + 2); break; default: AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?"); break; } #endif //DEBUG m_currentCharacter = m_pchMinTok; if (*m_currentCharacter != '/') Error(ERRnoSlash); m_currentCharacter++; tokens tk = tkNone; { ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc); { tk = ScanRegExpConstantNoAST(&alloc); } } return tk; } template tokens Scanner::RescanRegExpTokenizer() { #if DEBUG switch (m_ptoken->tk) { case tkDiv: Assert(m_currentCharacter == m_pchMinTok + 1); break; case tkAsgDiv: Assert(m_currentCharacter == m_pchMinTok + 2); break; default: AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?"); break; } #endif //DEBUG m_currentCharacter = m_pchMinTok; if (*m_currentCharacter != '/') 
Error(ERRnoSlash); m_currentCharacter++; tokens tk = tkNone; ThreadContext *threadContext = ThreadContext::GetContextForCurrentThread(); threadContext->EnsureRecycler(); Js::TempArenaAllocatorObject *alloc = threadContext->GetTemporaryAllocator(_u("RescanRegExp")); TryFinally( [&]() /* try block */ { tk = this->ScanRegExpConstantNoAST(alloc->GetAllocator()); }, [&](bool /* hasException */) /* finally block */ { threadContext->ReleaseTemporaryAllocator(alloc); }); return tk; } template tokens Scanner::ScanRegExpConstant(ArenaAllocator* alloc) { PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex); // SEE ALSO: RegexHelper::PrimCompileDynamic() #ifdef PROFILE_EXEC m_scriptContext->ProfileBegin(Js::RegexCompilePhase); #endif ArenaAllocator* ctAllocator = alloc; UnifiedRegex::StandardChars* standardEncodedChars = m_scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0); UnifiedRegex::StandardChars* standardChars = m_scriptContext->GetThreadContext()->GetStandardChars((char16*)0); #if ENABLE_REGEX_CONFIG_OPTIONS UnifiedRegex::DebugWriter *w = 0; if (REGEX_CONFIG_FLAG(RegexDebug)) w = m_scriptContext->GetRegexDebugWriter(); if (REGEX_CONFIG_FLAG(RegexProfile)) m_scriptContext->GetRegexStatsDatabase()->BeginProfile(); #endif UnifiedRegex::Node* root = 0; charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0; UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags; UnifiedRegex::Parser parser ( m_scriptContext , ctAllocator , standardEncodedChars , standardChars , this->IsUtf8() #if ENABLE_REGEX_CONFIG_OPTIONS , w #endif ); try { root = parser.ParseLiteral(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars, flags); } catch (UnifiedRegex::ParseError e) { #ifdef PROFILE_EXEC m_scriptContext->ProfileEnd(Js::RegexCompilePhase); #endif m_currentCharacter += e.encodedPos; Error(e.error); } UnifiedRegex::RegexPattern* pattern; if (m_parser->IsBackgroundParser()) { // Avoid allocating pattern from recycler on 
background thread. The main thread will create the pattern // and hook it to this parse node. pattern = parser.template CompileProgram(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags); } else { pattern = parser.template CompileProgram(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags); } this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits return m_ptoken->SetRegex(pattern, m_parser); } template tokens Scanner::ScanRegExpConstantNoAST(ArenaAllocator* alloc) { PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex); ThreadContext *threadContext = m_scriptContext->GetThreadContext(); UnifiedRegex::StandardChars* standardEncodedChars = threadContext->GetStandardChars((EncodedChar*)0); UnifiedRegex::StandardChars* standardChars = threadContext->GetStandardChars((char16*)0); charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0; UnifiedRegex::Parser parser ( m_scriptContext , alloc , standardEncodedChars , standardChars , this->IsUtf8() #if ENABLE_REGEX_CONFIG_OPTIONS , 0 #endif ); try { parser.ParseLiteralNoAST(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars); } catch (UnifiedRegex::ParseError e) { m_currentCharacter += e.encodedPos; Error(e.error); // never reached } UnifiedRegex::RegexPattern* pattern = parser.template CompileProgram(nullptr, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, UnifiedRegex::NoRegexFlags); Assert(pattern == nullptr); // BuildAST == false, CompileProgram should return nullptr this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits return (m_ptoken->tk = tkRegExp); } template tokens Scanner::ScanStringTemplateBegin(EncodedCharPtr *pp) { // String template must begin with a string constant followed by '`' or '${' ScanStringConstant('`', pp); OLECHAR ch; EncodedCharPtr last = m_pchLast; ch = 
this->ReadFirst(*pp, last); if (ch == '`') { // Simple string template - no substitutions return tkStrTmplBasic; } else if (ch == '$') { ch = this->ReadFirst(*pp, last); if (ch == '{') { // Next token after expr should be tkStrTmplMid or tkStrTmplEnd. // In string template scanning mode, we expect the next char to be '}' // and will treat it as the beginning of tkStrTmplEnd or tkStrTmplMid m_fStringTemplateDepth++; // Regular string template begin - next is first substitution return tkStrTmplBegin; } } // Error - make sure pointer stays at the last character of the error token instead of after it in the error case (*pp)--; return ScanError(m_currentCharacter, tkStrTmplBegin); } template tokens Scanner::ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp) { // String template middle and end tokens must begin with a string constant ScanStringConstant('`', pp); OLECHAR ch; EncodedCharPtr last = m_pchLast; ch = this->ReadFirst(*pp, last); if (ch == '`') { // No longer in string template scanning mode m_fStringTemplateDepth--; // This is the last part of the template ...` return tkStrTmplEnd; } else if (ch == '$') { ch = this->ReadFirst(*pp, last); if (ch == '{') { // This is just another middle part of the template }...${ return tkStrTmplMid; } } // Error - make sure pointer stays at the last character of the error token instead of after it in the error case (*pp)--; return ScanError(m_currentCharacter, tkStrTmplEnd); } /***************************************************************************** * * Parses a string constant. Note that the string value is stored in * a volatile buffer (or allocated on the heap if too long), and thus * the string should be saved off before the next token is scanned. 
*/
// Scans the characters of a string literal, or of one string part of a template literal,
// starting at *pp (just after the opening delimiter).
//
// Two compile-time switches, which must have the same value, select the mode:
//   stringTemplateMode - line terminators are legal and normalized to LF, the scan stops
//                        before '`' or "${" without consuming them, and octal escapes are errors.
//   createRawString    - the unprocessed ("raw") text is additionally collected in
//                        m_tempChBufSecondary.
//
//   delim - closing quote character for an ordinary string literal.
//   pp    - in/out read position. In ordinary mode it ends up after the closing quote; in
//           template mode it ends up on the '`' or '$' that stopped the scan.
//
// The decoded ("cooked") text is collected in m_tempChBuf and, unless identifier creation is
// suppressed by ScanFlagSuppressStrPid, hashed into the token. Returns tkStrCon; malformed
// input is reported through Error().
// Side effects: updates m_EscapeOnLastTkStrCon, m_OctOrLeadingZeroOnLastTKNumber,
// m_doubleQuoteOnLastTkStrCon, m_scanState and the line bookkeeping.
template template tokens Scanner::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
{
    static_assert((stringTemplateMode && createRawString) || (!stringTemplateMode && !createRawString), "stringTemplateMode and createRawString must have the same value");

    // ch    - cooked value to append for the current character
    // rawch - raw value to append for the current character
    // c     - scratch for the character most recently read while decoding an escape
    // wT    - scratch for the numeric value of a hex/octal digit
    OLECHAR ch, c, rawch;
    int wT;
    EncodedCharPtr p = *pp;
    EncodedCharPtr last = m_pchLast;

    // Reset
    m_OctOrLeadingZeroOnLastTKNumber = false;
    m_EscapeOnLastTkStrCon = FALSE;

    m_tempChBuf.Reset();

    // Use template parameter to gate raw string creation.
    // If createRawString is false, all these operations should be no-ops
    if (createRawString)
    {
        m_tempChBufSecondary.Reset();
    }

    // One iteration per source character (or per escape sequence). Leaving the switch with
    // 'break' appends ch/rawch to the buffers; 'continue' skips the append; LBreak ends the scan.
    for (;;)
    {
        switch ((rawch = ch = this->ReadFirst(p, last)))
        {
        case kchRET:
            if (stringTemplateMode)
            {
                if (this->PeekFirst(p, last) == kchNWL)
                {
                    // Eat the LF that follows the CR
                    this->ReadFirst(p, last);
                }

                // Both CR and CR LF are normalized to LF in template cooked and raw values
                ch = rawch = kchNWL;
            }

LEcmaLineBreak:
            // Fall through
        case kchNWL:
            if (stringTemplateMode)
            {
                // Notify the scanner to update current line, number of lines etc
                NotifyScannedNewLine();

                // We haven't updated m_currentCharacter yet, so make sure the MinLine info is correct in case we error out.
                m_pchMinLine = p;

                break;
            }

            // An unescaped line terminator inside an ordinary string literal is an error.
            // NOTE(review): no 'break' follows, so Error() is presumably expected not to return - confirm.
            m_currentCharacter = p - 1;
            Error(ERRnoStrEnd);

        case '"':
        case '\'':
            // Only the quote character that opened the literal closes it.
            if (ch == delim)
                goto LBreak;
            break;

        case '`':
            // In string template scan mode, don't consume the '`' - we need to differentiate
            // between a closed string template and the expression open sequence - ${
            if (stringTemplateMode)
            {
                p--;
                goto LBreak;
            }

            // If we aren't scanning for a string template, do the default thing
            goto LMainDefault;

        case '$':
            // If we are parsing a string literal part of a string template, ${ indicates we need to switch
            // to parsing an expression.
            if (stringTemplateMode && this->PeekFirst(p, last) == '{')
            {
                // Rewind to the $ and return
                p--;
                goto LBreak;
            }

            // If we aren't scanning for a string template, do the default thing
            goto LMainDefault;

        case kchNUL:
            // A NUL only ends the literal (as an error) once the read position has run past
            // the end of the source; otherwise it is appended like any other character.
            if (p > last)
            {
                m_currentCharacter = p - 1;
                Error(ERRnoStrEnd);
            }
            break;

        default:
LMainDefault:
            if (this->IsMultiUnitChar(ch))
            {
                if ((ch == kchLS || ch == kchPS))
                {
                    goto LEcmaLineBreak;
                }

                // Read the remaining units of the character, then re-check for LS / PS.
                rawch = ch = this->template ReadRest(ch, p, last);
                switch (ch)
                {
                case kchLS: // 0x2028, classifies as new line
                case kchPS: // 0x2029, classifies as new line
                    goto LEcmaLineBreak;
                }
            }
            break;

        case kchBSL:
            // In raw mode '\\' is not an escape character, just add the char into the raw buffer.
            m_tempChBufSecondary.template AppendCh(ch);

            m_EscapeOnLastTkStrCon=TRUE;

            // In raw mode, we append the raw char itself and not the escaped value so save the char.
            rawch = ch = this->ReadFirst(p, last);
            codepoint_t codePoint = 0;
            // Error reported at ReturnScanError unless a more specific one is set first.
            uint errorType = (uint)ERRbadHexDigit;
            switch (ch)
            {
            case 'b':
                ch = 0x08;
                break;
            case 't':
                ch = 0x09;
                break;
            case 'v':
                ch = 0x0B; //Only in ES5 mode
                break; //same as default
            case 'n':
                ch = 0x0A;
                break;
            case 'f':
                ch = 0x0C;
                break;
            case 'r':
                ch = 0x0D;
                break;
            case 'x':
                // Insert the 'x' here before jumping to parse the hex digits.
                m_tempChBufSecondary.template AppendCh(ch);

                // 2 hex digits
                ch = 0;
                goto LTwoHex;
            case 'u':
                // Raw string just inserts a 'u' here.
                m_tempChBufSecondary.template AppendCh(ch);

                ch = 0;
                // A hex digit right after 'u' starts the four-digit form; otherwise only
                // '{' (in ES6 unicode mode) is acceptable.
                if (Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto LFourHex;
                else if (c != '{' || !this->es6UnicodeMode)
                    goto ReturnScanError;

                Assert(c == '{');
                // c should definitely be a '{' which should be appended to the raw string.
                m_tempChBufSecondary.template AppendCh(c);

                //At least one digit is expected
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                {
                    goto ReturnScanError;
                }

                m_tempChBufSecondary.template AppendCh(c);

                codePoint = static_cast(wT);

                // Accumulate further hex digits; the character that ends the loop stays in c.
                while(Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                {
                    m_tempChBufSecondary.template AppendCh(c);
                    codePoint <<= 4;
                    codePoint += static_cast(wT);

                    if (codePoint > 0x10FFFF)
                    {
                        errorType = (uint)ERRInvalidCodePoint;
                        goto ReturnScanError;
                    }
                }

                if (c != '}')
                {
                    errorType = (uint)ERRMissingCurlyBrace;
                    goto ReturnScanError;
                }

                Assert(codePoint <= 0x10FFFF);

                if (codePoint >= 0x10000)
                {
                    // Append the first surrogate half now; the second half is left in ch and
                    // appended by the common code at the bottom of the loop.
                    OLECHAR lower = 0;
                    Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &ch);
                    m_tempChBuf.AppendCh(lower);
                }
                else
                {
                    ch = (char16)codePoint;
                }

                // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
                if (createRawString)
                    rawch = c;

                break;
LFourHex:
                codePoint = 0x0;
                // Append first hex digit character to the raw string.
                m_tempChBufSecondary.template AppendCh(c);

                codePoint += static_cast(wT * 0x1000);
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                // Append the second hex digit character of the four-digit form to the raw string.
                m_tempChBufSecondary.template AppendCh(c);

                codePoint += static_cast(wT * 0x0100);

LTwoHex:
                // This code path doesn't expect curly.
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                // Append the next-to-last hex digit character to the raw string.
                m_tempChBufSecondary.template AppendCh(c);

                codePoint += static_cast(wT * 0x0010);

                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                codePoint += static_cast(wT);

                // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
                if (createRawString)
                    rawch = c;

                if (codePoint < 0x10000)
                {
                    ch = static_cast(codePoint);
                }
                else
                {
                    goto ReturnScanError;
                }
                break;
            case '0':
            case '1':
            case '2':
            case '3':
                // 1 to 3 octal digits

                ch -= '0';

                // Octal escape sequences are not allowed inside string template literals
                if (stringTemplateMode)
                {
                    // Only a backslash-zero that is not followed by an octal digit is accepted.
                    // NOTE(review): a following '8' or '9' is accepted here - confirm whether
                    // those should be rejected in templates as well.
                    c = this->PeekFirst(p, last);
                    if (ch != 0 || (c >= '0' && c <= '7'))
                    {
                        errorType = (uint)ERRES5NoOctal;
                        goto ReturnScanError;
                    }
                    break;
                }

                wT = (c = this->ReadFirst(p, last)) - '0';
                if ((char16)wT > 7)
                {
                    // Single-digit escape. It is flagged unless it is a zero that is not
                    // followed by a decimal digit.
                    if (ch != 0 || ((char16)wT <= 9))
                    {
                        m_OctOrLeadingZeroOnLastTKNumber = true;
                    }
                    // Un-read the character that turned out not to be an octal digit.
                    p--;
                    break;
                }

                m_OctOrLeadingZeroOnLastTKNumber = true;
                ch = static_cast< OLECHAR >(ch * 8 + wT);
                goto LOneOctal;
            case '4':
            case '5':
            case '6':
            case '7':
                // 1 to 2 octal digits

                // Octal escape sequences are not allowed inside string template literals
                if (stringTemplateMode)
                {
                    errorType = (uint)ERRES5NoOctal;
                    goto ReturnScanError;
                }

                ch -= '0';

                m_OctOrLeadingZeroOnLastTKNumber = true;

LOneOctal:
                // Optional final octal digit.
                wT = (c = this->ReadFirst(p, last)) - '0';
                if ((char16)wT > 7)
                {
                    p--;
                    break;
                }

                ch = static_cast< OLECHAR >(ch * 8 + wT);
                break;

            case kchRET:        // 0xD
                if (stringTemplateMode)
                {
                    // If this is an escaped CR LF we can eat the LF right now
                    if (this->PeekFirst(p, last) == kchNWL)
                    {
                        // Eat the LF that follows the CR
                        this->ReadFirst(p, last);
                    }

                    // Both escaped CR and escaped CR LF are normalized to escaped LF in template raw string
                    rawch = kchNWL;
                }
                // Deliberate fall-through into the common line continuation handling.
            case kchLS:         // 0x2028, classifies as new line
            case kchPS:         // 0x2029, classifies as new line
            case kchNWL:        // 0xA
LEcmaEscapeLineBreak:
                if (stringTemplateMode)
                {
                    // We're going to ignore the line continuation tokens for the cooked strings, but we need to append the token for raw strings
                    m_tempChBufSecondary.template AppendCh(rawch);

                    // Template literal strings ignore all escaped line continuation tokens
                    NotifyScannedNewLine();

                    // We haven't updated m_currentCharacter yet, so make sure the MinLine info is correct in case we error out.
                    m_pchMinLine = p;

                    continue;
                }

                // Ordinary string: a line continuation adds nothing to the value. ScanNewLine
                // works on m_currentCharacter, so sync it with p before and after the call.
                m_currentCharacter = p;
                ScanNewLine(ch);
                p = m_currentCharacter;
                continue;

            case 0:
                if (p >= last)
                {
                    errorType = (uint)ERRnoStrEnd;

                    // Shared error exit for every malformed escape above.
ReturnScanError:
                    m_currentCharacter = p - 1;
                    Error(errorType);
                }
                else if (stringTemplateMode)
                {
                    // Escaped null character is translated into 0x0030 for raw template literals
                    rawch = 0x0030;
                }
                break;

            default:
                // Any other escaped character stands for itself.
                if (this->IsMultiUnitChar(ch))
                {
                    rawch = ch = this->template ReadRest(ch, p, last);
                    switch (ch)
                    {
                    case kchLS:
                    case kchPS:
                        goto LEcmaEscapeLineBreak;
                    }
                }
                break;
            }
            break;
        }

        m_tempChBuf.AppendCh(ch);
        m_tempChBufSecondary.template AppendCh(rawch);
    }

LBreak:
    bool createPid = true;

    if ((m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0)
    {
        createPid = false;

        // Even when identifier creation is suppressed, the exact text "use strict" still gets one.
        if ((m_tempChBuf.m_ichCur == 10) && (0 == memcmp(_u("use strict"), m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur * sizeof(OLECHAR))))
        {
            createPid = true;
        }
    }

    if (createPid)
    {
        m_ptoken->SetIdentifier(this->GetHashTbl()->PidHashNameLen(m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur));
    }
    else
    {
        m_ptoken->SetIdentifier(NULL);
    }

    m_scanState = ScanStateNormal;
    m_doubleQuoteOnLastTkStrCon = '"' == delim;

    *pp = p;
    return tkStrCon;
}

// Entry point without mode switches; forwards to the templated ScanStringConstant above.
template tokens Scanner::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
{
    return ScanStringConstant(delim, pp);
}

/*****************************************************************************
*
*  Consume a C-style comment.
*/ template tokens Scanner::SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef) { Assert(containTypeDef != nullptr); EncodedCharPtr p = *pp; *containTypeDef = false; EncodedCharPtr last = m_pchLast; OLECHAR ch; for (;;) { switch((ch = this->ReadFirst(p, last))) { case '*': if (*p == '/') { *pp = p + 1; return tkNone; } break; // ES 2015 11.3 Line Terminators case kchLS: // 0x2028, classifies as new line case kchPS: // 0x2029, classifies as new line LEcmaLineBreak: goto LLineBreak; case kchRET: case kchNWL: LLineBreak: m_fHadEol = TRUE; m_currentCharacter = p; ScanNewLine(ch); p = m_currentCharacter; break; case kchNUL: if (p >= last) { m_currentCharacter = p - 1; *pp = p - 1; Error(ERRnoCmtEnd); } break; default: if (this->IsMultiUnitChar(ch)) { ch = this->template ReadRest(ch, p, last); switch (ch) { case kchLS: case kchPS: goto LEcmaLineBreak; } } break; } } } /***************************************************************************** * * We've encountered a newline - update various counters and things. */ template void Scanner::ScanNewLine(uint ch) { if (ch == '\r' && PeekNextChar() == '\n') { ReadNextChar(); } NotifyScannedNewLine(); } /***************************************************************************** * * We've encountered a newline - update various counters and things. */ template void Scanner::NotifyScannedNewLine() { // update in scanner: previous line, current line, number of lines. m_line++; m_pchPrevLine = m_pchMinLine; m_pchMinLine = m_currentCharacter; m_cMinLineMultiUnits = this->m_cMultiUnits; } /***************************************************************************** * * Delivers a token stream. 
*/ template tokens Scanner::ScanForcingPid() { if (m_DeferredParseFlags != ScanFlagNone) { BYTE deferredParseFlagsSave = m_DeferredParseFlags; m_DeferredParseFlags = ScanFlagNone; tokens result = tkEOF; TryFinally( [&]() /* try block */ { result = this->Scan(); }, [&](bool) /* finally block */ { this->m_DeferredParseFlags = deferredParseFlagsSave; }); return result; } return Scan(); } template tokens Scanner::Scan() { return ScanCore(true); } template tokens Scanner::ScanNoKeywords() { return ScanCore(false); } template tokens Scanner::ScanAhead() { return ScanNoKeywords(); } template tokens Scanner::ScanCore(bool identifyKwds) { codepoint_t ch; OLECHAR firstChar; OLECHAR secondChar; EncodedCharPtr pchT; size_t multiUnits = 0; EncodedCharPtr p = m_currentCharacter; EncodedCharPtr last = m_pchLast; bool seenDelimitedCommentEnd = false; // store the last token m_tkPrevious = m_ptoken->tk; m_iecpLimTokPrevious = IecpLimTok(); // Introduced for use by lambda parsing to find correct span of expression lambdas m_ichLimTokPrevious = IchLimTok(); if (p >= last) { m_pchMinTok = p; m_cMinTokMultiUnits = this->m_cMultiUnits; goto LEof; } tokens token; m_fHadEol = FALSE; CharTypes chType; charcount_t commentStartLine; if (m_scanState && *p != 0) { if (m_scanState == ScanStateStringTemplateMiddleOrEnd) { AssertMsg(m_fStringTemplateDepth > 0, "Shouldn't be trying to parse a string template end or middle token if we aren't scanning a string template"); m_scanState = ScanStateNormal; pchT = p; token = ScanStringTemplateMiddleOrEnd(&pchT); p = pchT; goto LDone; } } for (;;) { LLoop: m_pchMinTok = p; m_cMinTokMultiUnits = this->m_cMultiUnits; ch = this->ReadFirst(p, last); #if DEBUG chType = this->charClassifier->GetCharType((OLECHAR)ch); #endif switch (ch) { default: if (ch == kchLS || ch == kchPS ) { goto LNewLine; } { BOOL isMultiUnit = this->IsMultiUnitChar((OLECHAR)ch); if (isMultiUnit) { ch = this->template ReadRest((OLECHAR)ch, p, last); } if (es6UnicodeMode && 
Js::NumberUtilities::IsSurrogateLowerPart(ch)) { codepoint_t upper = this->PeekFull(p, last); if (Js::NumberUtilities::IsSurrogateUpperPart(upper)) { // Consume the rest of the utf8 bytes for the codepoint OLECHAR decodedUpper = this->ReadSurrogatePairUpper(p, last); Assert(decodedUpper == (OLECHAR) upper); ch = Js::NumberUtilities::SurrogatePairAsCodePoint(ch, upper); } } if (this->charClassifier->IsIdStart(ch)) { // We treat IDContinue as an error. token = ScanIdentifierContinue(identifyKwds, false, !!isMultiUnit, m_pchMinTok, p, &p); break; } } chType = this->charClassifier->GetCharType(ch); switch (chType) { case _C_WSP: continue; case _C_NWL: goto LNewLine; // All other types (except errors) are handled by the outer switch. } Assert(chType == _C_LET || chType == _C_ERR || chType == _C_UNK || chType == _C_BKQ || chType == _C_SHP || chType == _C_AT || chType == _C_DIG); m_currentCharacter = p - 1; Error(ERRillegalChar); continue; case '\0': // Put back the null in case we get called again. p--; if (p < last) { // A \0 prior to the end of the text is an invalid character. m_currentCharacter = p; Error(ERRillegalChar); } LEof: Assert(p >= last); token = tkEOF; break; case 0x0009: case 0x000B: case 0x000C: case 0x0020: Assert(chType == _C_WSP); continue; case '.': if (!Js::NumberUtilities::IsDigit(*p)) { // Not a double if (m_scriptContext->GetConfig()->IsES6SpreadEnabled() && this->PeekFirst(p, last) == '.' 
&& this->PeekFirst(p + 1, last) == '.') { token = tkEllipsis; p += 2; } else { token = tkDot; } break; } // May be a double, fall through case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { double dbl; Assert(chType == _C_DIG || chType == _C_DOT); p = m_pchMinTok; this->RestoreMultiUnits(m_cMinTokMultiUnits); bool likelyInt = true; pchT = FScanNumber(p, &dbl, likelyInt); if (p == pchT) { Assert(this->PeekFirst(p, last) != '.'); Error(ERRbadNumber); } Assert(!Js::NumberUtilities::IsNan(dbl)); p = pchT; int32 value; if (likelyInt && Js::NumberUtilities::FDblIsInt32(dbl, &value)) { m_ptoken->SetLong(value); token = tkIntCon; } else { token = tkFltCon; m_ptoken->SetDouble(dbl, likelyInt); } break; } case '(': Assert(chType == _C_LPR); token = tkLParen; break; case ')': Assert(chType == _C_RPR); token = tkRParen; break; case ',': Assert(chType == _C_CMA); token = tkComma; break; case ';': Assert(chType == _C_SMC); token = tkSColon; break; case '[': Assert(chType == _C_LBR); token = tkLBrack; break; case ']': Assert(chType == _C_RBR); token = tkRBrack; break; case '~': Assert(chType == _C_TIL); token = tkTilde; break; case '?': Assert(chType == _C_QUE); token = tkQMark; break; case '{': Assert(chType == _C_LC); token = tkLCurly; break; // ES 2015 11.3 Line Terminators case '\r': case '\n': // kchLS: // kchPS: LNewLine: m_currentCharacter = p; ScanNewLine(ch); p = m_currentCharacter; m_fHadEol = TRUE; continue; LReserved: { // We will derive the PID from the token Assert(token < tkID); m_ptoken->SetIdentifier(NULL); goto LDone; } LEval: { token = tkID; if (!this->m_parser) goto LIdentifier; m_ptoken->SetIdentifier(this->m_parser->GetEvalPid()); goto LDone; } LArguments: { token = tkID; if (!this->m_parser) goto LIdentifier; m_ptoken->SetIdentifier(this->m_parser->GetArgumentsPid()); goto LDone; } LTarget: { token = tkID; if (!this->m_parser) goto LIdentifier; m_ptoken->SetIdentifier(this->m_parser->GetTargetPid()); goto 
LDone; } #include "kwd-swtch.h" case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': // Lower-case letters handled in kwd-swtch.h above during reserved word recognition. case '$': case '_': LIdentifier: Assert(this->charClassifier->IsIdStart(ch)); Assert(ch < 0x10000 && !this->IsMultiUnitChar((OLECHAR)ch)); token = ScanIdentifierContinue(identifyKwds, false, false, m_pchMinTok, p, &p); break; case '`': Assert(chType == _C_BKQ); pchT = p; token = ScanStringTemplateBegin(&pchT); p = pchT; break; case '}': Assert(chType == _C_RC); token = tkRCurly; break; case '\\': pchT = p - 1; token = ScanIdentifier(identifyKwds, &pchT); if (tkScanError == token) { m_currentCharacter = p; Error(ERRillegalChar); } p = pchT; break; case ':': token = tkColon; break; case '=': token = tkAsg; switch (this->PeekFirst(p, last)) { case '=': p++; token = tkEQ; if (this->PeekFirst(p, last) == '=') { p++; token = tkEqv; } break; case '>': p++; token = tkDArrow; break; } break; case '!': token = tkBang; if (this->PeekFirst(p, last) == '=') { p++; token = tkNE; if (this->PeekFirst(p, last) == '=') { p++; token = tkNEqv; } } break; case '+': token = tkAdd; switch (this->PeekFirst(p, last)) { case '=': p++; token = tkAsgAdd; break; case '+': p++; token = tkInc; break; } break; case '-': token = tkSub; switch (this->PeekFirst(p, last)) { case '=': p++; token = tkAsgSub; break; case '-': p++; token = tkDec; if (!m_fIsModuleCode) { // https://tc39.github.io/ecma262/#prod-annexB-MultiLineComment // If there was a new line in the multi-line comment, the text after --> is a comment. 
if ('>' == this->PeekFirst(p, last) && m_fHadEol) { goto LSkipLineComment; } } break; } break; case '*': token = tkStar; switch(this->PeekFirst(p, last)) { case '=' : p++; token = tkAsgMul; break; case '*' : if (!m_scriptContext->GetConfig()->IsES7ExponentiationOperatorEnabled()) { break; } p++; token = tkExpo; if (this->PeekFirst(p, last) == '=') { p++; token = tkAsgExpo; } } break; case '/': token = tkDiv; switch(this->PeekFirst(p, last)) { case '=': p++; token = tkAsgDiv; break; case '/': if (p >= last) { AssertMsg(!m_fIsModuleCode, "Do we have other line comment cases scanning pass last?"); // Effective source length may have excluded HTMLCommentSuffix "//... -->". If we are scanning // those, we have passed "last" already. Move back and return EOF. p = last; goto LEof; } ch = *++p; firstChar = (OLECHAR)ch; LSkipLineComment: pchT = NULL; for (;;) { switch ((ch = this->ReadFirst(p, last))) { case kchLS: // 0x2028, classifies as new line case kchPS: // 0x2029, classifies as new line LEcmaCommentLineBreak: // kchPS and kchLS are more than one unit in UTF-8. if (pchT) { // kchPS and kchLS are more than one unit in UTF-8. p = pchT; } else { // But only a single code unit in UTF16 p--; } this->RestoreMultiUnits(multiUnits); goto LCommentLineBreak; case kchNWL: case kchRET: p--; LCommentLineBreak: // Subtract the comment length from the total char count for the purpose // of deciding whether to defer AST and byte code generation. m_parser->ReduceDeferredScriptLength((ULONG)(p - m_pchMinTok)); break; case kchNUL: // Because we used ReadFirst, we have advanced p. The character that we are looking at is actually is p - 1. 
// If p == last, we are looking at p - 1, it is still within the source buffer, and we need to consider it part of the comment // Only if p > last that we have pass the source buffer and consider it a line break if (p > last) { p--; goto LCommentLineBreak; } continue; default: if (this->IsMultiUnitChar((OLECHAR)ch)) { pchT = p - 1; multiUnits = this->m_cMultiUnits; switch (ch = this->template ReadRest((OLECHAR)ch, p, last)) { case kchLS: case kchPS: goto LEcmaCommentLineBreak; } } continue; } break; } continue; case '*': ch = *++p; firstChar = (OLECHAR)ch; if ((p + 1) < last) { secondChar = (OLECHAR)(*(p + 1)); } else { secondChar = '\0'; } pchT = p; commentStartLine = m_line; bool containTypeDef; if (tkNone == (token = SkipComment(&pchT, &containTypeDef))) { // Subtract the comment length from the total char count for the purpose // of deciding whether to defer AST and byte code generation. m_parser->ReduceDeferredScriptLength((ULONG)(pchT - m_pchMinTok)); p = pchT; seenDelimitedCommentEnd = true; goto LLoop; } p = pchT; break; } break; case '%': Assert(chType == _C_PCT); token = tkPct; if (this->PeekFirst(p, last) == '=') { p++; token = tkAsgMod; } break; case '<': Assert(chType == _C_LT); token = tkLT; switch (this->PeekFirst(p, last)) { case '=': p++; token = tkLE; break; case '<': p++; token = tkLsh; if (this->PeekFirst(p, last) == '=') { p++; token = tkAsgLsh; break; } break; case '!': // ES 2015 B.1.3 - HTML comments are only allowed when parsing non-module code. if (!m_fIsModuleCode && this->PeekFirst(p + 1, last) == '-' && this->PeekFirst(p + 2, last) == '-') { // This is a "". If we are scanning // those, we have passed "last" already. Move back and return EOF. 
p = last; goto LEof; } firstChar = '!'; goto LSkipLineComment; } break; } break; case '>': Assert(chType == _C_GT); token = tkGT; switch (this->PeekFirst(p, last)) { case '=': p++; token = tkGE; break; case '>': p++; token = tkRsh; switch (this->PeekFirst(p, last)) { case '=': p++; token = tkAsgRsh; break; case '>': p++; token = tkRs2; if (*p == '=') { p++; token = tkAsgRs2; } break; } break; } break; case '^': Assert(chType == _C_XOR); token = tkXor; if (this->PeekFirst(p, last) == '=') { p++; token = tkAsgXor; } break; case '|': Assert(chType == _C_BAR); token = tkOr; switch (this->PeekFirst(p, last)) { case '=': p++; token = tkAsgOr; break; case '|': p++; token = tkLogOr; break; } break; case '&': Assert(chType == _C_AMP); token = tkAnd; switch (this->PeekFirst(p, last)) { case '=': p++; token = tkAsgAnd; break; case '&': p++; token = tkLogAnd; break; } break; case '\'': case '"': Assert(chType == _C_QUO || chType == _C_APO); pchT = p; token = this->ScanStringConstant((OLECHAR)ch, &pchT); p = pchT; break; } break; } LDone: m_currentCharacter = p; return (m_ptoken->tk = token); } template IdentPtr Scanner::GetSecondaryBufferAsPid() { bool createPid = true; if ((m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0) { createPid = false; } if (createPid) { return this->GetHashTbl()->PidHashNameLen(m_tempChBufSecondary.m_prgch, m_tempChBufSecondary.m_ichCur); } else { return nullptr; } } template LPCOLESTR Scanner::StringFromLong(int32 lw) { _ltow_s(lw, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax, 10); return m_tempChBuf.m_prgch; } template IdentPtr Scanner::PidFromLong(int32 lw) { return this->GetHashTbl()->PidHashName(StringFromLong(lw)); } template LPCOLESTR Scanner::StringFromDbl(double dbl) { if (!Js::NumberUtilities::FDblToStr(dbl, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax)) { Error(ERRnoMemory); } return m_tempChBuf.m_prgch; } template IdentPtr Scanner::PidFromDbl(double dbl) { return this->GetHashTbl()->PidHashName(StringFromDbl(dbl)); } template void 
Scanner::Capture(_Out_ RestorePoint* restorePoint) { Capture(restorePoint, 0, 0); } template void Scanner::Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr) { restorePoint->m_ichMinTok = this->IchMinTok(); restorePoint->m_ichMinLine = this->IchMinLine(); restorePoint->m_cMinTokMultiUnits = this->m_cMinTokMultiUnits; restorePoint->m_cMinLineMultiUnits = this->m_cMinLineMultiUnits; restorePoint->m_line = this->m_line; restorePoint->m_fHadEol = this->m_fHadEol; restorePoint->functionIdIncrement = functionIdIncrement; restorePoint->lengthDecr = lengthDecr; #ifdef DEBUG restorePoint->m_cMultiUnits = this->m_cMultiUnits; #endif } template void Scanner::SeekTo(const RestorePoint& restorePoint) { SeekAndScan(restorePoint); } template void Scanner::SeekToForcingPid(const RestorePoint& restorePoint) { SeekAndScan(restorePoint); } template template void Scanner::SeekAndScan(const RestorePoint& restorePoint) { this->m_currentCharacter = this->m_pchBase + restorePoint.m_ichMinTok + restorePoint.m_cMinTokMultiUnits; this->m_pchMinLine = this->m_pchBase + restorePoint.m_ichMinLine + restorePoint.m_cMinLineMultiUnits; this->m_cMinLineMultiUnits = restorePoint.m_cMinLineMultiUnits; this->RestoreMultiUnits(restorePoint.m_cMinTokMultiUnits); if (forcePid) { this->ScanForcingPid(); } else { this->Scan(); } this->m_line = restorePoint.m_line; this->m_fHadEol = restorePoint.m_fHadEol; this->m_parser->ReduceDeferredScriptLength(restorePoint.lengthDecr); Assert(this->m_cMultiUnits == restorePoint.m_cMultiUnits); } template void Scanner::SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId) { SeekTo(restorePoint); *nextFunctionId += restorePoint.functionIdIncrement; } // Called by CompileScriptException::ProcessError to retrieve a BSTR for the line on which an error occurred. 
template HRESULT Scanner::SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine) { if( !pbstrLine ) { return E_POINTER; } // If we overflow the string, we have a serious problem... if (ichMinLine < 0 || static_cast(ichMinLine) > AdjustedLength() ) { return E_UNEXPECTED; } typename EncodingPolicy::EncodedCharPtr pStart = static_cast(ichMinLine) == IchMinLine() ? m_pchMinLine : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, ichMinLine); // Determine the length by scanning for the next newline size_t cb = 0; charcount_t cch = LineLength(pStart, m_pchLast, &cb); Assert(cch <= LONG_MAX); typename EncodingPolicy::EncodedCharPtr pEnd = static_cast(ichMinLine) == IchMinLine() ? m_pchMinLine + cb : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, cch); *pbstrLine = SysAllocStringLen(NULL, cch); if (!*pbstrLine) { return E_OUTOFMEMORY; } this->ConvertToUnicode(*pbstrLine, cch, pStart, pEnd); return S_OK; } template class Scanner;