| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- #include "ParserPch.h"
- /*****************************************************************************
- *
- * The following table speeds various tests of characters, such as whether
- * a given character can be part of an identifier, and so on.
- */
- int CountNewlines(LPCOLESTR psz)
- {
- int cln = 0;
- while (0 != *psz)
- {
- switch (*psz++)
- {
- case _u('\xD'):
- if (*psz == _u('\xA'))
- {
- ++psz;
- }
- // fall-through
- case _u('\xA'):
- cln++;
- break;
- }
- }
- return cln;
- }
- BOOL Token::IsKeyword() const
- {
- // keywords (but not future reserved words)
- return (tk <= tkYIELD);
- }
- tokens Token::SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser)
- {
- Assert(parser);
- if(pattern)
- parser->RegisterRegexPattern(pattern);
- this->u.pattern = pattern;
- return tk = tkRegExp;
- }
- IdentPtr Token::CreateIdentifier(HashTbl * hashTbl)
- {
- Assert(this->u.pid == nullptr);
- if (this->u.pchMin)
- {
- Assert(IsIdentifier());
- IdentPtr pid = hashTbl->PidHashNameLen(this->u.pchMin, this->u.pchMin + this->u.length, this->u.length);
- this->u.pid = pid;
- return pid;
- }
- Assert(IsReservedWord());
- IdentPtr pid = hashTbl->PidFromTk(tk);
- this->u.pid = pid;
- return pid;
- }
- template <typename EncodingPolicy>
- Scanner<EncodingPolicy>::Scanner(Parser* parser, Token *ptoken, Js::ScriptContext* scriptContext)
- {
- Assert(ptoken);
- m_parser = parser;
- m_ptoken = ptoken;
- m_scriptContext = scriptContext;
- m_tempChBuf.m_pscanner = this;
- m_tempChBufSecondary.m_pscanner = this;
- this->charClassifier = scriptContext->GetCharClassifier();
- this->es6UnicodeMode = scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled();
- ClearStates();
- }
- template <typename EncodingPolicy>
- Scanner<EncodingPolicy>::~Scanner(void)
- {
- }
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::ClearStates()
- {
- m_pchBase = nullptr;
- m_pchLast = nullptr;
- m_pchMinLine = nullptr;
- m_pchMinTok = nullptr;
- m_currentCharacter = nullptr;
- m_pchPrevLine = nullptr;
- m_cMinTokMultiUnits = 0;
- m_cMinLineMultiUnits = 0;
- m_fStringTemplateDepth = 0;
- m_fHadEol = FALSE;
- m_fIsModuleCode = FALSE;
- m_doubleQuoteOnLastTkStrCon = FALSE;
- m_OctOrLeadingZeroOnLastTKNumber = false;
- m_EscapeOnLastTkStrCon = false;
- m_fNextStringTemplateIsTagged = false;
- m_DeferredParseFlags = ScanFlagNone;
- m_fYieldIsKeywordRegion = false;
- m_fAwaitIsKeywordRegion = false;
- m_line = 0;
- m_scanState = ScanStateNormal;
- m_ichMinError = 0;
- m_ichLimError = 0;
- m_startLine = 0;
- m_pchStartLine = NULL;
- m_iecpLimTokPrevious = (size_t)-1;
- m_ichLimTokPrevious = (charcount_t)-1;
- }
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::Clear()
- {
- EncodingPolicy::Clear();
- ClearStates();
- this->m_tempChBuf.Clear();
- this->m_tempChBufSecondary.Clear();
- }
- /*****************************************************************************
- *
- * Initializes the scanner to prepare to scan the given source text.
- */
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::SetText(EncodedCharPtr pszSrc, size_t offset, size_t length, charcount_t charOffset, bool isUtf8, ULONG grfscr, ULONG lineNumber)
- {
- // Save the start of the script and add the offset to get the point where we should start scanning.
- m_pchBase = pszSrc;
- m_pchLast = m_pchBase + offset + length;
- m_pchPrevLine = m_currentCharacter = m_pchMinLine = m_pchMinTok = pszSrc + offset;
- this->RestoreMultiUnits(offset - charOffset);
- // Absorb any byte order mark at the start
- if(offset == 0)
- {
- switch( this->PeekFull(m_currentCharacter, m_pchLast) )
- {
- case 0xFFEE: // "Opposite" endian BOM
- // We do not support big-endian encodings
- // fall-through
- case 0xFEFF: // "Correct" BOM
- this->template ReadFull<true>(m_currentCharacter, m_pchLast);
- break;
- }
- }
- m_line = lineNumber;
- m_startLine = lineNumber;
- m_pchStartLine = m_currentCharacter;
- m_ptoken->tk = tkNone;
- m_fIsModuleCode = (grfscr & fscrIsModuleCode) != 0;
- m_fHadEol = FALSE;
- m_DeferredParseFlags = ScanFlagNone;
- this->SetIsUtf8(isUtf8);
- }
#if ENABLE_BACKGROUND_PARSING
// Requests the standard character sets for both the encoded character type and
// char16 from the thread context, discarding the results.
// NOTE(review): presumably this forces them to be created before a background
// parse needs them - confirm against ThreadContext::GetStandardChars.
template <typename EncodingPolicy>
void Scanner<EncodingPolicy>::PrepareForBackgroundParse(Js::ScriptContext *scriptContext)
{
    scriptContext->GetThreadContext()->GetStandardChars(static_cast<EncodedChar*>(nullptr));
    scriptContext->GetThreadContext()->GetStandardChars(static_cast<char16*>(nullptr));
}
#endif
- //-----------------------------------------------------------------------------
- // Number of code points from 'first' up to, but not including the next
- // newline character, embedded NUL, or 'last', depending on which comes first.
- //
- // This is used to determine a length of BSTR, which can't contain a NUL character.
- //-----------------------------------------------------------------------------
- template <typename EncodingPolicy>
- charcount_t Scanner<EncodingPolicy>::LineLength(EncodedCharPtr first, EncodedCharPtr last, size_t* cb)
- {
- Assert(cb != nullptr);
- charcount_t result = 0;
- EncodedCharPtr p = first;
- for (;;)
- {
- EncodedCharPtr prev = p;
- switch( this->template ReadFull<false>(p, last) )
- {
- case kchNWL: // _C_NWL
- case kchRET:
- case kchLS:
- case kchPS:
- case kchNUL: // _C_NUL
- // p is now advanced past the line terminator character.
- // We need to know the number of bytes making up the line, not including the line terminator character.
- // To avoid subtracting a variable number of bytes because the line terminator characters are different
- // number of bytes long (plus there may be multiple valid encodings for these characters) just keep
- // track of the first byte of the line terminator character in prev.
- Assert(prev >= first);
- *cb = prev - first;
- return result;
- }
- result++;
- }
- }
- template <typename EncodingPolicy>
- charcount_t Scanner<EncodingPolicy>::UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd)
- {
- EncodedCharPtr p = start;
- charcount_t ich = ichStart;
- int32 current = line;
- charcount_t lastStart = ichStart;
- while (ich < ichEnd)
- {
- ich++;
- switch (this->template ReadFull<false>(p, last))
- {
- case kchRET:
- if (this->PeekFull(p, last) == kchNWL)
- {
- ich++;
- this->template ReadFull<false>(p, last);
- }
- // fall-through
- case kchNWL:
- case kchLS:
- case kchPS:
- current++;
- lastStart = ich;
- break;
- case kchNUL:
- goto done;
- }
- }
- done:
- line = current;
- return lastStart;
- }
- template <typename EncodingPolicy>
- bool Scanner<EncodingPolicy>::TryReadEscape(EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar)
- {
- Assert(outChar != nullptr);
- Assert(startingLocation <= endOfSource);
- EncodedCharPtr currentLocation = startingLocation;
- codepoint_t charToOutput = 0x0;
- // '\' is Assumed as there is only one caller
- // Read 'u' characters
- if (currentLocation >= endOfSource || this->ReadFirst(currentLocation, endOfSource) != 'u')
- {
- return false;
- }
- bool expectCurly = false;
- if (currentLocation < endOfSource && this->PeekFirst(currentLocation, endOfSource) == '{' && es6UnicodeMode)
- {
- expectCurly = true;
- // Move past the character
- this->ReadFirst(currentLocation, endOfSource);
- }
- uint i = 0;
- OLECHAR ch = 0;
- int hexValue = 0;
- uint maxHexDigits = (expectCurly ? MAXUINT32 : 4u);
- for(; i < maxHexDigits && currentLocation < endOfSource; i++)
- {
- if (!Js::NumberUtilities::FHexDigit(ch = this->ReadFirst(currentLocation, endOfSource), &hexValue))
- {
- break;
- }
- charToOutput = charToOutput * 0x10 + hexValue;
- if (charToOutput > 0x10FFFF)
- {
- return false;
- }
- }
- //At least 4 characters have to be read
- if (i == 0 || (i != 4 && !expectCurly))
- {
- return false;
- }
- Assert(expectCurly ? es6UnicodeMode : true);
- if (expectCurly && ch != '}')
- {
- return false;
- }
- *outChar = charToOutput;
- startingLocation = currentLocation;
- return true;
- }
- template <typename EncodingPolicy>
- template <bool bScan>
- bool Scanner<EncodingPolicy>::TryReadCodePointRest(codepoint_t lower, EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar)
- {
- Assert(outChar != nullptr);
- Assert(outContainsMultiUnitChar != nullptr);
- Assert(es6UnicodeMode);
- Assert(Js::NumberUtilities::IsSurrogateLowerPart(lower));
- EncodedCharPtr currentLocation = startingLocation;
- *outChar = lower;
- if (currentLocation < endOfSource)
- {
- size_t restorePoint = this->m_cMultiUnits;
- codepoint_t upper = this->template ReadFull<bScan>(currentLocation, endOfSource);
- if (Js::NumberUtilities::IsSurrogateUpperPart(upper))
- {
- *outChar = Js::NumberUtilities::SurrogatePairAsCodePoint(lower, upper);
- if (this->IsMultiUnitChar(static_cast<OLECHAR>(upper)))
- {
- *outContainsMultiUnitChar = true;
- }
- startingLocation = currentLocation;
- }
- else
- {
- this->RestoreMultiUnits(restorePoint);
- }
- }
- return true;
- }
- template <typename EncodingPolicy>
- template <bool bScan>
- inline bool Scanner<EncodingPolicy>::TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar)
- {
- Assert(outChar != nullptr);
- Assert(outContainsMultiUnitChar != nullptr);
- if (startingLocation >= endOfSource)
- {
- return false;
- }
- codepoint_t ch = this->template ReadFull<bScan>(startingLocation, endOfSource);
- if (FBigChar(ch))
- {
- if (this->IsMultiUnitChar(static_cast<OLECHAR>(ch)))
- {
- *outContainsMultiUnitChar = true;
- }
- if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
- {
- return TryReadCodePointRest<bScan>(ch, startingLocation, endOfSource, outChar, outContainsMultiUnitChar);
- }
- }
- else if (ch == '\\' && TryReadEscape(startingLocation, endOfSource, &ch))
- {
- *hasEscape = true;
- }
- *outChar = ch;
- return true;
- }
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp)
- {
- EncodedCharPtr p = *pp;
- EncodedCharPtr pchMin = p;
- // JS6 allows unicode characters in the form of \uxxxx escape sequences
- // to be part of the identifier.
- bool fHasEscape = false;
- bool fHasMultiChar = false;
- codepoint_t codePoint = INVALID_CODEPOINT;
- size_t multiUnitsBeforeLast = this->m_cMultiUnits;
- // Check if we started the id
- if (!TryReadCodePoint<true>(p, m_pchLast, &codePoint, &fHasEscape, &fHasMultiChar))
- {
- // If no chars. could be scanned as part of the identifier, return error.
- return tkScanError;
- }
- Assert(codePoint < 0x110000u);
- if (!charClassifier->IsIdStart(codePoint))
- {
- // Put back the last character
- this->RestoreMultiUnits(multiUnitsBeforeLast);
- // If no chars. could be scanned as part of the identifier, return error.
- return tkScanError;
- }
- return ScanIdentifierContinue(identifyKwds, fHasEscape, fHasMultiChar, pchMin, p, pp);
- }
- template <typename EncodingPolicy>
- BOOL Scanner<EncodingPolicy>::FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last)
- {
- if (EncodingPolicy::MultiUnitEncoding)
- {
- while (p < last)
- {
- EncodedChar currentChar = *p;
- if (this->IsMultiUnitChar(currentChar))
- {
- // multi unit character, we may not have reach the end yet
- return FALSE;
- }
- Assert(currentChar != '\\' || !charClassifier->IsIdContinueFast<false>(currentChar));
- if (!charClassifier->IsIdContinueFast<false>(currentChar))
- {
- // only reach the end of the identifier if it is not the start of an escape sequence
- return currentChar != '\\';
- }
- p++;
- }
- // We have reach the end of the identifier.
- return TRUE;
- }
- // Not fast path for non multi unit encoding
- return false;
- }
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar,
- EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp)
- {
- EncodedCharPtr last = m_pchLast;
- while (true)
- {
- // Fast path for utf8, non-multi unit char and not escape
- if (FastIdentifierContinue(p, last))
- {
- break;
- }
- // Slow path that has to deal with multi unit encoding
- codepoint_t codePoint = INVALID_CODEPOINT;
- EncodedCharPtr pchBeforeLast = p;
- size_t multiUnitsBeforeLast = this->m_cMultiUnits;
- if (TryReadCodePoint<true>(p, last, &codePoint, &fHasEscape, &fHasMultiChar))
- {
- Assert(codePoint < 0x110000u);
- if (charClassifier->IsIdContinue(codePoint))
- {
- continue;
- }
- }
- // Put back the last character
- p = pchBeforeLast;
- this->RestoreMultiUnits(multiUnitsBeforeLast);
- break;
- }
- m_lastIdentifierHasEscape = fHasEscape;
- Assert(p - pchMin > 0 && p - pchMin <= LONG_MAX);
- *pp = p;
- if (!identifyKwds)
- {
- return tkID;
- }
- // UTF16 Scanner are only for syntax coloring, so it shouldn't come here.
- if (EncodingPolicy::MultiUnitEncoding && !fHasMultiChar && !fHasEscape)
- {
- Assert(sizeof(EncodedChar) == 1);
- // If there are no escape, that the main scan loop would have found the keyword already
- // So we can just assume it is an ID
- DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p));
- DebugOnly(tokens tk = Ident::TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode()));
- Assert(tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword()));
- m_ptoken->SetIdentifier(reinterpret_cast<const char *>(pchMin), (int32)(p - pchMin));
- return tkID;
- }
- IdentPtr pid = PidOfIdentiferAt(pchMin, p, fHasEscape, fHasMultiChar);
- m_ptoken->SetIdentifier(pid);
- if (!fHasEscape)
- {
- // If it doesn't have escape, then Scan() should have taken care of keywords (except
- // yield if m_fYieldIsKeyword is false, in which case yield is treated as an identifier, and except
- // await if m_fAwaitIsKeyword is false, in which case await is treated as an identifier).
- // We don't have to check if the name is reserved word and return it as an Identifier
- Assert(pid->Tk(IsStrictMode()) == tkID
- || (pid->Tk(IsStrictMode()) == tkYIELD && !this->YieldIsKeyword())
- || (pid->Tk(IsStrictMode()) == tkAWAIT && !this->AwaitIsKeyword()));
- return tkID;
- }
- tokens tk = pid->Tk(IsStrictMode());
- return tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword()) ? tkID : tkNone;
- }
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::PidAt(size_t iecpMin, size_t iecpLim)
- {
- Assert(iecpMin < AdjustedLength() && iecpLim <= AdjustedLength() && iecpLim > iecpMin);
- return PidOfIdentiferAt(m_pchBase + iecpMin, m_pchBase + iecpLim);
- }
- template <typename EncodingPolicy>
- uint32 Scanner<EncodingPolicy>::UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last)
- {
- m_tempChBuf.Reset();
- while( p < last )
- {
- codepoint_t codePoint;
- bool hasEscape, isMultiChar;
- bool gotCodePoint = TryReadCodePoint<false>(p, last, &codePoint, &hasEscape, &isMultiChar);
- Assert(gotCodePoint);
- Assert(codePoint < 0x110000);
- if (codePoint < 0x10000)
- {
- m_tempChBuf.AppendCh((OLECHAR)codePoint);
- }
- else
- {
- char16 lower, upper;
- Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &upper);
- m_tempChBuf.AppendCh(lower);
- m_tempChBuf.AppendCh(upper);
- }
- }
- return m_tempChBuf.m_ichCur;
- }
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last)
- {
- int32 cch = UnescapeToTempBuf(p, last);
- return this->GetHashTbl()->PidHashNameLen(m_tempChBuf.m_prgch, cch);
- }
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar)
- {
- // If there is an escape sequence in the JS6 identifier or it is a UTF8
- // source then we have to convert it to the equivalent char so we use a
- // buffer for translation.
- if ((EncodingPolicy::MultiUnitEncoding && fHasMultiChar) || fHadEscape)
- {
- return PidOfIdentiferAt(p, last);
- }
- else if (EncodingPolicy::MultiUnitEncoding)
- {
- Assert(sizeof(EncodedChar) == 1);
- return this->GetHashTbl()->PidHashNameLen(reinterpret_cast<const char *>(p), reinterpret_cast<const char *>(last), (int32)(last - p));
- }
- else
- {
- Assert(sizeof(EncodedChar) == 2);
- return this->GetHashTbl()->PidHashNameLen(reinterpret_cast< const char16 * >(p), (int32)(last - p));
- }
- }
- template <typename EncodingPolicy>  // FScanNumber: scans the numeric literal that begins at p.
- typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, LikelyNumberType& likelyType, size_t savedMultiUnits)  // Writes the value to *pdbl and the guessed kind to likelyType; returns pchT, the position where conversion stopped.
- {  // savedMultiUnits is passed to RestoreMultiUnits right before each Error(...) raised below (ERRIdAfterLit, ERRbadNumber).
- EncodedCharPtr last = m_pchLast;
- EncodedCharPtr pchT = nullptr;  // set by whichever conversion routine runs
- bool baseSpecified = false;  // true only when a 0x/0o/0b prefix had no digits after it (see baseSpecifierCheck)
- likelyType = LikelyNumberType::Int;
- // Reset the "last number was octal / had a leading zero" flag before scanning
- m_OctOrLeadingZeroOnLastTKNumber = false;
- auto baseSpecifierCheck = [&pchT, &pdbl, p, &baseSpecified]()  // run after each prefixed (hex/octal/binary) conversion
- {
- if (pchT == p + 2)  // conversion stopped immediately after the two-character prefix: no digits were consumed
- {
- // An octal token '0' was followed by a base specifier: /0[xXoObB]/
- // This literal can no longer be a double
- *pdbl = 0;
- // Advance the character pointer to the base specifier
- pchT = p + 1;
- // Set the flag so we know to offset the potential identifier search after the literal
- baseSpecified = true;
- }
- };
- if ('0' == this->PeekFirst(p, last))  // leading zero: the second character selects the literal form
- {
- switch(this->PeekFirst(p + 1, last))
- {
- case '.':
- case 'e':
- case 'E':
- case 'n':  // NOTE(review): presumably the BigInt suffix ("0n") - confirm
- likelyType = LikelyNumberType::Double;
- // Floating point
- goto LFloat;
- case 'x':
- case 'X':
- // Hex
- *pdbl = Js::NumberUtilities::DblFromHex(p + 2, &pchT, m_scriptContext->GetConfig()->IsESNumericSeparatorEnabled());
- baseSpecifierCheck();
- goto LIdCheck;
- case 'o':
- case 'O':
- // Octal
- *pdbl = Js::NumberUtilities::DblFromOctal(p + 2, &pchT, m_scriptContext->GetConfig()->IsESNumericSeparatorEnabled());
- baseSpecifierCheck();
- goto LIdCheck;
- case 'b':
- case 'B':
- // Binary
- *pdbl = Js::NumberUtilities::DblFromBinary(p + 2, &pchT, m_scriptContext->GetConfig()->IsESNumericSeparatorEnabled());
- baseSpecifierCheck();
- goto LIdCheck;
- default:
- // Octal (leading zero without a base specifier; converted starting at the '0' itself)
- *pdbl = Js::NumberUtilities::DblFromOctal(p, &pchT);
- Assert(pchT > p);
- #if !SOURCERELEASE
- // If an octal literal is malformed then it is in fact a decimal literal.
- #endif // !SOURCERELEASE
- if(*pdbl != 0 || pchT > p + 1)
- m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok
- switch (*pchT)  // inspect the character that stopped the octal conversion
- {
- case '8':
- case '9':
- // case 'e':
- // case 'E':
- // case '.':
- m_OctOrLeadingZeroOnLastTKNumber = false; //08... or 09....
- goto LFloat;  // re-scan the whole literal from p with StrToDbl
- }
- goto LIdCheck;
- }
- }
- else
- {
- LFloat:  // also entered by goto from the leading-zero cases above
- *pdbl = Js::NumberUtilities::StrToDbl(p, &pchT, likelyType, m_scriptContext->GetConfig()->IsESBigIntEnabled(), m_scriptContext->GetConfig()->IsESNumericSeparatorEnabled());
- Assert(pchT == p || !Js::NumberUtilities::IsNan(*pdbl));
- if (likelyType == LikelyNumberType::BigInt)
- {
- Assert(*pdbl == 0);
- }
- // fall through to LIdCheck
- }
- LIdCheck:
- // https://tc39.github.io/ecma262/#sec-literals-numeric-literals
- // The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit.
- // For example : 3in is an error and not the two input elements 3 and in
- // If a base was specified, use the first character denoting the constant. In this case, pchT is pointing to the base specifier.
- EncodedCharPtr startingLocation = baseSpecified ? pchT + 1 : pchT;
- codepoint_t outChar = *startingLocation;  // first encoded unit after the literal
- if (this->IsMultiUnitChar((OLECHAR)outChar))
- {
- outChar = this->template ReadRest<true>((OLECHAR)outChar, startingLocation, last);  // decode the full multi-unit character
- }
- if (this->charClassifier->IsIdStart(outChar))
- {
- this->RestoreMultiUnits(savedMultiUnits);
- Error(ERRIdAfterLit);
- }
- // IsIdStart does not cover the unicode escape case. Try to read a unicode escape from the 'u' char.
- if (*pchT == '\\')  // note: tests the character at pchT, not at startingLocation
- {
- startingLocation++; // TryReadEscape expects us to point to the 'u', and since it is by reference we need to do it beforehand.
- if (TryReadEscape(startingLocation, m_pchLast, &outChar))
- {
- this->RestoreMultiUnits(savedMultiUnits);
- Error(ERRIdAfterLit);
- }
- }
- if (Js::NumberUtilities::IsDigit(*startingLocation))  // a decimal digit directly after the literal is also rejected
- {
- this->RestoreMultiUnits(savedMultiUnits);
- Error(ERRbadNumber);
- }
- return pchT;
- }
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::TryRescanRegExp()
- {
- EncodedCharPtr current = m_currentCharacter;
- tokens result = RescanRegExp();
- if (result == tkScanError)
- m_currentCharacter = current;
- return result;
- }
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::RescanRegExp()
- {
- #if DEBUG
- switch (m_ptoken->tk)
- {
- case tkDiv:
- Assert(m_currentCharacter == m_pchMinTok + 1);
- break;
- case tkAsgDiv:
- Assert(m_currentCharacter == m_pchMinTok + 2);
- break;
- default:
- AssertMsg(FALSE, "Who is calling RescanRegExp?");
- break;
- }
- #endif //DEBUG
- m_currentCharacter = m_pchMinTok;
- if (*m_currentCharacter != '/')
- Error(ERRnoSlash);
- m_currentCharacter++;
- tokens tk = tkNone;
- {
- ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc);
- tk = ScanRegExpConstant(&alloc);
- }
- return tk;
- }
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::RescanRegExpNoAST()
- {
- #if DEBUG
- switch (m_ptoken->tk)
- {
- case tkDiv:
- Assert(m_currentCharacter == m_pchMinTok + 1);
- break;
- case tkAsgDiv:
- Assert(m_currentCharacter == m_pchMinTok + 2);
- break;
- default:
- AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?");
- break;
- }
- #endif //DEBUG
- m_currentCharacter = m_pchMinTok;
- if (*m_currentCharacter != '/')
- Error(ERRnoSlash);
- m_currentCharacter++;
- tokens tk = tkNone;
- {
- ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc);
- {
- tk = ScanRegExpConstantNoAST(&alloc);
- }
- }
- return tk;
- }
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::RescanRegExpTokenizer()
- {
- #if DEBUG
- switch (m_ptoken->tk)
- {
- case tkDiv:
- Assert(m_currentCharacter == m_pchMinTok + 1);
- break;
- case tkAsgDiv:
- Assert(m_currentCharacter == m_pchMinTok + 2);
- break;
- default:
- AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?");
- break;
- }
- #endif //DEBUG
- m_currentCharacter = m_pchMinTok;
- if (*m_currentCharacter != '/')
- Error(ERRnoSlash);
- m_currentCharacter++;
- tokens tk = tkNone;
- ThreadContext *threadContext = ThreadContext::GetContextForCurrentThread();
- threadContext->EnsureRecycler();
- Js::TempArenaAllocatorObject *alloc = threadContext->GetTemporaryAllocator(_u("RescanRegExp"));
- TryFinally(
- [&]() /* try block */
- {
- tk = this->ScanRegExpConstantNoAST(alloc->GetAllocator());
- },
- [&](bool /* hasException */) /* finally block */
- {
- threadContext->ReleaseTemporaryAllocator(alloc);
- });
- return tk;
- }
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanRegExpConstant(ArenaAllocator* alloc)
- {
- PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);
- // SEE ALSO: RegexHelper::PrimCompileDynamic()
- #ifdef PROFILE_EXEC
- m_scriptContext->ProfileBegin(Js::RegexCompilePhase);
- #endif
- ArenaAllocator* ctAllocator = alloc;
- UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = m_scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0);
- UnifiedRegex::StandardChars<char16>* standardChars = m_scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- UnifiedRegex::DebugWriter *w = 0;
- if (REGEX_CONFIG_FLAG(RegexDebug))
- w = m_scriptContext->GetRegexDebugWriter();
- if (REGEX_CONFIG_FLAG(RegexProfile))
- m_scriptContext->GetRegexStatsDatabase()->BeginProfile();
- #endif
- UnifiedRegex::Node* root = 0;
- charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
- UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
- UnifiedRegex::Parser<EncodingPolicy, true> parser
- ( m_scriptContext
- , ctAllocator
- , standardEncodedChars
- , standardChars
- , this->IsUtf8()
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , w
- #endif
- );
- try
- {
- root = parser.ParseLiteral(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars, flags);
- }
- catch (UnifiedRegex::ParseError e)
- {
- #ifdef PROFILE_EXEC
- m_scriptContext->ProfileEnd(Js::RegexCompilePhase);
- #endif
- m_currentCharacter += e.encodedPos;
- Error(e.error);
- }
- UnifiedRegex::RegexPattern* pattern;
- if (m_parser->IsBackgroundParser())
- {
- // Avoid allocating pattern from recycler on background thread. The main thread will create the pattern
- // and hook it to this parse node.
- pattern = parser.template CompileProgram<false>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags);
- }
- else
- {
- pattern = parser.template CompileProgram<true>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags);
- }
- this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits
- return m_ptoken->SetRegex(pattern, m_parser);
- }
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanRegExpConstantNoAST(ArenaAllocator* alloc)
- {
- PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);
- ThreadContext *threadContext = m_scriptContext->GetThreadContext();
- UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = threadContext->GetStandardChars((EncodedChar*)0);
- UnifiedRegex::StandardChars<char16>* standardChars = threadContext->GetStandardChars((char16*)0);
- charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
- UnifiedRegex::Parser<EncodingPolicy, true> parser
- ( m_scriptContext
- , alloc
- , standardEncodedChars
- , standardChars
- , this->IsUtf8()
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , 0
- #endif
- );
- try
- {
- parser.ParseLiteralNoAST(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars);
- }
- catch (UnifiedRegex::ParseError e)
- {
- m_currentCharacter += e.encodedPos;
- Error(e.error);
- // never reached
- }
- UnifiedRegex::RegexPattern* pattern = parser.template CompileProgram<false>(nullptr, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, UnifiedRegex::NoRegexFlags);
- Assert(pattern == nullptr); // BuildAST == false, CompileProgram should return nullptr
- this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits
- return (m_ptoken->tk = tkRegExp);
- }
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanStringTemplateBegin(EncodedCharPtr *pp)
- {
- // String template must begin with a string constant followed by '`' or '${'
- ScanStringConstant<true, true>('`', pp);
- OLECHAR ch;
- EncodedCharPtr last = m_pchLast;
- ch = this->ReadFirst(*pp, last);
- if (ch == '`')
- {
- // Simple string template - no substitutions
- return tkStrTmplBasic;
- }
- else if (ch == '$')
- {
- ch = this->ReadFirst(*pp, last);
- if (ch == '{')
- {
- // Next token after expr should be tkStrTmplMid or tkStrTmplEnd.
- // In string template scanning mode, we expect the next char to be '}'
- // and will treat it as the beginning of tkStrTmplEnd or tkStrTmplMid
- m_fStringTemplateDepth++;
- // Regular string template begin - next is first substitution
- return tkStrTmplBegin;
- }
- }
- // Error - make sure pointer stays at the last character of the error token instead of after it in the error case
- (*pp)--;
- return ScanError(m_currentCharacter, tkStrTmplBegin);
- }
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp)
- {
- // String template middle and end tokens must begin with a string constant
- ScanStringConstant<true, true>('`', pp);
- OLECHAR ch;
- EncodedCharPtr last = m_pchLast;
- ch = this->ReadFirst(*pp, last);
- if (ch == '`')
- {
- // No longer in string template scanning mode
- m_fStringTemplateDepth--;
- // This is the last part of the template ...`
- return tkStrTmplEnd;
- }
- else if (ch == '$')
- {
- ch = this->ReadFirst(*pp, last);
- if (ch == '{')
- {
- // This is just another middle part of the template }...${
- return tkStrTmplMid;
- }
- }
- // Error - make sure pointer stays at the last character of the error token instead of after it in the error case
- (*pp)--;
- return ScanError(m_currentCharacter, tkStrTmplEnd);
- }
/*****************************************************************************
*
*  Parses a string constant. Note that the string value is stored in
*  a volatile buffer (or allocated on the heap if too long), and thus
*  the string should be saved off before the next token is scanned.
*
*  Template parameters:
*    stringTemplateMode - scan the literal text of a template literal: line
*                         breaks are accepted, '`' and '${' end the scan
*                         without being consumed, octal escapes are errors.
*    createRawString    - also accumulate the raw (unescaped) text in
*                         m_tempChBufSecondary. Must equal stringTemplateMode
*                         (enforced by the static_assert below).
*
*  Parameters:
*    delim - the quote character that terminates a regular string.
*    pp    - in: position just past the opening delimiter;
*            out: position where scanning stopped.
*
*  Returns tkStrCon. The cooked value is accumulated in m_tempChBuf and is
*  registered as the token's identifier unless pid creation is suppressed.
*  Malformed input is reported through Error().
*/
template<typename EncodingPolicy>
template<bool stringTemplateMode, bool createRawString>
tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
{
    static_assert((stringTemplateMode && createRawString) || (!stringTemplateMode && !createRawString), "stringTemplateMode and createRawString must have the same value");

    // ch    - cooked character appended to m_tempChBuf
    // rawch - character appended to the raw buffer for the same input
    // c     - scratch character used while decoding escapes
    OLECHAR ch, c, rawch;
    int wT;
    EncodedCharPtr p = *pp;
    EncodedCharPtr last = m_pchLast;

    // Reset
    m_OctOrLeadingZeroOnLastTKNumber = false;
    m_EscapeOnLastTkStrCon = FALSE;

    m_tempChBuf.Reset();

    // Use template parameter to gate raw string creation.
    // If createRawString is false, all these operations should be no-ops
    if (createRawString)
    {
        m_tempChBufSecondary.Reset();
    }

    for (;;)
    {
        switch ((rawch = ch = this->ReadFirst(p, last)))
        {
        case kchRET:
            if (stringTemplateMode)
            {
                if (this->PeekFirst(p, last) == kchNWL)
                {
                    // Eat the <LF> char, ignore return
                    this->ReadFirst(p, last);
                }

                // Both <CR> and <CR><LF> are normalized to <LF> in template cooked and raw values
                ch = rawch = kchNWL;
            }

            // Fall through
        case kchNWL:
            if (stringTemplateMode)
            {
                // Notify the scanner to update current line, number of lines etc
                NotifyScannedNewLine();

                // We haven't updated m_currentCharacter yet, so make sure the MinLine info is correct in case we error out.
                m_pchMinLine = p;

                break;
            }

            // An unescaped line break inside a regular string is an error.
            m_currentCharacter = p - 1;
            Error(ERRnoStrEnd);

        case '"':
        case '\'':
            // Only the quote matching the opening delimiter ends the string.
            if (ch == delim)
                goto LBreak;
            break;

        case '`':
            // In string template scan mode, don't consume the '`' - we need to differentiate
            // between a closed string template and the expression open sequence - ${
            if (stringTemplateMode)
            {
                p--;
                goto LBreak;
            }

            // If we aren't scanning for a string template, do the default thing
            goto LMainDefault;

        case '$':
            // If we are parsing a string literal part of a string template, ${ indicates we need to switch
            // to parsing an expression.
            if (stringTemplateMode && this->PeekFirst(p, last) == '{')
            {
                // Rewind to the $ and return
                p--;
                goto LBreak;
            }

            // If we aren't scanning for a string template, do the default thing
            goto LMainDefault;

        case kchNUL:
            // A NUL read past the end of the source means the string was never closed;
            // otherwise it is appended like any other character.
            if (p > last)
            {
                m_currentCharacter = p - 1;
                Error(ERRnoStrEnd);
            }
            break;

        default:
LMainDefault:
            if (this->IsMultiUnitChar(ch))
            {
                rawch = ch = this->template ReadRest<true>(ch, p, last);
            }
            break;

        case kchBSL:
            // In raw mode '\\' is not an escape character, just add the char into the raw buffer.
            m_tempChBufSecondary.template AppendCh<createRawString>(ch);

            m_EscapeOnLastTkStrCon=TRUE;

            // In raw mode, we append the raw char itself and not the escaped value so save the char.
            rawch = ch = this->ReadFirst(p, last);
            codepoint_t codePoint = 0;
            uint errorType = (uint)ERRbadHexDigit;
            switch (ch)
            {
            case 'b':
                ch = 0x08;
                break;
            case 't':
                ch = 0x09;
                break;
            case 'v':
                ch = 0x0B; //Only in ES5 mode
                break; //same as default
            case 'n':
                ch = 0x0A;
                break;
            case 'f':
                ch = 0x0C;
                break;
            case 'r':
                ch = 0x0D;
                break;
            case 'x':
                // Insert the 'x' here before jumping to parse the hex digits.
                m_tempChBufSecondary.template AppendCh<createRawString>(ch);

                // 2 hex digits
                ch = 0;
                goto LTwoHex;
            case 'u':
                // Raw string just inserts a 'u' here.
                m_tempChBufSecondary.template AppendCh<createRawString>(ch);

                ch = 0;
                // \uXXXX (four hex digits) or, in ES6 unicode mode, \u{X...}.
                if (Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto LFourHex;
                else if (c != '{' || !this->es6UnicodeMode)
                    goto ReturnScanError;

                Assert(c == '{');
                // c should definitely be a '{' which should be appended to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                //At least one digit is expected
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                {
                    goto ReturnScanError;
                }

                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                codePoint = static_cast<codepoint_t>(wT);

                // Accumulate remaining hex digits, rejecting values beyond 0x10FFFF.
                while(Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                {
                    m_tempChBufSecondary.template AppendCh<createRawString>(c);
                    codePoint <<= 4;
                    codePoint += static_cast<codepoint_t>(wT);

                    if (codePoint > 0x10FFFF)
                    {
                        errorType = (uint)ERRInvalidCodePoint;
                        goto ReturnScanError;
                    }
                }

                if (c != '}')
                {
                    errorType = (uint)ERRMissingCurlyBrace;
                    goto ReturnScanError;
                }

                Assert(codePoint <= 0x10FFFF);

                if (codePoint >= 0x10000)
                {
                    // Split into a surrogate pair: the first half is appended here,
                    // the second half is left in ch and appended at the bottom of the loop.
                    OLECHAR lower = 0;
                    Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &ch);
                    m_tempChBuf.AppendCh(lower);
                }
                else
                {
                    ch = (char16)codePoint;
                }

                // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
                if (createRawString)
                    rawch = c;

                break;
LFourHex:
                codePoint = 0x0;
                // Append first hex digit character to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                codePoint += static_cast<codepoint_t>(wT * 0x1000);
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                // Append the second hex digit character of \uXXXX to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);
                codePoint += static_cast<codepoint_t>(wT * 0x0100);

LTwoHex:
                // This code path doesn't expect curly.
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                // Append this hex digit (third of \uXXXX, first of \xXX) to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                codePoint += static_cast<codepoint_t>(wT * 0x0010);

                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                codePoint += static_cast<codepoint_t>(wT);

                // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
                if (createRawString)
                    rawch = c;

                if (codePoint < 0x10000)
                {
                    ch = static_cast<OLECHAR>(codePoint);
                }
                else
                {
                    goto ReturnScanError;
                }
                break;
            case '0':
            case '1':
            case '2':
            case '3':
                // 1 to 3 octal digits

                ch -= '0';

                // Octal escape sequences are not allowed inside string template literals
                if (stringTemplateMode)
                {
                    // Only a lone \0 (not followed by an octal digit) is accepted here.
                    c = this->PeekFirst(p, last);
                    if (ch != 0 || (c >= '0' && c <= '7'))
                    {
                        errorType = (uint)ERRES5NoOctal;
                        goto ReturnScanError;
                    }
                    break;
                }

                wT = (c = this->ReadFirst(p, last)) - '0';
                if ((char16)wT > 7)
                {
                    // Next character is not an octal digit: put it back.
                    if (ch != 0 || ((char16)wT <= 9))
                    {
                        m_OctOrLeadingZeroOnLastTKNumber = true;
                    }
                    p--;
                    break;
                }

                m_OctOrLeadingZeroOnLastTKNumber = true;
                ch = static_cast< OLECHAR >(ch * 8 + wT);
                goto LOneOctal;
            case '4':
            case '5':
            case '6':
            case '7':
                // 1 to 2 octal digits

                // Octal escape sequences are not allowed inside string template literals
                if (stringTemplateMode)
                {
                    errorType = (uint)ERRES5NoOctal;
                    goto ReturnScanError;
                }

                ch -= '0';

                m_OctOrLeadingZeroOnLastTKNumber = true;

LOneOctal:
                wT = (c = this->ReadFirst(p, last)) - '0';
                if ((char16)wT > 7)
                {
                    // Next character is not an octal digit: put it back.
                    p--;
                    break;
                }

                ch = static_cast< OLECHAR >(ch * 8 + wT);
                break;

            case kchRET:        // 0xD
                if (stringTemplateMode)
                {
                    // If this is \<CR><LF> we can eat the <LF> right now
                    if (this->PeekFirst(p, last) == kchNWL)
                    {
                        // Eat the <LF> char, ignore return
                        this->ReadFirst(p, last);
                    }

                    // Both \<CR> and \<CR><LF> are normalized to \<LF> in template raw string
                    rawch = kchNWL;
                }
                // Fall through
            case kchLS:         // 0x2028, classifies as new line
            case kchPS:         // 0x2029, classifies as new line
            case kchNWL:        // 0xA
LEcmaEscapeLineBreak:
                if (stringTemplateMode)
                {
                    // We're going to ignore the line continuation tokens for the cooked strings, but we need to append the token for raw strings
                    m_tempChBufSecondary.template AppendCh<createRawString>(rawch);

                    // Template literal strings ignore all escaped line continuation tokens
                    NotifyScannedNewLine();

                    // We haven't updated m_currentCharacter yet, so make sure the MinLine info is correct in case we error out.
                    m_pchMinLine = p;

                    continue;
                }

                // Line continuation in a regular string: nothing is appended.
                m_currentCharacter = p;
                ScanNewLine(ch);
                p = m_currentCharacter;
                continue;

            case 0:
                if (p >= last)
                {
                    errorType = (uint)ERRnoStrEnd;

                    // Shared error exit for every malformed escape in this switch.
ReturnScanError:
                    m_currentCharacter = p - 1;
                    Error(errorType);
                }
                else if (stringTemplateMode)
                {
                    // Escaped null character is translated into 0x0030 for raw template literals
                    rawch = 0x0030;
                }
                break;

            default:
                if (this->IsMultiUnitChar(ch))
                {
                    rawch = ch = this->template ReadRest<true>(ch, p, last);
                    switch (ch)
                    {
                    case kchLS:
                    case kchPS:
                        goto LEcmaEscapeLineBreak;
                    }
                }
                break;
            }
            break;
        }

        // Append the cooked character and (when enabled) its raw counterpart.
        m_tempChBuf.AppendCh(ch);
        m_tempChBufSecondary.template AppendCh<createRawString>(rawch);
    }

LBreak:
    bool createPid = true;

    // When string pids are suppressed, only the exact text "use strict" still gets one.
    if ((m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0)
    {
        createPid = false;

        if ((m_tempChBuf.m_ichCur == 10) && (0 == memcmp(_u("use strict"), m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur * sizeof(OLECHAR))))
        {
            createPid = true;
        }
    }

    if (createPid)
    {
        m_ptoken->SetIdentifier(this->GetHashTbl()->PidHashNameLen(m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur));
    }
    else
    {
        m_ptoken->SetIdentifier(NULL);
    }

    m_scanState = ScanStateNormal;
    m_doubleQuoteOnLastTkStrCon = '"' == delim;

    *pp = p;

    return tkStrCon;
}
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
- {
- return ScanStringConstant<false, false>(delim, pp);
- }
- /*****************************************************************************
- *
- * Consume a C-style comment.
- */
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef)
- {
- Assert(containTypeDef != nullptr);
- EncodedCharPtr p = *pp;
- *containTypeDef = false;
- EncodedCharPtr last = m_pchLast;
- OLECHAR ch;
- for (;;)
- {
- switch((ch = this->ReadFirst(p, last)))
- {
- case '*':
- if (*p == '/')
- {
- *pp = p + 1;
- return tkNone;
- }
- break;
- // ES 2015 11.3 Line Terminators
- case kchLS: // 0x2028, classifies as new line
- case kchPS: // 0x2029, classifies as new line
- LEcmaLineBreak:
- goto LLineBreak;
- case kchRET:
- case kchNWL:
- LLineBreak:
- m_fHadEol = TRUE;
- m_currentCharacter = p;
- ScanNewLine(ch);
- p = m_currentCharacter;
- break;
- case kchNUL:
- if (p >= last)
- {
- m_currentCharacter = p - 1;
- *pp = p - 1;
- Error(ERRnoCmtEnd);
- }
- break;
- default:
- if (this->IsMultiUnitChar(ch))
- {
- ch = this->template ReadRest<true>(ch, p, last);
- switch (ch)
- {
- case kchLS:
- case kchPS:
- goto LEcmaLineBreak;
- }
- }
- break;
- }
- }
- }
- /*****************************************************************************
- *
- * We've encountered a newline - update various counters and things.
- */
- template<typename EncodingPolicy>
- void Scanner<EncodingPolicy>::ScanNewLine(uint ch)
- {
- if (ch == '\r' && PeekNextChar() == '\n')
- {
- ReadNextChar();
- }
- NotifyScannedNewLine();
- }
- /*****************************************************************************
- *
- * We've encountered a newline - update various counters and things.
- */
- template<typename EncodingPolicy>
- void Scanner<EncodingPolicy>::NotifyScannedNewLine()
- {
- // update in scanner: previous line, current line, number of lines.
- m_line++;
- m_pchPrevLine = m_pchMinLine;
- m_pchMinLine = m_currentCharacter;
- m_cMinLineMultiUnits = this->m_cMultiUnits;
- }
- /*****************************************************************************
- *
- * Delivers a token stream.
- */
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanForcingPid()
- {
- if (m_DeferredParseFlags != ScanFlagNone)
- {
- BYTE deferredParseFlagsSave = m_DeferredParseFlags;
- m_DeferredParseFlags = ScanFlagNone;
- tokens result = tkEOF;
- TryFinally(
- [&]() /* try block */
- {
- result = this->Scan();
- },
- [&](bool) /* finally block */
- {
- this->m_DeferredParseFlags = deferredParseFlagsSave;
- });
- return result;
- }
- return Scan();
- }
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::Scan()
- {
- return ScanCore(true);
- }
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanNoKeywords()
- {
- return ScanCore(false);
- }
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanAhead()
- {
- return ScanNoKeywords();
- }
// Core tokenizer: scans one token starting at m_currentCharacter.
//
// identifyKwds is forwarded to the identifier scanners (ScanIdentifierContinue /
// ScanIdentifier). Whitespace, line breaks and comments are skipped by looping;
// line breaks set m_fHadEol. On exit m_currentCharacter points just past the
// token, m_pchMinTok at its first character, and the token kind is stored in
// m_ptoken->tk and returned. Invalid input is reported through Error().
//
// NOTE: kwd-swtch.h is textually included inside the main switch, so the local
// names and labels of this function are part of that header's contract.
template<typename EncodingPolicy>
tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)
{
    codepoint_t ch;
    // NOTE(review): firstChar, secondChar, commentStartLine, containTypeDef and
    // seenDelimitedCommentEnd are written but never read in the visible body.
    OLECHAR firstChar;
    OLECHAR secondChar;
    EncodedCharPtr pchT;
    size_t multiUnits = 0;
    EncodedCharPtr p = m_currentCharacter;
    EncodedCharPtr last = m_pchLast;
    bool seenDelimitedCommentEnd = false;

    // store the last token
    m_tkPrevious = m_ptoken->tk;
    m_iecpLimTokPrevious = IecpLimTok();    // Introduced for use by lambda parsing to find correct span of expression lambdas
    m_ichLimTokPrevious = IchLimTok();
    size_t savedMultiUnits = this->m_cMultiUnits;

    // Already at (or past) the end of the source: report EOF right away.
    if (p >= last)
    {
        m_pchMinTok = p;
        m_cMinTokMultiUnits = this->m_cMultiUnits;
        goto LEof;
    }
    tokens token;
    m_fHadEol = FALSE;
    CharTypes chType;
    charcount_t commentStartLine;

    // A pending scan state means the next token continues a template literal.
    if (m_scanState && *p != 0)
    {
        if (m_scanState == ScanStateStringTemplateMiddleOrEnd)
        {
            AssertMsg(m_fStringTemplateDepth > 0,
                "Shouldn't be trying to parse a string template end or middle token if we aren't scanning a string template");

            m_scanState = ScanStateNormal;

            pchT = p;
            token = ScanStringTemplateMiddleOrEnd(&pchT);
            p = pchT;

            goto LDone;
        }
    }

    for (;;)
    {
LLoop:
        // Each iteration starts a fresh token candidate at p.
        m_pchMinTok = p;
        m_cMinTokMultiUnits = this->m_cMultiUnits;
        ch = this->ReadFirst(p, last);
#if DEBUG
        chType = this->charClassifier->GetCharType((OLECHAR)ch);
#endif
        switch (ch)
        {
LDefault:
        default:
            if (ch == kchLS ||
                ch == kchPS )
            {
                goto LNewLine;
            }
            {
                BOOL isMultiUnit = this->IsMultiUnitChar((OLECHAR)ch);
                if (isMultiUnit)
                {
                    ch = this->template ReadRest<true>((OLECHAR)ch, p, last);
                }

                // Combine a surrogate pair into a single code point when enabled.
                if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
                {
                    codepoint_t upper = this->PeekFull(p, last);

                    if (Js::NumberUtilities::IsSurrogateUpperPart(upper))
                    {
                        // Consume the rest of the utf8 bytes for the codepoint
                        OLECHAR decodedUpper = this->ReadSurrogatePairUpper(p, last);
                        Assert(decodedUpper == (OLECHAR) upper);
                        ch = Js::NumberUtilities::SurrogatePairAsCodePoint(ch, upper);
                    }
                }

                if (this->charClassifier->IsIdStart(ch))
                {
                    // We treat IDContinue as an error.
                    token = ScanIdentifierContinue(identifyKwds, false, !!isMultiUnit, m_pchMinTok, p, &p);
                    break;
                }
            }

            chType = this->charClassifier->GetCharType(ch);
            switch (chType)
            {
            case _C_WSP: continue;
            case _C_NWL: goto LNewLine;
            // All other types (except errors) are handled by the outer switch.
            }
            Assert(chType == _C_LET || chType == _C_ERR || chType == _C_UNK || chType == _C_BKQ || chType == _C_SHP || chType == _C_AT || chType == _C_DIG);
            m_currentCharacter = p - 1;
            Error(ERRillegalChar);
            continue;

        case '\0':
            // Put back the null in case we get called again.
            p--;
            if (p < last)
            {
                // A \0 prior to the end of the text is an invalid character.
                m_currentCharacter = p;
                Error(ERRillegalChar);
            }
LEof:
            Assert(p >= last);
            token = tkEOF;
            break;

        case 0x0009:
        case 0x000B:
        case 0x000C:
        case 0x0020:
            Assert(chType == _C_WSP);
            continue;

        case '.':
            if (!Js::NumberUtilities::IsDigit(*p))
            {
                // Not a double
                if (m_scriptContext->GetConfig()->IsES6SpreadEnabled() &&
                    this->PeekFirst(p, last) == '.' &&
                    this->PeekFirst(p + 1, last) == '.')
                {
                    token = tkEllipsis;
                    p += 2;
                }
                else
                {
                    token = tkDot;
                }
                break;
            }
            // May be a double, fall through
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            {
                double dbl;
                Assert(chType == _C_DIG || chType == _C_DOT);
                // Rewind to the start of the token so FScanNumber sees the whole number.
                p = m_pchMinTok;
                this->RestoreMultiUnits(m_cMinTokMultiUnits);
                LikelyNumberType likelyType = LikelyNumberType::Int;
                pchT = FScanNumber(p, &dbl, likelyType, savedMultiUnits);
                // No characters consumed: not a valid number.
                if (p == pchT)
                {
                    this->RestoreMultiUnits(savedMultiUnits);
                    Assert(this->PeekFirst(p, last) != '.');
                    Error(ERRbadNumber);
                }
                Assert(!Js::NumberUtilities::IsNan(dbl));

                if (likelyType == LikelyNumberType::BigInt)
                {
                    Assert(m_scriptContext->GetConfig()->IsESBigIntEnabled());
                    AssertOrFailFast(pchT - p < UINT_MAX);
                    token = tkBigIntCon;
                    m_ptoken->SetBigInt(this->GetHashTbl()->PidHashNameLen(p, pchT, (uint32) (pchT - p)));
                    p = pchT;
                    break;
                }
                p = pchT;

                // Values that fit an int32 become tkIntCon; everything else is tkFltCon.
                int32 value;
                if ((likelyType == LikelyNumberType::Int) && Js::NumberUtilities::FDblIsInt32(dbl, &value))
                {
                    m_ptoken->SetLong(value);
                    token = tkIntCon;
                }
                else
                {
                    token = tkFltCon;
                    m_ptoken->SetDouble(dbl, likelyType == LikelyNumberType::Int);
                }

                break;
            }

        case '(': Assert(chType == _C_LPR); token = tkLParen; break;
        case ')': Assert(chType == _C_RPR); token = tkRParen; break;
        case ',': Assert(chType == _C_CMA); token = tkComma;  break;
        case ';': Assert(chType == _C_SMC); token = tkSColon; break;
        case '[': Assert(chType == _C_LBR); token = tkLBrack; break;
        case ']': Assert(chType == _C_RBR); token = tkRBrack; break;
        case '~': Assert(chType == _C_TIL); token = tkTilde;  break;
        case '?':
            Assert(chType == _C_QUE);
            token = tkQMark;
            if (m_scriptContext->GetConfig()->IsESNullishCoalescingOperatorEnabled() && this->PeekFirst(p, last) == '?')
            {
                p++;
                token = tkCoalesce;
                break;
            }
            break;

        case '{': Assert(chType == _C_LC);  token = tkLCurly; break;

        // ES 2015 11.3 Line Terminators
        case '\r':
        case '\n':
        // kchLS:
        // kchPS:
LNewLine:
            m_currentCharacter = p;
            ScanNewLine(ch);
            p = m_currentCharacter;
            m_fHadEol = TRUE;
            continue;

        // The labelled blocks below are only reachable through goto.
        // NOTE(review): presumably targeted by kwd-swtch.h - confirm against that header.
LReserved:
            {
                // We will derive the PID from the token
                Assert(token < tkID);
                m_ptoken->SetIdentifier(NULL);
                goto LDone;
            }

LEval:
            {
                token = tkID;
                if (!this->m_parser) goto LIdentifier;
                m_ptoken->SetIdentifier(this->m_parser->GetEvalPid());
                goto LDone;
            }

LArguments:
            {
                token = tkID;
                if (!this->m_parser) goto LIdentifier;
                m_ptoken->SetIdentifier(this->m_parser->GetArgumentsPid());
                goto LDone;
            }

LTarget:
            {
                token = tkID;
                if (!this->m_parser) goto LIdentifier;
                m_ptoken->SetIdentifier(this->m_parser->GetTargetPid());
                goto LDone;
            }

#include "kwd-swtch.h"
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        // Lower-case letters handled in kwd-swtch.h above during reserved word recognition.
        case '$': case '_':
LIdentifier:
            Assert(this->charClassifier->IsIdStart(ch));
            Assert(ch < 0x10000 && !this->IsMultiUnitChar((OLECHAR)ch));
            token = ScanIdentifierContinue(identifyKwds, false, false, m_pchMinTok, p, &p);
            break;

        case '`':
            Assert(chType == _C_BKQ);

            pchT = p;
            token = ScanStringTemplateBegin(&pchT);
            p = pchT;
            break;

        case '}':
            Assert(chType == _C_RC);
            token = tkRCurly;
            break;

        case '\\':
            // Re-scan from the backslash itself (p is already one past it).
            pchT = p - 1;
            token = ScanIdentifier(identifyKwds, &pchT);
            if (tkScanError == token)
            {
                m_currentCharacter = p;
                Error(ERRillegalChar);
            }
            p = pchT;
            break;

        case ':':
            token = tkColon;
            break;

        case '=':
            token = tkAsg;
            switch (this->PeekFirst(p, last))
            {
            case '=':
                p++;
                token = tkEQ;
                if (this->PeekFirst(p, last) == '=')
                {
                    p++;
                    token = tkEqv;
                }
                break;
            case '>':
                p++;
                token = tkDArrow;
                break;
            }
            break;

        case '!':
            token = tkBang;
            if (this->PeekFirst(p, last) == '=')
            {
                p++;
                token = tkNE;
                if (this->PeekFirst(p, last) == '=')
                {
                    p++;
                    token = tkNEqv;
                }
            }
            break;

        case '+':
            token = tkAdd;
            switch (this->PeekFirst(p, last))
            {
            case '=':
                p++;
                token = tkAsgAdd;
                break;
            case '+':
                p++;
                token = tkInc;
                break;
            }
            break;

        case '-':
            token = tkSub;
            switch (this->PeekFirst(p, last))
            {
            case '=':
                p++;
                token = tkAsgSub;
                break;
            case '-':
                p++;
                token = tkDec;
                if (!m_fIsModuleCode)
                {
                    // https://tc39.github.io/ecma262/#prod-annexB-MultiLineComment
                    // If there was a new line in the multi-line comment, the text after --> is a comment.
                    if ('>' == this->PeekFirst(p, last) && m_fHadEol)
                    {
                        goto LSkipLineComment;
                    }
                }
                break;
            }
            break;

        case '*':
            token = tkStar;
            switch(this->PeekFirst(p, last))
            {
            case '=' :
                p++;
                token = tkAsgMul;
                break;
            case '*' :
                if (!m_scriptContext->GetConfig()->IsES7ExponentiationOperatorEnabled())
                {
                    break;
                }
                p++;
                token = tkExpo;
                if (this->PeekFirst(p, last) == '=')
                {
                    p++;
                    token = tkAsgExpo;
                }
            }
            break;

        case '#':
            // Hashbang syntax is a single line comment only if it is the first token in the source
            if (m_scriptContext->GetConfig()->IsESHashbangEnabled() && this->PeekFirst(p, last) == '!' && m_pchBase == m_pchMinTok)
            {
                p++;
                goto LSkipLineComment;
            }
            goto LDefault;

        case '/':
            token = tkDiv;
            switch(this->PeekFirst(p, last))
            {
            case '=':
                p++;
                token = tkAsgDiv;
                break;
            case '/':
                if (p >= last)
                {
                    AssertMsg(!m_fIsModuleCode, "Do we have other line comment cases scanning pass last?");

                    // Effective source length may have excluded HTMLCommentSuffix "//... -->". If we are scanning
                    // those, we have passed "last" already. Move back and return EOF.
                    p = last;
                    goto LEof;
                }
                ch = *++p;
                firstChar = (OLECHAR)ch;
LSkipLineComment:
                // pchT remembers where a multi-unit character started so the scan can
                // be rewound to it if that character turns out to be a line break.
                pchT = NULL;
                for (;;)
                {
                    switch ((ch = this->ReadFirst(p, last)))
                    {
                    case kchLS:         // 0x2028, classifies as new line
                    case kchPS:         // 0x2029, classifies as new line
LEcmaCommentLineBreak:
                        // kchPS and kchLS are more than one unit in UTF-8.
                        if (pchT)
                        {
                            // kchPS and kchLS are more than one unit in UTF-8.
                            p = pchT;
                        }
                        else
                        {
                            // But only a single code unit in UTF16
                            p--;
                        }
                        this->RestoreMultiUnits(multiUnits);
                        goto LCommentLineBreak;

                    case kchNWL:
                    case kchRET:
                        // Leave the line break unread; the outer loop handles it.
                        p--;
LCommentLineBreak:
                        // Subtract the comment length from the total char count for the purpose
                        // of deciding whether to defer AST and byte code generation.
                        m_parser->ReduceDeferredScriptLength((ULONG)(p - m_pchMinTok));
                        break;
                    case kchNUL:
                        // Because we used ReadFirst, we have advanced p. The character that we are looking at is actually is p - 1.
                        // If p == last, we are looking at p - 1, it is still within the source buffer, and we need to consider it part of the comment
                        // Only if p > last that we have pass the source buffer and consider it a line break
                        if (p > last)
                        {
                            p--;
                            goto LCommentLineBreak;
                        }
                        continue;

                    default:
                        if (this->IsMultiUnitChar((OLECHAR)ch))
                        {
                            pchT = p - 1;
                            multiUnits = this->m_cMultiUnits;
                            switch (ch = this->template ReadRest<true>((OLECHAR)ch, p, last))
                            {
                            case kchLS:
                            case kchPS:
                                goto LEcmaCommentLineBreak;
                            }
                        }
                        continue;
                    }

                    break;
                }

                // Line comment consumed: resume scanning for a real token.
                continue;

            case '*':
                ch = *++p;
                firstChar = (OLECHAR)ch;
                if ((p + 1) < last)
                {
                    secondChar = (OLECHAR)(*(p + 1));
                }
                else
                {
                    secondChar = '\0';
                }

                pchT = p;
                commentStartLine = m_line;
                bool containTypeDef;
                if (tkNone == (token = SkipComment(&pchT, &containTypeDef)))
                {
                    // Subtract the comment length from the total char count for the purpose
                    // of deciding whether to defer AST and byte code generation.
                    m_parser->ReduceDeferredScriptLength((ULONG)(pchT - m_pchMinTok));
                    p = pchT;
                    seenDelimitedCommentEnd = true;
                    goto LLoop;
                }
                p = pchT;
                break;
            }
            break;

        case '%':
            Assert(chType == _C_PCT);
            token = tkPct;
            if (this->PeekFirst(p, last) == '=')
            {
                p++;
                token = tkAsgMod;
            }
            break;

        case '<':
            Assert(chType == _C_LT);
            token = tkLT;
            switch (this->PeekFirst(p, last))
            {
            case '=':
                p++;
                token = tkLE;
                break;
            case '<':
                p++;
                token = tkLsh;
                if (this->PeekFirst(p, last) == '=')
                {
                    p++;
                    token = tkAsgLsh;
                    break;
                }
                break;
            case '!':
                // ES 2015 B.1.3 -  HTML comments are only allowed when parsing non-module code.
                if (!m_fIsModuleCode && this->PeekFirst(p + 1, last) == '-' && this->PeekFirst(p + 2, last) == '-')
                {
                    // This is a "<!--" comment - treat as //
                    if (p >= last)
                    {
                        // Effective source length may have excluded HTMLCommentSuffix "<!-- ... -->". If we are scanning
                        // those, we have passed "last" already. Move back and return EOF.
                        p = last;
                        goto LEof;
                    }
                    firstChar = '!';
                    goto LSkipLineComment;
                }
                break;
            }
            break;

        case '>':
            Assert(chType == _C_GT);
            token = tkGT;
            switch (this->PeekFirst(p, last))
            {
            case '=':
                p++;
                token = tkGE;
                break;
            case '>':
                p++;
                token = tkRsh;
                switch (this->PeekFirst(p, last))
                {
                case '=':
                    p++;
                    token = tkAsgRsh;
                    break;
                case '>':
                    p++;
                    token = tkRs2;
                    if (*p == '=')
                    {
                        p++;
                        token = tkAsgRs2;
                    }
                    break;
                }
                break;
            }
            break;

        case '^':
            Assert(chType == _C_XOR);
            token = tkXor;
            if (this->PeekFirst(p, last) == '=')
            {
                p++;
                token = tkAsgXor;
            }
            break;

        case '|':
            Assert(chType == _C_BAR);
            token = tkOr;
            switch (this->PeekFirst(p, last))
            {
            case '=':
                p++;
                token = tkAsgOr;
                break;
            case '|':
                p++;
                token = tkLogOr;
                break;
            }
            break;

        case '&':
            Assert(chType == _C_AMP);
            token = tkAnd;
            switch (this->PeekFirst(p, last))
            {
            case '=':
                p++;
                token = tkAsgAnd;
                break;
            case '&':
                p++;
                token = tkLogAnd;
                break;
            }
            break;

        case '\'':
        case '"':
            Assert(chType == _C_QUO || chType == _C_APO);
            pchT = p;
            token = this->ScanStringConstant((OLECHAR)ch, &pchT);
            p = pchT;
            break;
        }

        break;
    }

LDone:
    // Commit the scan position and publish the token kind.
    m_currentCharacter = p;
    return (m_ptoken->tk = token);
}
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::GetSecondaryBufferAsPid()
- {
- bool createPid = true;
- if ((m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0)
- {
- createPid = false;
- }
- if (createPid)
- {
- return this->GetHashTbl()->PidHashNameLen(m_tempChBufSecondary.m_prgch, m_tempChBufSecondary.m_ichCur);
- }
- else
- {
- return nullptr;
- }
- }
- template <typename EncodingPolicy>
- LPCOLESTR Scanner<EncodingPolicy>::StringFromLong(int32 lw)
- {
- _ltow_s(lw, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax, 10);
- return m_tempChBuf.m_prgch;
- }
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::PidFromLong(int32 lw)
- {
- return this->GetHashTbl()->PidHashName(StringFromLong(lw));
- }
- // Formats dbl as text into m_tempChBuf and returns a pointer to that buffer.
- // Reports ERRnoMemory through Error() when FDblToStr signals failure.
- template <typename EncodingPolicy>
- LPCOLESTR Scanner<EncodingPolicy>::StringFromDbl(double dbl)
- {
-     if (Js::NumberUtilities::FDblToStr(dbl, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax))
-     {
-         // Conversion succeeded: the text is in the temp buffer.
-         return m_tempChBuf.m_prgch;
-     }
-     Error(ERRnoMemory);
-     // Reached only if Error() returns; the buffer pointer is handed back as-is.
-     return m_tempChBuf.m_prgch;
- }
- // Returns the identifier obtained by hashing the textual form of dbl.
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::PidFromDbl(double dbl)
- {
-     // StringFromDbl writes the text into the scanner's temp buffer.
-     LPCOLESTR const numberText = StringFromDbl(dbl);
-     return this->GetHashTbl()->PidHashName(numberText);
- }
- // Records the scanner's current position in restorePoint with a zero
- // function-id increment and a zero length decrement.
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint)
- {
-     const uint noFunctionIdIncrement = 0;
-     const size_t noLengthDecr = 0;
-     this->Capture(restorePoint, noFunctionIdIncrement, noLengthDecr);
- }
- // Fills restorePoint with the scanner's current token/line offsets, their
- // multi-unit counts, the line number and the had-EOL flag, together with the
- // caller-supplied functionIdIncrement and lengthDecr.
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr)
- {
-     RestorePoint& snapshot = *restorePoint;
-     // Caller-provided bookkeeping, stored verbatim.
-     snapshot.functionIdIncrement = functionIdIncrement;
-     snapshot.lengthDecr = lengthDecr;
-     // Start of the current token.
-     snapshot.m_ichMinTok = this->IchMinTok();
-     snapshot.m_cMinTokMultiUnits = this->m_cMinTokMultiUnits;
-     // Start of the current line.
-     snapshot.m_ichMinLine = this->IchMinLine();
-     snapshot.m_cMinLineMultiUnits = this->m_cMinLineMultiUnits;
-     // Line number and end-of-line flag.
-     snapshot.m_line = this->m_line;
-     snapshot.m_fHadEol = this->m_fHadEol;
- #ifdef DEBUG
-     // Debug-only copy of the multi-unit count; SeekAndScan asserts against it.
-     snapshot.m_cMultiUnits = this->m_cMultiUnits;
- #endif
- }
- // Returns the scanner to restorePoint and rescans the token there with Scan().
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint)
- {
-     // forcePid = false selects the plain Scan() path in SeekAndScan.
-     this->template SeekAndScan<false>(restorePoint);
- }
- // Returns the scanner to restorePoint and rescans the token there with ScanForcingPid().
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::SeekToForcingPid(const RestorePoint& restorePoint)
- {
-     // forcePid = true selects the ScanForcingPid() path in SeekAndScan.
-     this->template SeekAndScan<true>(restorePoint);
- }
- // Repositions the scanner at the token recorded in restorePoint, rescans that
- // token (ScanForcingPid() when forcePid is true, Scan() otherwise), and then
- // reinstates the recorded line number and had-EOL flag.
- template <typename EncodingPolicy>
- template <bool forcePid>
- void Scanner<EncodingPolicy>::SeekAndScan(const RestorePoint& restorePoint)
- {
-     // Line start: recorded character offset plus recorded multi-unit count, relative to the buffer base.
-     this->m_cMinLineMultiUnits = restorePoint.m_cMinLineMultiUnits;
-     this->m_pchMinLine = this->m_pchBase + restorePoint.m_ichMinLine + restorePoint.m_cMinLineMultiUnits;
-     // Read cursor: same computation using the token's recorded values.
-     this->m_currentCharacter = this->m_pchBase + restorePoint.m_ichMinTok + restorePoint.m_cMinTokMultiUnits;
-     this->RestoreMultiUnits(restorePoint.m_cMinTokMultiUnits);
-     // Rescan the token at the restored position.
-     if (!forcePid)
-     {
-         this->Scan();
-     }
-     else
-     {
-         this->ScanForcingPid();
-     }
-     // These are written only after the scan (presumably because scanning can modify them).
-     this->m_fHadEol = restorePoint.m_fHadEol;
-     this->m_line = restorePoint.m_line;
-     this->m_parser->ReduceDeferredScriptLength(restorePoint.lengthDecr);
-     Assert(this->m_cMultiUnits == restorePoint.m_cMultiUnits);
- }
- // Returns the scanner to restorePoint, then advances *nextFunctionId by the
- // function-id increment stored in the restore point.
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId)
- {
-     this->SeekTo(restorePoint);
-     // The counter is updated only after the seek has completed.
-     *nextFunctionId = *nextFunctionId + restorePoint.functionIdIncrement;
- }
- // Called by CompileScriptException::ProcessError to retrieve a BSTR for the line on which an error occurred.
- //
- // ichMinLine - character offset of the start of the requested line.
- // pbstrLine  - receives a newly allocated BSTR holding the line's text.
- //
- // Returns E_POINTER when pbstrLine is null, E_UNEXPECTED when ichMinLine is
- // negative or greater than AdjustedLength(), E_OUTOFMEMORY when the BSTR
- // cannot be allocated, and S_OK otherwise.
- template<typename EncodingPolicy>
- HRESULT Scanner<EncodingPolicy>::SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine)
- {
-     if( !pbstrLine )
-     {
-         return E_POINTER;
-     }
-     // If we overflow the string, we have a serious problem...
-     if (ichMinLine < 0 || static_cast<size_t>(ichMinLine) > AdjustedLength() )
-     {
-         return E_UNEXPECTED;
-     }
-     // Use the cached line start when the request is for the scanner's current line;
-     // otherwise translate the character offset into an encoded-unit offset from the buffer base.
-     typename EncodingPolicy::EncodedCharPtr pStart = static_cast<size_t>(ichMinLine) == IchMinLine() ? m_pchMinLine : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, ichMinLine);
-     // Determine the length by scanning for the next newline
-     size_t cb = 0;
-     charcount_t cch = LineLength(pStart, m_pchLast, &cb);
-     Assert(cch <= LONG_MAX);
-     // The line ends cb encoded units after its start, however pStart was obtained.
-     // The previous code used start + cb only for the current line; for any other line it
-     // derived the end from cch measured from the buffer base, ignoring ichMinLine, which
-     // yields a wrong end pointer whenever the line does not begin at offset 0.
-     typename EncodingPolicy::EncodedCharPtr pEnd = pStart + cb;
-     *pbstrLine = SysAllocStringLen(NULL, cch);
-     if (!*pbstrLine)
-     {
-         return E_OUTOFMEMORY;
-     }
-     this->ConvertToUnicode(*pbstrLine, cch, pStart, pEnd);
-     return S_OK;
- }
- template class Scanner<NotNullTerminatedUTF8EncodingPolicy>; // Explicit instantiation of Scanner for the NotNullTerminatedUTF8EncodingPolicy encoding policy.
|