| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
2772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- #include "ParserPch.h"
- /*****************************************************************************
- *
- * The following table speeds various tests of characters, such as whether
- * a given character can be part of an identifier, and so on.
- */
- // Counts the line breaks found in at most cch characters of psz, stopping early
- // at a NUL character. A lone CR, a lone LF, and a CR LF pair each count as one
- // line break.
- //
- // psz - text to examine; scanning stops at the first NUL.
- // cch - maximum number of characters to examine.
- //
- // Returns the number of line breaks counted.
- int CountNewlines(LPCOLESTR psz, int cch)
- {
-     int cln = 0;
-     while (0 != *psz && 0 != cch--)
-     {
-         switch (*psz++)
-         {
-         case _u('\xD'):
-             if (*psz == _u('\xA'))
-             {
-                 ++psz;
-                 if (0 == cch--)
-                 {
-                     // The LF of this CR LF pair lies beyond the cch limit. Stop here:
-                     // previously this only left the switch with cch already at -1,
-                     // so the loop kept scanning past the limit until a NUL was hit.
-                     // As before, the straddling pair itself is not counted.
-                     return cln;
-                 }
-             }
-             // fall-through
-         case _u('\xA'):
-             cln++;
-             break;
-         }
-     }
-     return cln;
- }
- // Reports whether this token is a keyword. Future reserved words do not
- // qualify: only token kinds up to and including tkYIELD are accepted.
- BOOL Token::IsKeyword() const
- {
-     const bool isKeyword = !(tk > tkYIELD);
-     return isKeyword;
- }
- // Turns this token into a regular-expression token that carries 'pattern'.
- // A non-null pattern is registered with 'parser' first; a null pattern is
- // simply stored. Returns tkRegExp, which is also stored as the token kind.
- tokens Token::SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser)
- {
-     Assert(parser);
-     if (pattern)
-     {
-         parser->RegisterRegexPattern(pattern);
-     }
-     this->u.pattern = pattern;
-     tk = tkRegExp;
-     return tk;
- }
- // Creates the identifier for this token through 'hashTbl', caches it in
- // u.pid and returns it. Must only be called while u.pid is still null.
- //  - When the token holds raw name text (u.pchMin is set), the name is hashed
- //    from that text and its length.
- //  - Otherwise the token must be a reserved word and the identifier is looked
- //    up from the token kind.
- IdentPtr Token::CreateIdentifier(HashTbl * hashTbl)
- {
-     Assert(this->u.pid == nullptr);
-     IdentPtr created = nullptr;
-     if (!this->u.pchMin)
-     {
-         Assert(IsReservedWord());
-         created = hashTbl->PidFromTk(tk);
-     }
-     else
-     {
-         Assert(IsIdentifier());
-         created = hashTbl->PidHashNameLen(this->u.pchMin, this->u.pchMin + this->u.length, this->u.length);
-     }
-     this->u.pid = created;
-     return created;
- }
- // Constructs a scanner bound to the given parser, identifier hash table,
- // output token and script context, and resets the scanning state. No source
- // text is attached here; that is done by SetText.
- template <typename EncodingPolicy>
- Scanner<EncodingPolicy>::Scanner(Parser* parser, HashTbl *phtbl, Token *ptoken, Js::ScriptContext* scriptContext)
- {
-     AssertMem(phtbl);
-     AssertMem(ptoken);
-     // Collaborators supplied by the caller, plus settings read from the script context.
-     m_parser = parser;
-     m_phtbl = phtbl;
-     m_ptoken = ptoken;
-     m_scriptContext = scriptContext;
-     this->charClassifier = scriptContext->GetCharClassifier();
-     this->es6UnicodeMode = scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled();
-     // Both temporary character buffers keep a back-pointer to this scanner.
-     m_tempChBuf.m_pscanner = this;
-     m_tempChBufSecondary.m_pscanner = this;
-     // Line and position bookkeeping.
-     m_line = 0;
-     m_startLine = 0;
-     m_pchStartLine = NULL;
-     m_cMinLineMultiUnits = 0;
-     m_ichMinError = 0;
-     m_ichLimError = 0;
-     m_iecpLimTokPrevious = (size_t)-1;
-     // Per-token and mode flags.
-     m_scanState = ScanStateNormal;
-     m_fHadEol = FALSE;
-     m_doubleQuoteOnLastTkStrCon = FALSE;
-     m_OctOrLeadingZeroOnLastTKNumber = false;
-     m_fStringTemplateDepth = 0;
-     m_fYieldIsKeywordRegion = false;
-     m_fAwaitIsKeywordRegion = false;
- }
- // Destructor. The body is intentionally empty: nothing is released explicitly here.
- template <typename EncodingPolicy>
- Scanner<EncodingPolicy>::~Scanner()
- {
- }
- /*****************************************************************************
- *
- * Initializes the scanner to prepare to scan the given source text.
- *
- * pszSrc     - start of the encoded source buffer.
- * offset     - offset, in encoded units, at which scanning starts.
- * length     - number of encoded units to scan from that offset.
- * charOffset - character offset matching 'offset'; the difference
- *              (offset - charOffset) seeds the multi-unit counter.
- * grfscr     - script flags; only fscrIsModuleCode is examined here.
- * lineNumber - line number assigned to the first line scanned.
- *
- * A byte order mark is skipped only when scanning starts at offset 0.
- */
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::SetText(EncodedCharPtr pszSrc, size_t offset, size_t length, charcount_t charOffset, ULONG grfscr, ULONG lineNumber)
- {
-     // Save the start of the script and add the offset to get the point where we should start scanning.
-     m_pchBase = pszSrc;
-     m_pchLast = m_pchBase + offset + length;
-     m_pchMinTok = pszSrc + offset;
-     m_pchMinLine = m_pchMinTok;
-     m_currentCharacter = m_pchMinTok;
-     m_pchPrevLine = m_pchMinTok;
-     this->RestoreMultiUnits(offset - charOffset);
-     // Absorb any byte order mark at the start
-     if (offset == 0)
-     {
-         switch (this->PeekFull(m_currentCharacter, m_pchLast))
-         {
-         case 0xFFFE: // "Opposite" endian BOM: U+FEFF read with its two bytes swapped.
-                      // (The previous constant, 0xFFEE, is U+FFEE HALFWIDTH WHITE CIRCLE.)
-             // We do not support big-endian encodings
-             // fall-through
-         case 0xFEFF: // "Correct" BOM
-             this->template ReadFull<true>(m_currentCharacter, m_pchLast);
-             break;
-         }
-     }
-     // Line tracking starts at whatever follows the (possibly skipped) BOM.
-     m_line = lineNumber;
-     m_startLine = lineNumber;
-     m_pchStartLine = m_currentCharacter;
-     // Reset per-scan state.
-     m_ptoken->tk = tkNone;
-     m_fIsModuleCode = (grfscr & fscrIsModuleCode) != 0;
-     m_fHadEol = FALSE;
-     m_DeferredParseFlags = ScanFlagNone;
- }
- // Requests the standard-character tables for both the encoded character type
- // and char16 from the thread context. The results are discarded.
- // NOTE(review): presumably this pre-creates the tables so that a background
- // parse does not have to - confirm against GetStandardChars.
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::PrepareForBackgroundParse(Js::ScriptContext *scriptContext)
- {
-     auto *const threadContext = scriptContext->GetThreadContext();
-     threadContext->GetStandardChars((EncodedChar*)0);
-     threadContext->GetStandardChars((char16*)0);
- }
- //-----------------------------------------------------------------------------
- // Returns the number of code points read from 'first' before the first line
- // terminator (LF, CR, LS, PS) or NUL is read. Reading is bounded by 'last'
- // through ReadFull.
- //
- // This is used to determine a length of BSTR, which can't contain a NUL character.
- //-----------------------------------------------------------------------------
- template <typename EncodingPolicy>
- charcount_t Scanner<EncodingPolicy>::LineLength(EncodedCharPtr first, EncodedCharPtr last)
- {
-     EncodedCharPtr cursor = first;
-     charcount_t count = 0;
-     while (true)
-     {
-         const auto ch = this->template ReadFull<false>(cursor, last);
-         const bool endsLine =
-             ch == kchNWL || // _C_NWL
-             ch == kchRET ||
-             ch == kchLS ||
-             ch == kchPS ||
-             ch == kchNUL;   // _C_NUL
-         if (endsLine)
-         {
-             return count;
-         }
-         ++count;
-     }
- }
- // Walks the characters with indices [ichStart, ichEnd) beginning at 'start'
- // and counts the line terminators met on the way. A CR LF pair counts once
- // and consumes two character indices. The walk stops early at a NUL.
- //
- // line     - in/out: incremented once per line terminator found.
- // start    - encoded position that corresponds to ichStart.
- // last     - read limit passed to ReadFull/PeekFull.
- // ichStart - character index of 'start'.
- // ichEnd   - character index at which to stop.
- //
- // Returns the character index just past the last line terminator seen, or
- // ichStart when none was seen.
- template <typename EncodingPolicy>
- charcount_t Scanner<EncodingPolicy>::UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd)
- {
-     EncodedCharPtr cursor = start;
-     charcount_t ich = ichStart;
-     charcount_t lineStart = ichStart;
-     int32 lineNumber = line;
-     bool hitNul = false;
-     while (!hitNul && ich < ichEnd)
-     {
-         ich++;
-         switch (this->template ReadFull<false>(cursor, last))
-         {
-         case kchRET:
-             // Treat CR LF as a single terminator: swallow the LF as well.
-             if (this->PeekFull(cursor, last) == kchNWL)
-             {
-                 ich++;
-                 this->template ReadFull<false>(cursor, last);
-             }
-             // fall-through
-         case kchNWL:
-         case kchLS:
-         case kchPS:
-             lineNumber++;
-             lineStart = ich;
-             break;
-         case kchNUL:
-             hitNul = true;
-             break;
-         }
-     }
-     line = lineNumber;
-     return lineStart;
- }
- // Tries to read a unicode escape whose leading backslash has already been
- // consumed by the caller. Two forms are accepted:
- //   uXXXX     - exactly four hex digits
- //   u{X...}   - one or more hex digits in braces, only when es6UnicodeMode is set
- // The value must not exceed 0x10FFFF.
- //
- // On success *outChar receives the code point, startingLocation is advanced
- // past the escape and true is returned. On failure false is returned and
- // neither startingLocation nor *outChar is modified.
- template <typename EncodingPolicy>
- bool Scanner<EncodingPolicy>::TryReadEscape(EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar)
- {
-     Assert(outChar != nullptr);
-     Assert(startingLocation <= endOfSource);
-     // Work on a copy so the caller's position only moves on success.
-     EncodedCharPtr cursor = startingLocation;
-     // '\' is assumed as there is only one caller; a 'u' has to follow.
-     if (cursor >= endOfSource)
-     {
-         return false;
-     }
-     if (this->ReadFirst(cursor, endOfSource) != 'u')
-     {
-         return false;
-     }
-     bool braced = false;
-     if (cursor < endOfSource && this->PeekFirst(cursor, endOfSource) == '{' && es6UnicodeMode)
-     {
-         braced = true;
-         // Move past the '{'
-         this->ReadFirst(cursor, endOfSource);
-     }
-     const uint digitLimit = (braced ? MAXUINT32 : 4u);
-     codepoint_t value = 0x0;
-     OLECHAR lastRead = 0;
-     int digit = 0;
-     uint digitCount = 0;
-     while (digitCount < digitLimit && cursor < endOfSource)
-     {
-         lastRead = this->ReadFirst(cursor, endOfSource);
-         if (!Js::NumberUtilities::FHexDigit(lastRead, &digit))
-         {
-             // lastRead now holds the first non-hex character (consumed).
-             break;
-         }
-         value = value * 0x10 + digit;
-         if (value > 0x10FFFF)
-         {
-             return false;
-         }
-         digitCount++;
-     }
-     // The braced form needs at least one digit, the plain form exactly four.
-     if (digitCount == 0)
-     {
-         return false;
-     }
-     if (!braced && digitCount != 4)
-     {
-         return false;
-     }
-     Assert(braced ? es6UnicodeMode : true);
-     // The braced form must have been terminated by '}'.
-     if (braced && lastRead != '}')
-     {
-         return false;
-     }
-     *outChar = value;
-     startingLocation = cursor;
-     return true;
- }
- // Completes a code point whose first surrogate ('lower') has already been
- // read. If the next character passes IsSurrogateUpperPart, the two are
- // combined into *outChar and startingLocation is advanced past it; otherwise
- // *outChar is just 'lower', startingLocation is left alone and the multi-unit
- // count is restored. *outContainsMultiUnitChar is only ever set to true,
- // never cleared. Always returns true.
- template <typename EncodingPolicy>
- template <bool bScan>
- bool Scanner<EncodingPolicy>::TryReadCodePointRest(codepoint_t lower, EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar)
- {
-     Assert(outChar != nullptr);
-     Assert(outContainsMultiUnitChar != nullptr);
-     Assert(es6UnicodeMode);
-     Assert(Js::NumberUtilities::IsSurrogateLowerPart(lower));
-     EncodedCharPtr cursor = startingLocation;
-     *outChar = lower;
-     if (cursor >= endOfSource)
-     {
-         // Nothing follows: the lone surrogate is the result.
-         return true;
-     }
-     const size_t savedMultiUnits = this->m_cMultiUnits;
-     const codepoint_t upper = this->template ReadFull<bScan>(cursor, endOfSource);
-     if (!Js::NumberUtilities::IsSurrogateUpperPart(upper))
-     {
-         // Not a pair: undo the multi-unit accounting of the speculative read.
-         this->RestoreMultiUnits(savedMultiUnits);
-         return true;
-     }
-     *outChar = Js::NumberUtilities::SurrogatePairAsCodePoint(lower, upper);
-     if (this->IsMultiUnitChar(static_cast<OLECHAR>(upper)))
-     {
-         *outContainsMultiUnitChar = true;
-     }
-     startingLocation = cursor;
-     return true;
- }
- // Reads one code point at startingLocation and advances past it.
- //  - Returns false, touching nothing, when startingLocation is at or past endOfSource.
- //  - A "big" character (per FBigChar) may set *outContainsMultiUnitChar and, in
- //    es6UnicodeMode, is combined with a following surrogate where possible.
- //  - A backslash that starts a valid unicode escape yields the escaped code
- //    point and sets *hasEscape; otherwise the backslash itself is returned.
- // The two flags are only ever set to true here, never cleared.
- template <typename EncodingPolicy>
- template <bool bScan>
- inline bool Scanner<EncodingPolicy>::TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar)
- {
-     Assert(outChar != nullptr);
-     Assert(outContainsMultiUnitChar != nullptr);
-     if (startingLocation >= endOfSource)
-     {
-         return false;
-     }
-     codepoint_t ch = this->template ReadFull<bScan>(startingLocation, endOfSource);
-     if (!FBigChar(ch))
-     {
-         // Only a small character can be the backslash that introduces an escape.
-         if (ch == '\\' && TryReadEscape(startingLocation, endOfSource, &ch))
-         {
-             *hasEscape = true;
-         }
-         *outChar = ch;
-         return true;
-     }
-     if (this->IsMultiUnitChar(static_cast<OLECHAR>(ch)))
-     {
-         *outContainsMultiUnitChar = true;
-     }
-     if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
-     {
-         return TryReadCodePointRest<bScan>(ch, startingLocation, endOfSource, outChar, outContainsMultiUnitChar);
-     }
-     *outChar = ch;
-     return true;
- }
- // Scans an identifier that starts at *pp. The first code point (which may be
- // written as a \u escape) must satisfy IsIdStart; the remainder is handled by
- // ScanIdentifierContinue, which also updates *pp. Returns tkScanError, leaving
- // *pp untouched, when no identifier starts here.
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp)
- {
-     EncodedCharPtr cursor = *pp;
-     const EncodedCharPtr identStart = cursor;
-     // JS6 allows unicode characters in the form of \uxxxx escape sequences
-     // to be part of the identifier.
-     bool sawEscape = false;
-     bool sawMultiUnit = false;
-     codepoint_t firstCodePoint = INVALID_CODEPOINT;
-     const size_t savedMultiUnits = this->m_cMultiUnits;
-     const bool gotFirst = TryReadCodePoint<true>(cursor, m_pchLast, &firstCodePoint, &sawEscape, &sawMultiUnit);
-     if (!gotFirst)
-     {
-         // Nothing could be read at all, so there is no identifier here.
-         return tkScanError;
-     }
-     Assert(firstCodePoint < 0x110000u);
-     if (charClassifier->IsIdStart(firstCodePoint))
-     {
-         return ScanIdentifierContinue(identifyKwds, sawEscape, sawMultiUnit, identStart, cursor, pp);
-     }
-     // Not an identifier start: undo the multi-unit accounting of the read.
-     this->RestoreMultiUnits(savedMultiUnits);
-     return tkScanError;
- }
- // Fast path for extending an identifier over single-unit characters.
- // Advances p while characters satisfy IsIdContinueFast<false>.
- //  - Returns TRUE when the identifier is known to end at p (a non-identifier
- //    character other than '\', or 'last', was reached).
- //  - Returns FALSE when the caller has to take the slow path: a multi-unit
- //    character or a backslash was met, or the encoding is not multi-unit.
- template <typename EncodingPolicy>
- BOOL Scanner<EncodingPolicy>::FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last)
- {
-     if (!EncodingPolicy::MultiUnitEncoding)
-     {
-         // Not fast path for non multi unit encoding
-         return false;
-     }
-     for (; p < last; p++)
-     {
-         const EncodedChar current = *p;
-         if (this->IsMultiUnitChar(current))
-         {
-             // Multi-unit character: the identifier may continue, let the slow path decide.
-             return FALSE;
-         }
-         const auto isContinue = charClassifier->IsIdContinueFast<false>(current);
-         Assert(current != '\\' || !isContinue);
-         if (!isContinue)
-         {
-             // The identifier ends here unless this is the start of an escape sequence.
-             return current != '\\';
-         }
-     }
-     // Ran into 'last': the identifier ends with the source.
-     return TRUE;
- }
- // Scans the remainder of an identifier whose first code point has already been
- // accepted, then classifies it.
- //
- // identifyKwds  - when false, tkID is returned as soon as the extent is known.
- // fHasEscape    - whether an escape sequence has been seen so far (updated while scanning).
- // fHasMultiChar - whether a multi-unit character has been seen so far (updated while scanning).
- // pchMin        - first encoded unit of the identifier.
- // p             - position just past the part already scanned.
- // pp            - receives the position just past the identifier.
- //
- // Returns tkID for an identifier. When ScanFlagSuppressIdPid is set and the
- // name contains an escape, the token found by TkFromNameLen is returned
- // (with yield/await mapped to tkID where they are not keywords). Otherwise a
- // name with an escape that spells a keyword yields tkNone.
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar,
-     EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp)
- {
-     const EncodedCharPtr limit = m_pchLast;
-     for (;;)
-     {
-         // Fast path for utf8, non-multi unit char and not escape
-         if (FastIdentifierContinue(p, limit))
-         {
-             break;
-         }
-         // Slow path that has to deal with multi unit encoding
-         codepoint_t nextCodePoint = INVALID_CODEPOINT;
-         const EncodedCharPtr rewindTo = p;
-         const size_t rewindMultiUnits = this->m_cMultiUnits;
-         if (!TryReadCodePoint<true>(p, limit, &nextCodePoint, &fHasEscape, &fHasMultiChar))
-         {
-             // Nothing more to read: put everything back and stop.
-             p = rewindTo;
-             this->RestoreMultiUnits(rewindMultiUnits);
-             break;
-         }
-         Assert(nextCodePoint < 0x110000u);
-         if (!charClassifier->IsIdContinue(nextCodePoint))
-         {
-             // Put back the last character: it is not part of the identifier.
-             p = rewindTo;
-             this->RestoreMultiUnits(rewindMultiUnits);
-             break;
-         }
-     }
-     Assert(p - pchMin > 0 && p - pchMin <= LONG_MAX);
-     *pp = p;
-     if (!identifyKwds)
-     {
-         return tkID;
-     }
-     // During syntax coloring, scanner doesn't need to convert the escape sequence to get actual characters, it just needs the classification information
-     // So call up hashtables custom method to check if the string scanned is identifier or keyword.
-     // Do the same for deferred parsing, but use a custom method that only tokenizes JS keywords.
-     const bool suppressPid = (m_DeferredParseFlags & ScanFlagSuppressIdPid) != 0;
-     if (suppressPid)
-     {
-         m_ptoken->SetIdentifier(NULL);
-         if (fHasEscape)
-         {
-             int32 cch = UnescapeToTempBuf(pchMin, p);
-             tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode());
-             if ((!this->YieldIsKeyword() && tk == tkYIELD) || (!this->AwaitIsKeyword() && tk == tkAWAIT))
-             {
-                 return tkID;
-             }
-             return tk;
-         }
-         // Without an escape, the main scan loop would have found a keyword already,
-         // so this can only be an ID (verified in debug builds).
-         DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p));
-         DebugOnly(tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode()));
-         Assert(tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword()));
-         return tkID;
-     }
-     // UTF16 Scanner are only for syntax coloring, so it shouldn't come here.
-     if (EncodingPolicy::MultiUnitEncoding && !(fHasMultiChar || fHasEscape))
-     {
-         Assert(sizeof(EncodedChar) == 1);
-         // Without an escape, the main scan loop would have found a keyword already,
-         // so this can only be an ID (verified in debug builds).
-         DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p));
-         DebugOnly(tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode()));
-         Assert(tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword()));
-         // Record the raw name span on the token instead of creating the pid now.
-         m_ptoken->SetIdentifier(reinterpret_cast<const char *>(pchMin), (int32)(p - pchMin));
-         return tkID;
-     }
-     IdentPtr pid = PidOfIdentiferAt(pchMin, p, fHasEscape, fHasMultiChar);
-     m_ptoken->SetIdentifier(pid);
-     if (fHasEscape)
-     {
-         // A name written with escapes is an identifier only if it does not spell a keyword.
-         const tokens kind = pid->Tk(IsStrictMode());
-         const bool isPlainIdentifier = kind == tkID
-             || (kind == tkYIELD && !this->YieldIsKeyword())
-             || (kind == tkAWAIT && !this->AwaitIsKeyword());
-         return isPlainIdentifier ? tkID : tkNone;
-     }
-     // If it doesn't have escape, then Scan() should have taken care of keywords (except
-     // yield if this->YieldIsKeyword() is false, in which case yield is treated as an identifier, and except
-     // await if this->AwaitIsKeyword() is false, in which case await is treated as an identifier).
-     // We don't have to check if the name is reserved word and return it as an Identifier
-     Assert(pid->Tk(IsStrictMode()) == tkID
-         || (pid->Tk(IsStrictMode()) == tkYIELD && !this->YieldIsKeyword())
-         || (pid->Tk(IsStrictMode()) == tkAWAIT && !this->AwaitIsKeyword()));
-     return tkID;
- }
- // Returns the identifier for the source text in the encoded-unit range
- // [iecpMin, iecpLim), measured from the start of the source buffer.
- // The range must be non-empty and lie within AdjustedLength().
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::PidAt(size_t iecpMin, size_t iecpLim)
- {
-     Assert(iecpMin < AdjustedLength() && iecpLim <= AdjustedLength() && iecpLim > iecpMin);
-     const EncodedCharPtr first = m_pchBase + iecpMin;
-     const EncodedCharPtr limit = m_pchBase + iecpLim;
-     return PidOfIdentiferAt(first, limit);
- }
- // Decodes the source text in [p, last) into m_tempChBuf, resolving escape
- // sequences along the way. Code points of 0x10000 and above are appended as a
- // surrogate pair. Returns the buffer's character count (m_ichCur) afterwards.
- template <typename EncodingPolicy>
- uint32 Scanner<EncodingPolicy>::UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last)
- {
-     m_tempChBuf.Init();
-     for (EncodedCharPtr cursor = p; cursor < last; )
-     {
-         codepoint_t codePoint;
-         // Required by TryReadCodePoint; their values are not used here.
-         bool hasEscape, isMultiChar;
-         bool gotCodePoint = TryReadCodePoint<false>(cursor, last, &codePoint, &hasEscape, &isMultiChar);
-         Assert(gotCodePoint);
-         Assert(codePoint < 0x110000);
-         if (codePoint >= 0x10000)
-         {
-             char16 lower, upper;
-             Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &upper);
-             m_tempChBuf.AppendCh(lower);
-             m_tempChBuf.AppendCh(upper);
-         }
-         else
-         {
-             m_tempChBuf.AppendCh((OLECHAR)codePoint);
-         }
-     }
-     return m_tempChBuf.m_ichCur;
- }
- // Returns the identifier for the source text in [p, last). The text is first
- // unescaped into m_tempChBuf and the result is hashed from that buffer.
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last)
- {
-     // Fill the buffer before reading m_prgch from it.
-     const int32 unescapedLength = UnescapeToTempBuf(p, last);
-     return m_phtbl->PidHashNameLen(m_tempChBuf.m_prgch, unescapedLength);
- }
- // Returns the identifier for the source text in [p, last), picking the
- // cheapest route:
- //  - names with an escape, or with multi-unit characters in a multi-unit
- //    encoding, are translated through the temporary buffer first;
- //  - other names in a multi-unit encoding are hashed directly as chars;
- //  - names in a non-multi-unit encoding are hashed directly as char16s.
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar)
- {
-     const bool needsTranslation = fHadEscape || (EncodingPolicy::MultiUnitEncoding && fHasMultiChar);
-     if (needsTranslation)
-     {
-         return PidOfIdentiferAt(p, last);
-     }
-     if (EncodingPolicy::MultiUnitEncoding)
-     {
-         Assert(sizeof(EncodedChar) == 1);
-         return m_phtbl->PidHashNameLen(reinterpret_cast<const char *>(p), reinterpret_cast<const char *>(last), (int32)(last - p));
-     }
-     Assert(sizeof(EncodedChar) == 2);
-     return m_phtbl->PidHashNameLen(reinterpret_cast< const char16 * >(p), (int32)(last - p));
- }
- // Scans a numeric literal starting at p.
- //
- // p         - first character of the literal.
- // pdbl      - receives the numeric value.
- // likelyInt - set to true on entry; cleared when "0" is directly followed by
- //             '.', 'e' or 'E', and otherwise passed on to StrToDbl.
- //
- // Returns the position just past the characters consumed. In the decimal
- // path this equals p when StrToDbl consumed nothing. Also resets and, for
- // literals with a leading zero, sets m_OctOrLeadingZeroOnLastTKNumber.
- template <typename EncodingPolicy>
- typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt)
- {
-     EncodedCharPtr last = m_pchLast;
-     EncodedCharPtr pchT;
-     likelyInt = true;
-     // Reset
-     m_OctOrLeadingZeroOnLastTKNumber = false;
-     if ('0' == this->PeekFirst(p, last))
-     {
-         // Every case either returns or breaks out to the decimal path below.
-         switch (this->PeekFirst(p + 1, last))
-         {
-         case '.':
-         case 'e':
-         case 'E':
-             // Floating point
-             likelyInt = false;
-             break;
-         case 'x':
-         case 'X':
-         {
-             // Hex
-             const EncodedCharPtr digits = p + 2;
-             *pdbl = Js::NumberUtilities::DblFromHex(digits, &pchT);
-             if (pchT != digits)
-             {
-                 return pchT;
-             }
-             // No digits: zero token "0" followed by an identifier token beginning with 'x'/'X'
-             *pdbl = 0;
-             return p + 1;
-         }
-         case 'o':
-         case 'O':
-         {
-             // Octal
-             const EncodedCharPtr digits = p + 2;
-             *pdbl = Js::NumberUtilities::DblFromOctal(digits, &pchT);
-             if (pchT != digits)
-             {
-                 return pchT;
-             }
-             // No digits: zero token "0" followed by an identifier token beginning with 'o'/'O'
-             *pdbl = 0;
-             return p + 1;
-         }
-         case 'b':
-         case 'B':
-         {
-             // Binary
-             const EncodedCharPtr digits = p + 2;
-             *pdbl = Js::NumberUtilities::DblFromBinary(digits, &pchT);
-             if (pchT != digits)
-             {
-                 return pchT;
-             }
-             // No digits: zero token "0" followed by an identifier token beginning with 'b'/'B'
-             *pdbl = 0;
-             return p + 1;
-         }
-         default:
-             // Octal
-             *pdbl = Js::NumberUtilities::DblFromOctal(p, &pchT);
-             Assert(pchT > p);
-             // If an octal literal is malformed then it is in fact a decimal literal.
-             if (*pdbl != 0 || pchT > p + 1)
-             {
-                 m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok
-             }
-             if (*pchT != '8' && *pchT != '9')
-             {
-                 return pchT;
-             }
-             // 08... or 09....: rescan the whole literal as decimal.
-             m_OctOrLeadingZeroOnLastTKNumber = false;
-             break;
-         }
-     }
-     // Decimal / floating point: let StrToDbl do the work.
-     *pdbl = Js::NumberUtilities::StrToDbl(p, &pchT, likelyInt);
-     Assert(pchT == p || !Js::NumberUtilities::IsNan(*pdbl));
-     return pchT;
- }
- // Scans a numeric literal at m_currentCharacter and advances
- // m_currentCharacter past the characters consumed.
- //
- // pdbl      - receives the numeric value.
- // likelyInt - set to true on entry; cleared when "0" is directly followed by
- //             '.', 'e' or 'E', and otherwise passed on to StrToDbl.
- //
- // Returns FALSE only when the decimal path (StrToDbl) consumed nothing; TRUE
- // otherwise. Also resets and, for literals with a leading zero, sets
- // m_OctOrLeadingZeroOnLastTKNumber.
- template <typename EncodingPolicy>
- BOOL Scanner<EncodingPolicy>::oFScanNumber(double *pdbl, bool& likelyInt)
- {
-     EncodedCharPtr pchT;
-     m_OctOrLeadingZeroOnLastTKNumber = false;
-     likelyInt = true;
-     if ('0' == *m_currentCharacter)
-     {
-         // Every case either returns or breaks out to the decimal path below.
-         switch (m_currentCharacter[1])
-         {
-         case '.':
-         case 'e':
-         case 'E':
-             // Floating point.
-             likelyInt = false;
-             break;
-         case 'x':
-         case 'X':
-         {
-             // Hex.
-             const EncodedCharPtr digits = m_currentCharacter + 2;
-             *pdbl = Js::NumberUtilities::DblFromHex<EncodedChar>(digits, &pchT);
-             if (pchT != digits)
-             {
-                 m_currentCharacter = pchT;
-             }
-             else
-             {
-                 // No digits: zero token "0" followed by an identifier token beginning with 'x'/'X'
-                 *pdbl = 0;
-                 m_currentCharacter++;
-             }
-             return TRUE;
-         }
-         case 'o':
-         case 'O':
-         {
-             // Octal with explicit prefix.
-             const EncodedCharPtr digits = m_currentCharacter + 2;
-             *pdbl = Js::NumberUtilities::DblFromOctal(digits, &pchT);
-             if (pchT != digits)
-             {
-                 m_currentCharacter = pchT;
-             }
-             else
-             {
-                 // No digits: zero token "0" followed by an identifier token beginning with 'o'/'O'
-                 *pdbl = 0;
-                 m_currentCharacter++;
-             }
-             return TRUE;
-         }
-         case 'b':
-         case 'B':
-         {
-             // Binary.
-             const EncodedCharPtr digits = m_currentCharacter + 2;
-             *pdbl = Js::NumberUtilities::DblFromBinary(digits, &pchT);
-             if (pchT != digits)
-             {
-                 m_currentCharacter = pchT;
-             }
-             else
-             {
-                 // No digits: zero token "0" followed by an identifier token beginning with 'b'/'B'
-                 *pdbl = 0;
-                 m_currentCharacter++;
-             }
-             return TRUE;
-         }
-         default:
-             // Octal.
-             *pdbl = Js::NumberUtilities::DblFromOctal(m_currentCharacter, &pchT);
-             Assert(pchT > m_currentCharacter);
-             // If an octal literal is malformed then it is in fact a decimal literal.
-             if (*pdbl != 0 || pchT > m_currentCharacter + 1)
-             {
-                 m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok
-             }
-             if (*pchT != '8' && *pchT != '9')
-             {
-                 m_currentCharacter = pchT;
-                 return TRUE;
-             }
-             // 08... or 09....: rescan the whole literal as decimal.
-             m_OctOrLeadingZeroOnLastTKNumber = false;
-             break;
-         }
-     }
-     // Let StrToDbl do all the work.
-     *pdbl = Js::NumberUtilities::StrToDbl(m_currentCharacter, &pchT, likelyInt);
-     if (pchT == m_currentCharacter)
-     {
-         return FALSE;
-     }
-     m_currentCharacter = pchT;
-     Assert(!Js::NumberUtilities::IsNan(*pdbl));
-     return TRUE;
- }
- // Calls RescanRegExp and returns its result. When that result is tkScanError,
- // m_currentCharacter is put back to where it was before the attempt.
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::TryRescanRegExp()
- {
-     const EncodedCharPtr savedPosition = m_currentCharacter;
-     const tokens scanned = RescanRegExp();
-     if (tkScanError != scanned)
-     {
-         return scanned;
-     }
-     m_currentCharacter = savedPosition;
-     return scanned;
- }
- // Rescans the current token as a regular-expression literal. The current
- // token is expected to be tkDiv or tkAsgDiv (checked in debug builds).
- // Scanning restarts at the token's first character, which must be '/', and is
- // delegated to ScanRegExpConstant using an arena that lives only for the call.
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::RescanRegExp()
- {
- #if DEBUG
-     if (m_ptoken->tk == tkDiv)
-     {
-         Assert(m_currentCharacter == m_pchMinTok + 1);
-     }
-     else if (m_ptoken->tk == tkAsgDiv)
-     {
-         Assert(m_currentCharacter == m_pchMinTok + 2);
-     }
-     else
-     {
-         AssertMsg(FALSE, "Who is calling RescanRegExp?");
-     }
- #endif //DEBUG
-     // Go back to the start of the token and step over the opening slash.
-     m_currentCharacter = m_pchMinTok;
-     if (*m_currentCharacter != '/')
-     {
-         Error(ERRnoSlash);
-     }
-     m_currentCharacter++;
-     auto *const parserAllocator = m_parser->GetAllocator();
-     ArenaAllocator alloc(_u("RescanRegExp"), parserAllocator->GetPageAllocator(), parserAllocator->outOfMemoryFunc);
-     return ScanRegExpConstant(&alloc);
- }
- // Rescans the current token as a regular-expression literal through
- // ScanRegExpConstantNoAST. The current token is expected to be tkDiv or
- // tkAsgDiv (checked in debug builds). Scanning restarts at the token's first
- // character, which must be '/'. The arena passed to the scan lives only for
- // the duration of the call.
- //
- // Returns the token produced by ScanRegExpConstantNoAST.
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::RescanRegExpNoAST()
- {
- #if DEBUG
-     switch (m_ptoken->tk)
-     {
-     case tkDiv:
-         Assert(m_currentCharacter == m_pchMinTok + 1);
-         break;
-     case tkAsgDiv:
-         Assert(m_currentCharacter == m_pchMinTok + 2);
-         break;
-     default:
-         // The message now names this function; it used to say "RescanRegExpNoParseTree".
-         AssertMsg(FALSE, "Who is calling RescanRegExpNoAST?");
-         break;
-     }
- #endif //DEBUG
-     // Go back to the start of the token and step over the opening slash.
-     m_currentCharacter = m_pchMinTok;
-     if (*m_currentCharacter != '/')
-         Error(ERRnoSlash);
-     m_currentCharacter++;
-     tokens tk = tkNone;
-     {
-         // Scoped so the arena is torn down before returning.
-         ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc);
-         tk = ScanRegExpConstantNoAST(&alloc);
-     }
-     return tk;
- }
- // Rescans the current token as a regular-expression literal through
- // ScanRegExpConstantNoAST, using a temporary allocator obtained from the
- // current thread's ThreadContext instead of the parser's allocator. The
- // current token is expected to be tkDiv or tkAsgDiv (checked in debug builds)
- // and its first character must be '/'.
- //
- // The temporary allocator is released in the finally block, so it is handed
- // back even when the scan throws.
- //
- // Returns the token produced by ScanRegExpConstantNoAST.
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::RescanRegExpTokenizer()
- {
- #if DEBUG
-     switch (m_ptoken->tk)
-     {
-     case tkDiv:
-         Assert(m_currentCharacter == m_pchMinTok + 1);
-         break;
-     case tkAsgDiv:
-         Assert(m_currentCharacter == m_pchMinTok + 2);
-         break;
-     default:
-         // The message now names this function; it used to say "RescanRegExpNoParseTree".
-         AssertMsg(FALSE, "Who is calling RescanRegExpTokenizer?");
-         break;
-     }
- #endif //DEBUG
-     // Go back to the start of the token and step over the opening slash.
-     m_currentCharacter = m_pchMinTok;
-     if (*m_currentCharacter != '/')
-         Error(ERRnoSlash);
-     m_currentCharacter++;
-     tokens tk = tkNone;
-     ThreadContext *threadContext = ThreadContext::GetContextForCurrentThread();
-     threadContext->EnsureRecycler();
-     Js::TempArenaAllocatorObject *alloc = threadContext->GetTemporaryAllocator(_u("RescanRegExp"));
-     TryFinally(
-         [&]() /* try block */
-         {
-             tk = this->ScanRegExpConstantNoAST(alloc->GetAllocator());
-         },
-         [&](bool /* hasException */) /* finally block */
-         {
-             threadContext->ReleaseTemporaryAllocator(alloc);
-         });
-     return tk;
- }
- // Parses the regular-expression literal that starts at m_currentCharacter (just past
- // the opening '/'), compiles it, and stores the resulting pattern on the current token.
- //
- // alloc - arena handed to the regex parser for its allocations.
- //
- // A UnifiedRegex::ParseError raised by ParseLiteral is translated into a scanner error:
- // m_currentCharacter is advanced by the error's encodedPos and Error(e.error) is called.
- // On a background parser CompileProgram<false> is used so that no pattern is allocated
- // from the recycler on that thread.
- //
- // Returns the value of m_ptoken->SetRegex(pattern, m_parser).
- template <typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanRegExpConstant(ArenaAllocator* alloc)
- {
-     PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);
-     // SEE ALSO: RegexHelper::PrimCompileDynamic()
- #ifdef PROFILE_EXEC
-     m_scriptContext->ProfileBegin(Js::RegexCompilePhase);
- #endif
-     UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = m_scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0);
-     UnifiedRegex::StandardChars<char16>* standardChars = m_scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     UnifiedRegex::DebugWriter *w = 0;
-     if (REGEX_CONFIG_FLAG(RegexDebug))
-         w = m_scriptContext->GetRegexDebugWriter();
-     if (REGEX_CONFIG_FLAG(RegexProfile))
-         m_scriptContext->GetRegexStatsDatabase()->BeginProfile();
- #endif
-     UnifiedRegex::Node* root = 0;
-     charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
-     UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
-     UnifiedRegex::Parser<EncodingPolicy, true> parser
-         ( m_scriptContext
-         , alloc
-         , standardEncodedChars
-         , standardChars
-         , this->IsFromExternalSource()
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         , w
- #endif
-         );
-     try
-     {
-         root = parser.ParseLiteral(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars, flags);
-     }
-     catch (const UnifiedRegex::ParseError& e) // by const reference: no copy of the exception object is made
-     {
- #ifdef PROFILE_EXEC
-         m_scriptContext->ProfileEnd(Js::RegexCompilePhase);
- #endif
-         // Point the scanner at the offending position before reporting the error.
-         m_currentCharacter += e.encodedPos;
-         Error(e.error);
-     }
-     UnifiedRegex::RegexPattern* pattern;
-     if (m_parser->IsBackgroundParser())
-     {
-         // Avoid allocating pattern from recycler on background thread. The main thread will create the pattern
-         // and hook it to this parse node.
-         pattern = parser.template CompileProgram<false>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags);
-     }
-     else
-     {
-         pattern = parser.template CompileProgram<true>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags);
-     }
-     this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits
-     return m_ptoken->SetRegex(pattern, m_parser);
- }
- // Scans over the regular-expression literal that starts at m_currentCharacter (just
- // past the opening '/') without building an AST or a pattern for it.
- //
- // alloc - arena handed to the regex parser for its allocations.
- //
- // A UnifiedRegex::ParseError raised by ParseLiteralNoAST is translated into a scanner
- // error: m_currentCharacter is advanced by the error's encodedPos and Error(e.error)
- // is called.
- //
- // Returns tkRegExp, which is also stored in m_ptoken->tk.
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanRegExpConstantNoAST(ArenaAllocator* alloc)
- {
-     PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);
-     ThreadContext *threadContext = m_scriptContext->GetThreadContext();
-     UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = threadContext->GetStandardChars((EncodedChar*)0);
-     UnifiedRegex::StandardChars<char16>* standardChars = threadContext->GetStandardChars((char16*)0);
-     charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
-     UnifiedRegex::Parser<EncodingPolicy, true> parser
-         ( m_scriptContext
-         , alloc
-         , standardEncodedChars
-         , standardChars
-         , this->IsFromExternalSource()
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         , 0
- #endif
-         );
-     try
-     {
-         parser.ParseLiteralNoAST(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars);
-     }
-     catch (const UnifiedRegex::ParseError& e) // by const reference: no copy of the exception object is made
-     {
-         // Point the scanner at the offending position before reporting the error.
-         m_currentCharacter += e.encodedPos;
-         Error(e.error);
-         // never reached
-     }
-     UnifiedRegex::RegexPattern* pattern = parser.template CompileProgram<false>(nullptr, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, UnifiedRegex::NoRegexFlags);
-     Assert(pattern == nullptr); // BuildAST == false, CompileProgram should return nullptr
-     this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits
-     return (m_ptoken->tk = tkRegExp);
- }
- // Scans the first part of a template literal.
- //
- // pp - in: position at which the template's text starts; out: position where scanning
- //      stopped (left on the last character of the error token when an error is returned).
- //
- // Returns tkStrTmplBasic when the text is closed by '`' with no substitution,
- // tkStrTmplBegin when it is followed by "${" (the template depth is incremented),
- // and otherwise the result of ScanError(m_currentCharacter, tkStrTmplBegin).
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanStringTemplateBegin(EncodedCharPtr *pp)
- {
-     // A template must begin with a string constant followed by '`' or '${'.
-     ScanStringConstant<true, true>('`', pp);
-     EncodedCharPtr end = m_pchLast;
-     const OLECHAR terminator = this->ReadFirst(*pp, end);
-     if (terminator == '`')
-     {
-         // Simple template: closed immediately, no substitutions.
-         return tkStrTmplBasic;
-     }
-     if (terminator == '$' && this->ReadFirst(*pp, end) == '{')
-     {
-         // The token after the substitution expression should be tkStrTmplMid or tkStrTmplEnd.
-         // While in template scanning mode the next '}' is treated as the start of one of those.
-         m_fStringTemplateDepth++;
-         // Regular template begin - the first substitution comes next.
-         return tkStrTmplBegin;
-     }
-     // Error - leave the pointer on the last character of the error token rather than after it.
-     (*pp)--;
-     return ScanError(m_currentCharacter, tkStrTmplBegin);
- }
- // Scans a middle or final part of a template literal (the text following a substitution).
- //
- // pp - in: position at which this part's text starts; out: position where scanning
- //      stopped (left on the last character of the error token when an error is returned).
- //
- // Returns tkStrTmplEnd when the text is closed by '`' (the template depth is decremented),
- // tkStrTmplMid when it is followed by another "${", and otherwise the result of
- // ScanError(m_currentCharacter, tkStrTmplEnd).
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp)
- {
-     // Middle and end tokens must begin with a string constant.
-     ScanStringConstant<true, true>('`', pp);
-     EncodedCharPtr end = m_pchLast;
-     const OLECHAR terminator = this->ReadFirst(*pp, end);
-     if (terminator == '`')
-     {
-         // This is the last part of the template ...` so template scanning mode ends here.
-         m_fStringTemplateDepth--;
-         return tkStrTmplEnd;
-     }
-     if (terminator == '$' && this->ReadFirst(*pp, end) == '{')
-     {
-         // Just another middle part of the template }...${
-         return tkStrTmplMid;
-     }
-     // Error - leave the pointer on the last character of the error token rather than after it.
-     (*pp)--;
-     return ScanError(m_currentCharacter, tkStrTmplEnd);
- }
- /*****************************************************************************
- *
- * Parses a string constant. Note that the string value is stored in
- * a volatile buffer (or allocated on the heap if too long), and thus
- * the string should be saved off before the next token is scanned.
- *
- * Template parameters:
- *   stringTemplateMode - true when scanning the literal text of a template string.
- *                        Line breaks are then allowed (CR and CR LF are normalized
- *                        to LF), scanning stops in front of a '`' or a "${", and
- *                        octal escapes are rejected with ERRES5NoOctal.
- *   createRawString    - gates every append to m_tempChBufSecondary (the raw text).
- *                        Must have the same value as stringTemplateMode (static_assert).
- *
- * Parameters:
- *   delim - quote character that terminates the literal ('"' or '\''; the template
- *           scanners pass '`').
- *   pp    - in: first character to scan; out: position where scanning stopped. For a
- *           quoted string that is just past the closing delimiter; in template mode
- *           it is ON the terminating '`' or '$', which is left for the caller to read.
- *
- * Returns tkStrCon. Malformed input is reported through Error(...) after pointing
- * m_currentCharacter at the offending position.
- *
- * Side effects visible in this function: resets m_OctOrLeadingZeroOnLastTKNumber and
- * m_EscapeOnLastTkStrCon, fills m_tempChBuf (cooked value) and, when createRawString
- * is set, m_tempChBufSecondary (raw value), sets the token's identifier, sets
- * m_scanState to ScanStateNormal and records m_doubleQuoteOnLastTkStrCon.
- */
- template<typename EncodingPolicy>
- template<bool stringTemplateMode, bool createRawString>
- tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
- {
- static_assert((stringTemplateMode && createRawString) || (!stringTemplateMode && !createRawString), "stringTemplateMode and createRawString must have the same value");
- OLECHAR ch, c, rawch; // ch: cooked character; rawch: character appended to the raw buffer; c: scratch for look-ahead and digits
- int wT; // numeric value of the most recently read hex/octal digit
- EncodedCharPtr p = *pp;
- EncodedCharPtr last = m_pchLast;
- // Reset
- m_OctOrLeadingZeroOnLastTKNumber = false;
- m_EscapeOnLastTkStrCon = FALSE;
- m_tempChBuf.Init();
- // Use template parameter to gate raw string creation.
- // If createRawString is false, all these operations should be no-ops
- if (createRawString)
- {
- m_tempChBufSecondary.Init();
- }
- for (;;) // one iteration per source character or escape sequence; left via goto LBreak or Error()
- {
- switch ((rawch = ch = this->ReadFirst(p, last)))
- {
- case kchRET:
- if (stringTemplateMode)
- {
- if (this->PeekFirst(p, last) == kchNWL)
- {
- // Eat the <LF> char, ignore return
- this->ReadFirst(p, last);
- }
- // Both <CR> and <CR><LF> are normalized to <LF> in template cooked and raw values
- ch = rawch = kchNWL;
- }
- LEcmaLineBreak:
- // Fall through
- case kchNWL:
- if (stringTemplateMode)
- {
- // Notify the scanner to update current line, number of lines etc
- NotifyScannedNewLine();
- break;
- }
- m_currentCharacter = p - 1; // unescaped line break inside a quoted string: report an unterminated literal at the break
- Error(ERRnoStrEnd);
- case '"':
- case '\'':
- if (ch == delim) // a quote that is not the delimiter is an ordinary character
- goto LBreak;
- break;
- case '`':
- // In string template scan mode, don't consume the '`' - we need to differentiate
- // between a closed string template and the expression open sequence - ${
- if (stringTemplateMode)
- {
- p--;
- goto LBreak;
- }
- // If we aren't scanning for a string template, do the default thing
- goto LMainDefault;
- case '$':
- // If we are parsing a string literal part of a string template, ${ indicates we need to switch
- // to parsing an expression.
- if (stringTemplateMode && this->PeekFirst(p, last) == '{')
- {
- // Rewind to the $ and return
- p--;
- goto LBreak;
- }
- // If we aren't scanning for a string template, do the default thing
- goto LMainDefault;
- case kchNUL:
- if (p > last) // the NUL was read from beyond the end of the source: the literal is unterminated
- {
- m_currentCharacter = p - 1;
- Error(ERRnoStrEnd);
- }
- break; // otherwise the NUL is appended like any other character
- default:
- LMainDefault:
- if (this->IsMultiUnitChar(ch))
- {
- if ((ch == kchLS || ch == kchPS))
- {
- goto LEcmaLineBreak;
- }
- rawch = ch = this->template ReadRest<true>(ch, p, last); // decode the remaining units of the character
- switch (ch)
- {
- case kchLS: // 0x2028, classifies as new line
- case kchPS: // 0x2029, classifies as new line
- goto LEcmaLineBreak;
- }
- }
- break;
- case kchBSL:
- // In raw mode '\\' is not an escape character, just add the char into the raw buffer.
- m_tempChBufSecondary.template AppendCh<createRawString>(ch);
- m_EscapeOnLastTkStrCon=TRUE;
- // In raw mode, we append the raw char itself and not the escaped value so save the char.
- rawch = ch = this->ReadFirst(p, last);
- codepoint_t codePoint = 0;
- uint errorType = (uint)ERRbadHexDigit; // error reported at ReturnScanError unless a more specific one is set first
- switch (ch)
- {
- case 'b':
- ch = 0x08;
- break;
- case 't':
- ch = 0x09;
- break;
- case 'v':
- ch = 0x0B; //Only in ES5 mode
- break; //same as default
- case 'n':
- ch = 0x0A;
- break;
- case 'f':
- ch = 0x0C;
- break;
- case 'r':
- ch = 0x0D;
- break;
- case 'x':
- // Insert the 'x' here before jumping to parse the hex digits.
- m_tempChBufSecondary.template AppendCh<createRawString>(ch);
- // 2 hex digits
- ch = 0;
- goto LTwoHex;
- case 'u':
- // Raw string just inserts a 'u' here.
- m_tempChBufSecondary.template AppendCh<createRawString>(ch);
- ch = 0;
- if (Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
- goto LFourHex;
- else if (c != '{' || !this->es6UnicodeMode)
- goto ReturnScanError;
- Assert(c == '{');
- // c should definitely be a '{' which should be appended to the raw string.
- m_tempChBufSecondary.template AppendCh<createRawString>(c);
- //At least one digit is expected
- if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
- {
- goto ReturnScanError;
- }
- m_tempChBufSecondary.template AppendCh<createRawString>(c);
- codePoint = static_cast<codepoint_t>(wT);
- while(Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
- {
- m_tempChBufSecondary.template AppendCh<createRawString>(c);
- codePoint <<= 4;
- codePoint += static_cast<codepoint_t>(wT);
- if (codePoint > 0x10FFFF) // checked on every digit, so the accumulator never grows far past the valid range
- {
- errorType = (uint)ERRInvalidCodePoint;
- goto ReturnScanError;
- }
- }
- if (c != '}')
- {
- errorType = (uint)ERRMissingCurlyBrace;
- goto ReturnScanError;
- }
- Assert(codePoint <= 0x10FFFF);
- if (codePoint >= 0x10000)
- {
- OLECHAR lower = 0;
- Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &ch);
- m_tempChBuf.AppendCh(lower); // first half of the pair goes in now; ch (the second half) is appended at the bottom of the loop
- }
- else
- {
- ch = (char16)codePoint;
- }
- // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
- if (createRawString)
- rawch = c;
- break;
- LFourHex:
- codePoint = 0x0;
- // Append first hex digit character to the raw string.
- m_tempChBufSecondary.template AppendCh<createRawString>(c);
- codePoint += static_cast<codepoint_t>(wT * 0x1000);
- if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
- goto ReturnScanError;
- // Append second hex digit character to the raw string.
- m_tempChBufSecondary.template AppendCh<createRawString>(c);
- codePoint += static_cast<codepoint_t>(wT * 0x0100);
- LTwoHex:
- // This code path doesn't expect curly.
- if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
- goto ReturnScanError;
- // Append third hex digit character (the first one for \x) to the raw string.
- m_tempChBufSecondary.template AppendCh<createRawString>(c);
- codePoint += static_cast<codepoint_t>(wT * 0x0010);
- if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
- goto ReturnScanError;
- codePoint += static_cast<codepoint_t>(wT); // the final digit is not appended here; it reaches the raw buffer through rawch below
- // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
- if (createRawString)
- rawch = c;
- if (codePoint < 0x10000)
- {
- ch = static_cast<OLECHAR>(codePoint);
- }
- else
- {
- goto ReturnScanError;
- }
- break;
- case '0':
- case '1':
- case '2':
- case '3':
- // 1 to 3 octal digits
- ch -= '0';
- // Octal escape sequences are not allowed inside string template literals
- if (stringTemplateMode)
- {
- c = this->PeekFirst(p, last);
- if (ch != 0 || (c >= '0' && c <= '7')) // only a lone \0 (not followed by an octal digit) is accepted
- {
- errorType = (uint)ERRES5NoOctal;
- goto ReturnScanError;
- }
- break;
- }
- wT = (c = this->ReadFirst(p, last)) - '0';
- if ((char16)wT > 7)
- {
- if (ch != 0 || ((char16)wT <= 9))
- {
- m_OctOrLeadingZeroOnLastTKNumber = true;
- }
- p--; // un-read the character that is not an octal digit
- break;
- }
- m_OctOrLeadingZeroOnLastTKNumber = true;
- ch = static_cast< OLECHAR >(ch * 8 + wT);
- goto LOneOctal;
- case '4':
- case '5':
- case '6':
- case '7':
- // 1 to 2 octal digits
- // Octal escape sequences are not allowed inside string template literals
- if (stringTemplateMode)
- {
- errorType = (uint)ERRES5NoOctal;
- goto ReturnScanError;
- }
- ch -= '0';
- m_OctOrLeadingZeroOnLastTKNumber = true;
- LOneOctal:
- wT = (c = this->ReadFirst(p, last)) - '0';
- if ((char16)wT > 7)
- {
- p--; // un-read the character that is not an octal digit
- break;
- }
- ch = static_cast< OLECHAR >(ch * 8 + wT);
- break;
- case kchRET: // 0xD
- if (stringTemplateMode)
- {
- // If this is \<CR><LF> we can eat the <LF> right now
- if (this->PeekFirst(p, last) == kchNWL)
- {
- // Eat the <LF> char, ignore return
- this->ReadFirst(p, last);
- }
- // Both \<CR> and \<CR><LF> are normalized to \<LF> in template raw string
- rawch = kchNWL;
- }
- case kchLS: // 0x2028, classifies as new line
- case kchPS: // 0x2029, classifies as new line
- case kchNWL: // 0xA
- LEcmaEscapeLineBreak:
- if (stringTemplateMode)
- {
- // We're going to ignore the line continuation tokens for the cooked strings, but we need to append the token for raw strings
- m_tempChBufSecondary.template AppendCh<createRawString>(rawch);
- // Template literal strings ignore all escaped line continuation tokens
- NotifyScannedNewLine();
- continue; // skips the appends at the bottom of the loop
- }
- m_currentCharacter = p; // ScanNewLine works on m_currentCharacter, so sync it with p before and after the call
- ScanNewLine(ch);
- p = m_currentCharacter;
- continue; // line continuation: nothing is added to the cooked value
- case 0:
- if (p >= last)
- {
- errorType = (uint)ERRnoStrEnd;
- ReturnScanError: // shared error exit for the escape handling above; errorType selects the message
- m_currentCharacter = p - 1;
- Error(errorType);
- }
- else if (stringTemplateMode)
- {
- // Escaped null character is translated into 0x0030 for raw template literals
- rawch = 0x0030;
- }
- break;
- default:
- if (this->IsMultiUnitChar(ch))
- {
- rawch = ch = this->template ReadRest<true>(ch, p, last);
- switch (ch)
- {
- case kchLS:
- case kchPS:
- goto LEcmaEscapeLineBreak;
- }
- }
- break;
- }
- break;
- }
- m_tempChBuf.AppendCh(ch); // cooked value
- m_tempChBufSecondary.template AppendCh<createRawString>(rawch); // raw value (no-op unless createRawString)
- }
- LBreak:
- bool createPid = true;
- if ((m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0)
- {
- createPid = false; // pid creation is suppressed, except for the "use strict" directive checked below
- if ((m_tempChBuf.m_ichCur == 10) && (0 == memcmp(_u("use strict"), m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur * sizeof(OLECHAR)))) // 10 == length of "use strict"
- {
- createPid = true;
- }
- }
- if (createPid)
- {
- m_ptoken->SetIdentifier(m_phtbl->PidHashNameLen(m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur));
- }
- else
- {
- m_ptoken->SetIdentifier(NULL);
- }
- m_scanState = ScanStateNormal;
- m_doubleQuoteOnLastTkStrCon = '"' == delim;
- *pp = p; // report back where scanning stopped
- return tkStrCon;
- }
- // Non-template entry point for ordinary quoted string literals: forwards to the
- // templated implementation with string-template mode and raw-string capture both off.
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
- {
-     const tokens scanned = this->template ScanStringConstant<false, false>(delim, pp);
-     return scanned;
- }
- /*****************************************************************************
- *
- * Consume a C-style comment.
- *
- * Parameters:
- *   pp             - in: position to start scanning from (ScanCore passes the first
- *                    character after the opening slash-star); out: just past the
- *                    closing star-slash on success, or on the terminating NUL when
- *                    the comment is not closed.
- *   containTypeDef - must be non-null; always set to false by the code shown here.
- *
- * Returns tkNone once the terminator has been found. An unterminated comment is
- * reported through Error(ERRnoCmtEnd). Line breaks inside the comment set m_fHadEol
- * and are counted via ScanNewLine.
- */
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef)
- {
- Assert(containTypeDef != nullptr);
- EncodedCharPtr p = *pp;
- *containTypeDef = false;
- EncodedCharPtr last = m_pchLast;
- OLECHAR ch;
- for (;;) // the only exits are the return on the terminator and Error() on end of input
- {
- switch((ch = this->ReadFirst(p, last)))
- {
- case '*':
- if (*p == '/') // look at the next unit without consuming it, so a run of stars is handled correctly
- {
- *pp = p + 1;
- return tkNone;
- }
- break;
- // ES 2015 11.3 Line Terminators
- case kchLS: // 0x2028, classifies as new line
- case kchPS: // 0x2029, classifies as new line
- LEcmaLineBreak:
- goto LLineBreak;
- case kchRET:
- case kchNWL:
- LLineBreak:
- m_fHadEol = TRUE;
- m_currentCharacter = p; // ScanNewLine works on m_currentCharacter, so sync it with p before and after the call
- ScanNewLine(ch);
- p = m_currentCharacter;
- break;
- case kchNUL:
- if (p >= last) // reached the end of the source without finding the terminator
- {
- m_currentCharacter = p - 1;
- *pp = p - 1;
- Error(ERRnoCmtEnd);
- }
- break;
- default:
- if (this->IsMultiUnitChar(ch))
- {
- ch = this->template ReadRest<true>(ch, p, last); // decode the remaining units of the character
- switch (ch)
- {
- case kchLS:
- case kchPS:
- goto LEcmaLineBreak;
- }
- }
- break;
- }
- }
- }
- /*****************************************************************************
- *
- * Handles a line terminator that has just been read. When the terminator is a
- * carriage return immediately followed by a line feed, the line feed is
- * consumed as well so the pair counts as a single line break. The line
- * bookkeeping itself is done by NotifyScannedNewLine.
- */
- template<typename EncodingPolicy>
- void Scanner<EncodingPolicy>::ScanNewLine(uint ch)
- {
-     const bool isCrLfPair = (ch == '\r') && (PeekNextChar() == '\n');
-     if (isCrLfPair)
-     {
-         // Swallow the <LF> half of the pair.
-         ReadNextChar();
-     }
-     NotifyScannedNewLine();
- }
- /*****************************************************************************
- *
- * Records that a line break has been scanned: the previous line start is
- * remembered, the new line is taken to start at m_currentCharacter, the
- * multi-unit count at that point is saved, and the line counter is bumped.
- */
- template<typename EncodingPolicy>
- void Scanner<EncodingPolicy>::NotifyScannedNewLine()
- {
-     // The old line start must be saved before it is overwritten.
-     m_pchPrevLine = m_pchMinLine;
-     m_pchMinLine = m_currentCharacter;
-     m_cMinLineMultiUnits = this->m_cMultiUnits;
-     ++m_line;
- }
- /*****************************************************************************
- *
- * Scans the next token with all deferred-parse scan flags temporarily cleared.
- * The saved flags are put back in the finally block, so they are restored even
- * if Scan() raises an exception. When no flags are set this is a plain Scan().
- */
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanForcingPid()
- {
-     if (m_DeferredParseFlags == ScanFlagNone)
-     {
-         // Nothing to suppress: no save/restore needed.
-         return Scan();
-     }
-     const BYTE savedFlags = m_DeferredParseFlags;
-     m_DeferredParseFlags = ScanFlagNone;
-     tokens scanned = tkEOF;
-     TryFinally(
-         [&]() /* try block */
-         {
-             scanned = this->Scan();
-         },
-         [&](bool /* hasException */) /* finally block */
-         {
-             this->m_DeferredParseFlags = savedFlags;
-         });
-     return scanned;
- }
- // Scans the next token, passing identifyKwds = true to ScanCore.
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::Scan()
- {
-     const bool identifyKeywords = true;
-     return this->ScanCore(identifyKeywords);
- }
- // Scans the next token, passing identifyKwds = false to ScanCore.
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanNoKeywords()
- {
-     const bool identifyKeywords = false;
-     return this->ScanCore(identifyKeywords);
- }
- // Scans the next token via ScanNoKeywords(); currently a plain alias for it.
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanAhead()
- {
-     const tokens scanned = this->ScanNoKeywords();
-     return scanned;
- }
- template<typename EncodingPolicy>
- tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds) // Core tokenizer: scans one token starting at m_currentCharacter, stores it in m_ptoken->tk and returns it.
- { // identifyKwds is forwarded to ScanIdentifier/ScanIdentifierContinue; Scan() passes true, ScanNoKeywords() passes false.
- codepoint_t ch; // character being examined; widened to a full code point when a surrogate pair is combined below
- OLECHAR firstChar; // NOTE(review): only assigned, never read in the code visible here - confirm kwd-swtch.h does not use it
- OLECHAR secondChar; // NOTE(review): only assigned, never read in the code visible here - confirm kwd-swtch.h does not use it
- EncodedCharPtr pchT; // scratch position handed to the helper scanners by address
- size_t multiUnits = 0; // multi-unit count saved before decoding a multi-unit character inside a line comment
- EncodedCharPtr p = m_currentCharacter;
- EncodedCharPtr last = m_pchLast;
- bool seenDelimitedCommentEnd = false; // NOTE(review): set after a block comment but never read in the code visible here
- // store the last token
- m_tkPrevious = m_ptoken->tk;
- m_iecpLimTokPrevious = IecpLimTok(); // Introduced for use by lambda parsing to find correct span of expression lambdas
- if (p >= last) // already at or past the end of the source: report EOF with an empty token span
- {
- m_pchMinTok = p;
- m_cMinTokMultiUnits = this->m_cMultiUnits;
- goto LEof;
- }
- tokens token;
- m_fHadEol = FALSE;
- CharTypes chType; // outside the default case this is only computed in DEBUG builds, for the Asserts
- charcount_t commentStartLine; // NOTE(review): written in the block-comment case, never read in the code visible here
- if (m_scanState && *p != 0) // a non-normal scan state is pending (it is not set anywhere in this function)
- {
- if (m_scanState == ScanStateStringTemplateMiddleOrEnd)
- {
- AssertMsg(m_fStringTemplateDepth > 0,
- "Shouldn't be trying to parse a string template end or middle token if we aren't scanning a string template");
- m_scanState = ScanStateNormal;
- pchT = p;
- token = ScanStringTemplateMiddleOrEnd(&pchT);
- p = pchT;
- goto LDone;
- }
- }
- for (;;) // runs until a token is produced; whitespace, line breaks and comments loop around
- {
- LLoop:
- m_pchMinTok = p; // candidate start of the next token
- m_cMinTokMultiUnits = this->m_cMultiUnits;
- ch = this->ReadFirst(p, last);
- #if DEBUG
- chType = this->charClassifier->GetCharType((OLECHAR)ch);
- #endif
- switch (ch)
- {
- default: // every character that has no dedicated case label in this switch
- if (ch == kchLS ||
- ch == kchPS )
- {
- goto LNewLine;
- }
- {
- BOOL isMultiUnit = this->IsMultiUnitChar((OLECHAR)ch);
- if (isMultiUnit)
- {
- ch = this->template ReadRest<true>((OLECHAR)ch, p, last); // decode the remaining units of the character
- }
- if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
- {
- codepoint_t upper = this->PeekFull(p, last);
- if (Js::NumberUtilities::IsSurrogateUpperPart(upper))
- {
- // Consume the rest of the utf8 bytes for the codepoint
- OLECHAR decodedUpper = this->ReadSurrogatePairUpper(p, last);
- Assert(decodedUpper == (OLECHAR) upper);
- ch = Js::NumberUtilities::SurrogatePairAsCodePoint(ch, upper);
- }
- }
- if (this->charClassifier->IsIdStart(ch))
- {
- // We treat IDContinue as an error.
- token = ScanIdentifierContinue(identifyKwds, false, !!isMultiUnit, m_pchMinTok, p, &p);
- break;
- }
- }
- chType = this->charClassifier->GetCharType(ch);
- switch (chType)
- {
- case _C_WSP: continue;
- case _C_NWL: goto LNewLine;
- // All other types (except errors) are handled by the outer switch.
- }
- Assert(chType == _C_LET || chType == _C_ERR || chType == _C_UNK || chType == _C_BKQ || chType == _C_SHP || chType == _C_AT || chType == _C_DIG);
- m_currentCharacter = p - 1;
- Error(ERRillegalChar);
- continue;
- case '\0':
- // Put back the null in case we get called again.
- p--;
- if (p < last)
- {
- // A \0 prior to the end of the text is an invalid character.
- Error(ERRillegalChar);
- }
- LEof:
- Assert(p >= last);
- token = tkEOF;
- break;
- case 0x0009:
- case 0x000B:
- case 0x000C:
- case 0x0020:
- Assert(chType == _C_WSP);
- continue;
- case '.':
- if (!Js::NumberUtilities::IsDigit(*p))
- {
- // Not a double
- if (m_scriptContext->GetConfig()->IsES6SpreadEnabled() &&
- this->PeekFirst(p, last) == '.' &&
- this->PeekFirst(p + 1, last) == '.')
- {
- token = tkEllipsis;
- p += 2; // consume the two remaining dots
- }
- else
- {
- token = tkDot;
- }
- break;
- }
- // May be a double, fall through
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- {
- double dbl;
- Assert(chType == _C_DIG || chType == _C_DOT);
- p = m_pchMinTok; // rewind so FScanNumber sees the whole literal, including a leading '.'
- this->RestoreMultiUnits(m_cMinTokMultiUnits);
- bool likelyInt = true;
- pchT = FScanNumber(p, &dbl, likelyInt);
- if (p == pchT) // FScanNumber consumed nothing
- {
- Assert(this->PeekFirst(p, last) != '.');
- Error(ERRbadNumber);
- }
- Assert(!Js::NumberUtilities::IsNan(dbl));
- p = pchT;
- int32 value;
- if (likelyInt && Js::NumberUtilities::FDblIsInt32(dbl, &value))
- {
- m_ptoken->SetLong(value);
- token = tkIntCon;
- }
- else
- {
- token = tkFltCon;
- m_ptoken->SetDouble(dbl, likelyInt);
- }
- break;
- }
- case '(': Assert(chType == _C_LPR); token = tkLParen; break;
- case ')': Assert(chType == _C_RPR); token = tkRParen; break;
- case ',': Assert(chType == _C_CMA); token = tkComma; break;
- case ';': Assert(chType == _C_SMC); token = tkSColon; break;
- case '[': Assert(chType == _C_LBR); token = tkLBrack; break;
- case ']': Assert(chType == _C_RBR); token = tkRBrack; break;
- case '~': Assert(chType == _C_TIL); token = tkTilde; break;
- case '?': Assert(chType == _C_QUE); token = tkQMark; break;
- case '{': Assert(chType == _C_LC); token = tkLCurly; break;
- // ES 2015 11.3 Line Terminators
- case '\r':
- case '\n':
- // kchLS:
- // kchPS:
- LNewLine:
- m_currentCharacter = p; // ScanNewLine works on m_currentCharacter, so sync it with p before and after the call
- ScanNewLine(ch);
- p = m_currentCharacter;
- m_fHadEol = TRUE;
- continue;
- LReserved: // not targeted by any goto in the code visible here - presumably used by kwd-swtch.h, TODO confirm
- {
- // We will derive the PID from the token
- Assert(token < tkID);
- m_ptoken->SetIdentifier(NULL);
- goto LDone;
- }
- LEval: // not targeted by any goto in the code visible here - presumably used by kwd-swtch.h, TODO confirm
- {
- token = tkID;
- if (!this->m_parser) goto LIdentifier;
- m_ptoken->SetIdentifier(this->m_parser->GetEvalPid());
- goto LDone;
- }
- LArguments: // not targeted by any goto in the code visible here - presumably used by kwd-swtch.h, TODO confirm
- {
- token = tkID;
- if (!this->m_parser) goto LIdentifier;
- m_ptoken->SetIdentifier(this->m_parser->GetArgumentsPid());
- goto LDone;
- }
- LTarget: // not targeted by any goto in the code visible here - presumably used by kwd-swtch.h, TODO confirm
- {
- token = tkID;
- if (!this->m_parser) goto LIdentifier;
- m_ptoken->SetIdentifier(this->m_parser->GetTargetPid());
- goto LDone;
- }
- #include "kwd-swtch.h"
- case 'A': case 'B': case 'C': case 'D': case 'E':
- case 'F': case 'G': case 'H': case 'I': case 'J':
- case 'K': case 'L': case 'M': case 'N': case 'O':
- case 'P': case 'Q': case 'R': case 'S': case 'T':
- case 'U': case 'V': case 'W': case 'X': case 'Y':
- case 'Z':
- // Lower-case letters handled in kwd-swtch.h above during reserved word recognition.
- case '$': case '_':
- LIdentifier:
- Assert(this->charClassifier->IsIdStart(ch));
- Assert(ch < 0x10000 && !this->IsMultiUnitChar((OLECHAR)ch));
- token = ScanIdentifierContinue(identifyKwds, false, false, m_pchMinTok, p, &p);
- break;
- case '`':
- Assert(chType == _C_BKQ);
- pchT = p;
- token = ScanStringTemplateBegin(&pchT);
- p = pchT;
- break;
- case '}':
- Assert(chType == _C_RC);
- token = tkRCurly;
- break;
- case '\\':
- pchT = p - 1; // back up onto the backslash so ScanIdentifier sees the whole escape
- token = ScanIdentifier(identifyKwds, &pchT);
- if (tkScanError == token)
- {
- m_currentCharacter = p;
- Error(ERRillegalChar);
- }
- p = pchT;
- break;
- case ':':
- token = tkColon;
- break;
- case '=':
- token = tkAsg;
- switch (this->PeekFirst(p, last))
- {
- case '=':
- p++;
- token = tkEQ;
- if (this->PeekFirst(p, last) == '=')
- {
- p++;
- token = tkEqv;
- }
- break;
- case '>':
- p++;
- token = tkDArrow;
- break;
- }
- break;
- case '!':
- token = tkBang;
- if (this->PeekFirst(p, last) == '=')
- {
- p++;
- token = tkNE;
- if (this->PeekFirst(p, last) == '=')
- {
- p++;
- token = tkNEqv;
- }
- }
- break;
- case '+':
- token = tkAdd;
- switch (this->PeekFirst(p, last))
- {
- case '=':
- p++;
- token = tkAsgAdd;
- break;
- case '+':
- p++;
- token = tkInc;
- break;
- }
- break;
- case '-':
- token = tkSub;
- switch (this->PeekFirst(p, last))
- {
- case '=':
- p++;
- token = tkAsgSub;
- break;
- case '-':
- p++;
- token = tkDec;
- if (!m_fIsModuleCode)
- {
- // https://tc39.github.io/ecma262/#prod-annexB-MultiLineComment
- // If there was a new line in the multi-line comment, the text after --> is a comment.
- if ('>' == this->PeekFirst(p, last) && m_fHadEol)
- {
- goto LSkipLineComment; // the tkDec assigned above is discarded: the comment loop continues the main loop
- }
- }
- break;
- }
- break;
- case '*':
- token = tkStar;
- switch(this->PeekFirst(p, last))
- {
- case '=' :
- p++;
- token = tkAsgMul;
- break;
- case '*' :
- if (!m_scriptContext->GetConfig()->IsES7ExponentiationOperatorEnabled())
- {
- break;
- }
- p++;
- token = tkExpo;
- if (this->PeekFirst(p, last) == '=')
- {
- p++;
- token = tkAsgExpo;
- }
- }
- break;
- case '/':
- token = tkDiv;
- switch(this->PeekFirst(p, last))
- {
- case '=':
- p++;
- token = tkAsgDiv;
- break;
- case '/':
- if (p >= last)
- {
- AssertMsg(!m_fIsModuleCode, "Do we have other line comment cases scanning pass last?");
- // Effective source length may have excluded HTMLCommentSuffix "//... -->". If we are scanning
- // those, we have passed "last" already. Move back and return EOF.
- p = last;
- goto LEof;
- }
- ch = *++p; // step over the second '/'
- firstChar = (OLECHAR)ch;
- LSkipLineComment:
- pchT = NULL; // set only when a multi-unit character is decoded below
- for (;;)
- {
- switch ((ch = this->ReadFirst(p, last)))
- {
- case kchLS: // 0x2028, classifies as new line
- case kchPS: // 0x2029, classifies as new line
- LEcmaCommentLineBreak:
- // kchPS and kchLS are more than one unit in UTF-8.
- if (pchT)
- {
- // kchPS and kchLS are more than one unit in UTF-8.
- p = pchT;
- }
- else
- {
- // But only a single code unit in UTF16
- p--;
- }
- this->RestoreMultiUnits(multiUnits);
- goto LCommentLineBreak;
- case kchNWL:
- case kchRET:
- p--; // leave the line break unconsumed so the main loop processes it
- LCommentLineBreak:
- // Subtract the comment length from the total char count for the purpose
- // of deciding whether to defer AST and byte code generation.
- m_parser->ReduceDeferredScriptLength((ULONG)(p - m_pchMinTok));
- break;
- case kchNUL:
- // Because we used ReadFirst, we have advanced p. The character that we are looking at is actually is p - 1.
- // If p == last, we are looking at p - 1, it is still within the source buffer, and we need to consider it part of the comment
- // Only if p > last that we have pass the source buffer and consider it a line break
- if (p > last)
- {
- p--;
- goto LCommentLineBreak;
- }
- continue;
- default:
- if (this->IsMultiUnitChar((OLECHAR)ch))
- {
- pchT = p - 1; // remember where the multi-unit character starts, in case it turns out to be LS/PS
- multiUnits = this->m_cMultiUnits;
- switch (ch = this->template ReadRest<true>((OLECHAR)ch, p, last))
- {
- case kchLS:
- case kchPS:
- goto LEcmaCommentLineBreak;
- }
- }
- continue;
- }
- break; // reached only from the line-break cases: leaves the comment loop
- }
- continue; // back to the main loop to scan whatever follows the comment
- case '*':
- ch = *++p; // step over the '*' that opens the comment
- firstChar = (OLECHAR)ch;
- if ((p + 1) < last)
- {
- secondChar = (OLECHAR)(*(p + 1));
- }
- else
- {
- secondChar = '\0';
- }
- pchT = p;
- commentStartLine = m_line;
- bool containTypeDef;
- if (tkNone == (token = SkipComment(&pchT, &containTypeDef))) // tkNone means the comment was closed
- {
- // Subtract the comment length from the total char count for the purpose
- // of deciding whether to defer AST and byte code generation.
- m_parser->ReduceDeferredScriptLength((ULONG)(pchT - m_pchMinTok));
- p = pchT;
- seenDelimitedCommentEnd = true;
- goto LLoop;
- }
- p = pchT; // reached only if SkipComment returned something other than tkNone
- break;
- }
- break;
- case '%':
- Assert(chType == _C_PCT);
- token = tkPct;
- if (this->PeekFirst(p, last) == '=')
- {
- p++;
- token = tkAsgMod;
- }
- break;
- case '<':
- Assert(chType == _C_LT);
- token = tkLT;
- switch (this->PeekFirst(p, last))
- {
- case '=':
- p++;
- token = tkLE;
- break;
- case '<':
- p++;
- token = tkLsh;
- if (this->PeekFirst(p, last) == '=')
- {
- p++;
- token = tkAsgLsh;
- break;
- }
- break;
- case '!':
- // ES 2015 B.1.3 - HTML comments are only allowed when parsing non-module code.
- if (!m_fIsModuleCode && this->PeekFirst(p + 1, last) == '-' && this->PeekFirst(p + 2, last) == '-')
- {
- // This is a "<!--" comment - treat as //
- if (p >= last)
- {
- // Effective source length may have excluded HTMLCommentSuffix "<!-- ... -->". If we are scanning
- // those, we have passed "last" already. Move back and return EOF.
- p = last;
- goto LEof;
- }
- firstChar = '!';
- goto LSkipLineComment;
- }
- break;
- }
- break;
- case '>':
- Assert(chType == _C_GT);
- token = tkGT;
- switch (this->PeekFirst(p, last))
- {
- case '=':
- p++;
- token = tkGE;
- break;
- case '>':
- p++;
- token = tkRsh;
- switch (this->PeekFirst(p, last))
- {
- case '=':
- p++;
- token = tkAsgRsh;
- break;
- case '>':
- p++;
- token = tkRs2;
- if (*p == '=') // NOTE(review): reads *p directly where the sibling cases use PeekFirst - confirm this is intentional
- {
- p++;
- token = tkAsgRs2;
- }
- break;
- }
- break;
- }
- break;
- case '^':
- Assert(chType == _C_XOR);
- token = tkXor;
- if (this->PeekFirst(p, last) == '=')
- {
- p++;
- token = tkAsgXor;
- }
- break;
- case '|':
- Assert(chType == _C_BAR);
- token = tkOr;
- switch (this->PeekFirst(p, last))
- {
- case '=':
- p++;
- token = tkAsgOr;
- break;
- case '|':
- p++;
- token = tkLogOr;
- break;
- }
- break;
- case '&':
- Assert(chType == _C_AMP);
- token = tkAnd;
- switch (this->PeekFirst(p, last))
- {
- case '=':
- p++;
- token = tkAsgAnd;
- break;
- case '&':
- p++;
- token = tkLogAnd;
- break;
- }
- break;
- case '\'':
- case '"':
- Assert(chType == _C_QUO || chType == _C_APO);
- pchT = p; // first character after the opening quote
- token = this->ScanStringConstant((OLECHAR)ch, &pchT);
- p = pchT;
- break;
- }
- break; // a token was produced: leave the main loop
- }
- LDone:
- m_currentCharacter = p; // commit the scan position and publish the token
- return (m_ptoken->tk = token);
- }
- // Returns an identifier for the current contents of the secondary character buffer
- // (m_tempChBufSecondary), or nullptr when ScanFlagSuppressStrPid is set in
- // m_DeferredParseFlags.
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::GetSecondaryBufferAsPid()
- {
-     const bool suppressPid = (m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0;
-     if (suppressPid)
-     {
-         return nullptr;
-     }
-     return m_phtbl->PidHashNameLen(m_tempChBufSecondary.m_prgch, m_tempChBufSecondary.m_ichCur);
- }
- // Formats lw as base-10 text into the scanner's temporary character buffer and
- // returns a pointer to that buffer. The result is overwritten by the next use of
- // m_tempChBuf.
- template <typename EncodingPolicy>
- LPCOLESTR Scanner<EncodingPolicy>::StringFromLong(int32 lw)
- {
-     const int decimalRadix = 10;
-     _ltow_s(lw, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax, decimalRadix);
-     return m_tempChBuf.m_prgch;
- }
- // Returns the hash-table identifier for the decimal text of lw.
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::PidFromLong(int32 lw)
- {
-     LPCOLESTR text = StringFromLong(lw);
-     return m_phtbl->PidHashName(text);
- }
- // Formats dbl as text into the scanner's temporary character buffer and returns a
- // pointer to that buffer. Reports ERRnoMemory when the conversion fails. The result
- // is overwritten by the next use of m_tempChBuf.
- template <typename EncodingPolicy>
- LPCOLESTR Scanner<EncodingPolicy>::StringFromDbl(double dbl)
- {
-     if (Js::NumberUtilities::FDblToStr(dbl, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax))
-     {
-         return m_tempChBuf.m_prgch;
-     }
-     Error(ERRnoMemory);
-     return m_tempChBuf.m_prgch;
- }
- // Returns the hash-table identifier for the text form of dbl.
- template <typename EncodingPolicy>
- IdentPtr Scanner<EncodingPolicy>::PidFromDbl(double dbl)
- {
-     LPCOLESTR text = StringFromDbl(dbl);
-     return m_phtbl->PidHashName(text);
- }
- // Captures the current scanner position into restorePoint with no function-id
- // increment and no length decrement.
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint)
- {
-     this->Capture(restorePoint, 0, 0);
- }
- // Fills restorePoint with the state needed to come back to the current token later:
- // the token and line offsets, their multi-unit counts, the line number and the
- // end-of-line flag, together with the caller-supplied functionIdIncrement and
- // lengthDecr. DEBUG builds additionally record m_cMultiUnits.
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr)
- {
-     RestorePoint& snapshot = *restorePoint;
-     // Position of the current token and of the current line.
-     snapshot.m_ichMinTok = this->IchMinTok();
-     snapshot.m_ichMinLine = this->IchMinLine();
-     snapshot.m_cMinTokMultiUnits = this->m_cMinTokMultiUnits;
-     snapshot.m_cMinLineMultiUnits = this->m_cMinLineMultiUnits;
-     // Line bookkeeping.
-     snapshot.m_line = this->m_line;
-     snapshot.m_fHadEol = this->m_fHadEol;
-     // Values supplied by the caller.
-     snapshot.functionIdIncrement = functionIdIncrement;
-     snapshot.lengthDecr = lengthDecr;
- #ifdef DEBUG
-     snapshot.m_cMultiUnits = this->m_cMultiUnits;
- #endif
- }
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint)
- {
- SeekAndScan<false>(restorePoint);
- }
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::SeekToForcingPid(const RestorePoint& restorePoint)
- {
- SeekAndScan<true>(restorePoint);
- }
- template <typename EncodingPolicy>
- template <bool forcePid>
- void Scanner<EncodingPolicy>::SeekAndScan(const RestorePoint& restorePoint)
- {
- this->m_currentCharacter = this->m_pchBase + restorePoint.m_ichMinTok + restorePoint.m_cMinTokMultiUnits;
- this->m_pchMinLine = this->m_pchBase + restorePoint.m_ichMinLine + restorePoint.m_cMinLineMultiUnits;
- this->m_cMinLineMultiUnits = restorePoint.m_cMinLineMultiUnits;
- this->RestoreMultiUnits(restorePoint.m_cMinTokMultiUnits);
- if (forcePid)
- {
- this->ScanForcingPid();
- }
- else
- {
- this->Scan();
- }
- this->m_line = restorePoint.m_line;
- this->m_fHadEol = restorePoint.m_fHadEol;
- this->m_parser->ReduceDeferredScriptLength(restorePoint.lengthDecr);
- Assert(this->m_cMultiUnits == restorePoint.m_cMultiUnits);
- }
- template <typename EncodingPolicy>
- void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId)
- {
- SeekTo(restorePoint);
- *nextFunctionId += restorePoint.functionIdIncrement;
- }
- // Called by CompileScriptException::ProcessError to retrieve a BSTR for the line on which an error occurred.
- template<typename EncodingPolicy>
- HRESULT Scanner<EncodingPolicy>::SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine)
- {
- if( !pbstrLine )
- {
- return E_POINTER;
- }
- // If we overflow the string, we have a serious problem...
- if (ichMinLine < 0 || static_cast<size_t>(ichMinLine) > AdjustedLength() )
- {
- return E_UNEXPECTED;
- }
- typename EncodingPolicy::EncodedCharPtr pStart = static_cast<size_t>(ichMinLine) == IchMinLine() ? m_pchMinLine : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, ichMinLine);
- // Determine the length by scanning for the next newline
- charcount_t cch = LineLength(pStart, m_pchLast);
- Assert(cch <= LONG_MAX);
- typename EncodingPolicy::EncodedCharPtr pEnd = static_cast<size_t>(ichMinLine) == IchMinLine() ? m_pchMinLine + cch : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, cch);
- *pbstrLine = SysAllocStringLen(NULL, cch);
- if (!*pbstrLine)
- {
- return E_OUTOFMEMORY;
- }
- this->ConvertToUnicode(*pbstrLine, cch, pStart, pEnd);
- return S_OK;
- }
- template class Scanner<NotNullTerminatedUTF8EncodingPolicy>;
|