Scan.cpp 81 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. /*****************************************************************************
  7. *
  8. * The following table speeds various tests of characters, such as whether
  9. * a given character can be part of an identifier, and so on.
  10. */
  11. int CountNewlines(LPCOLESTR psz, int cch)
  12. {
  13. int cln = 0;
  14. while (0 != *psz && 0 != cch--)
  15. {
  16. switch (*psz++)
  17. {
  18. case _u('\xD'):
  19. if (*psz == _u('\xA'))
  20. {
  21. ++psz;
  22. if (0 == cch--)
  23. break;
  24. }
  25. // fall-through
  26. case _u('\xA'):
  27. cln++;
  28. break;
  29. }
  30. }
  31. return cln;
  32. }
  33. template< typename CharT >
  34. struct AorW
  35. {
  36. };
  37. // Specialization for UTF8Char
  38. template<>
  39. struct AorW< UTF8Char >
  40. {
  41. // Expressing the args as "arrays of size N" ensures that the both args
  42. // are the same length. If not, we get a compile time error.
  43. template< size_t N >
  44. static const UTF8Char* Choose( const char (&a)[N], const char16 (&w)[N] )
  45. {
  46. // The reinterpret_cast is necessary to go from signed to unsigned char
  47. return reinterpret_cast< const UTF8Char* >(a);
  48. }
  49. template< size_t N >
  50. static const bool Test(const char (&a)[N], const char16 (&w)[N], LPCUTF8 value)
  51. {
  52. return 0 == memcmp(a, value, (N - 1) * sizeof(utf8char_t));
  53. }
  54. template< size_t N >
  55. static const bool Test(const char (&a)[N], const char16 (&w)[N], LPCUTF8 start, LPCUTF8 end)
  56. {
  57. return (end - start == N - 1) && (0 == memcmp(a, start, (N - 1) * sizeof(utf8char_t)));
  58. }
  59. };
  60. // Specialization for OLECHAR
  61. template<>
  62. struct AorW< OLECHAR >
  63. {
  64. template< size_t N >
  65. static const char16* Choose( const char (&a)[N], const char16 (&w)[N] )
  66. {
  67. return w;
  68. }
  69. template < size_t N >
  70. static bool Test(const char (&a)[N], const char16 (&w)[N], const char16 *value)
  71. {
  72. return 0 == memcmp(w, value, (N - 1) * sizeof(char16));
  73. }
  74. template < size_t N >
  75. static bool Test(const char (&a)[N], const char16 (&w)[N], const char16 *start, const char16 *end)
  76. {
  77. return (end - start == N - 1) && (0 == memcmp(w, start, (N - 1) * sizeof(char16)));
  78. }
  79. };
  80. BOOL Token::IsKeyword() const
  81. {
  82. // keywords (but not future reserved words)
  83. return (tk <= tkYIELD);
  84. }
  85. tokens Token::SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser)
  86. {
  87. Assert(parser);
  88. if(pattern)
  89. parser->RegisterRegexPattern(pattern);
  90. this->u.pattern = pattern;
  91. return tk = tkRegExp;
  92. }
  93. IdentPtr Token::CreateIdentifier(HashTbl * hashTbl)
  94. {
  95. Assert(this->u.pid == nullptr);
  96. if (this->u.pchMin)
  97. {
  98. Assert(IsIdentifier());
  99. IdentPtr pid = hashTbl->PidHashNameLen(this->u.pchMin, this->u.length);
  100. this->u.pid = pid;
  101. return pid;
  102. }
  103. Assert(IsReservedWord());
  104. IdentPtr pid = hashTbl->PidFromTk(tk);
  105. this->u.pid = pid;
  106. return pid;
  107. }
  108. template <typename EncodingPolicy>
  109. Scanner<EncodingPolicy>::Scanner(Parser* parser, HashTbl *phtbl, Token *ptoken, ErrHandler *perr, Js::ScriptContext* scriptContext)
  110. {
  111. AssertMem(phtbl);
  112. AssertMem(ptoken);
  113. AssertMem(perr);
  114. m_parser = parser;
  115. m_phtbl = phtbl;
  116. m_ptoken = ptoken;
  117. m_cMinLineMultiUnits = 0;
  118. m_perr = perr;
  119. m_fHadEol = FALSE;
  120. m_doubleQuoteOnLastTkStrCon = FALSE;
  121. m_OctOrLeadingZeroOnLastTKNumber = false;
  122. m_fStringTemplateDepth = 0;
  123. m_scanState = ScanStateNormal;
  124. m_scriptContext = scriptContext;
  125. m_line = 0;
  126. m_startLine = 0;
  127. m_pchStartLine = NULL;
  128. m_ichMinError = 0;
  129. m_ichLimError = 0;
  130. m_tempChBuf.m_pscanner = this;
  131. m_tempChBufSecondary.m_pscanner = this;
  132. m_iecpLimTokPrevious = (size_t)-1;
  133. this->charClassifier = scriptContext->GetCharClassifier();
  134. this->es6UnicodeMode = scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled();
  135. m_fYieldIsKeyword = false;
  136. m_fAwaitIsKeyword = false;
  137. m_typeAnnotationsOn = Js::Configuration::Global.flags.TypeAnnotations;
  138. }
  139. template <typename EncodingPolicy>
  140. Scanner<EncodingPolicy>::~Scanner(void)
  141. {
  142. }
  143. /*****************************************************************************
  144. *
  145. * Initializes the scanner to prepare to scan the given source text.
  146. */
  147. template <typename EncodingPolicy>
  148. void Scanner<EncodingPolicy>::SetText(EncodedCharPtr pszSrc, size_t offset, size_t length, charcount_t charOffset, ULONG grfscr, ULONG lineNumber)
  149. {
  150. // Save the start of the script and add the offset to get the point where we should start scanning.
  151. m_pchBase = pszSrc;
  152. m_pchLast = m_pchBase + offset + length;
  153. m_pchPrevLine = m_currentCharacter = m_pchMinLine = m_pchMinTok = pszSrc + offset;
  154. this->RestoreMultiUnits(offset - charOffset);
  155. // Absorb any byte order mark at the start
  156. if(offset == 0)
  157. {
  158. switch( this->PeekFull(m_currentCharacter, m_pchLast) )
  159. {
  160. case 0xFFEE: // "Opposite" endian BOM
  161. // We do not support big-endian encodings
  162. // fall-through
  163. case 0xFEFF: // "Correct" BOM
  164. this->template ReadFull<true>(m_currentCharacter, m_pchLast);
  165. break;
  166. }
  167. }
  168. m_line = lineNumber;
  169. m_startLine = lineNumber;
  170. m_pchStartLine = m_currentCharacter;
  171. m_ptoken->tk = tkNone;
  172. m_fIsModuleCode = (grfscr & fscrIsModuleCode) != 0;
  173. m_fHadEol = FALSE;
  174. m_fSyntaxColor = (grfscr & fscrSyntaxColor) != 0;
  175. m_DeferredParseFlags = ScanFlagNone;
  176. }
  177. template <typename EncodingPolicy>
  178. void Scanner<EncodingPolicy>::PrepareForBackgroundParse(Js::ScriptContext *scriptContext)
  179. {
  180. scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0);
  181. scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
  182. }
  183. //-----------------------------------------------------------------------------
  184. // Number of code points from 'first' up to, but not including the next
  185. // newline character, embedded NUL, or 'last', depending on which comes first.
  186. //
  187. // This is used to determine a length of BSTR, which can't contain a NUL character.
  188. //-----------------------------------------------------------------------------
  189. template <typename EncodingPolicy>
  190. charcount_t Scanner<EncodingPolicy>::LineLength(EncodedCharPtr first, EncodedCharPtr last)
  191. {
  192. charcount_t result = 0;
  193. EncodedCharPtr p = first;
  194. for (;;)
  195. {
  196. switch( this->template ReadFull<false>(p, last) )
  197. {
  198. case kchNWL: // _C_NWL
  199. case kchRET:
  200. case kchLS:
  201. case kchPS:
  202. case kchNUL: // _C_NUL
  203. return result;
  204. }
  205. result++;
  206. }
  207. }
  208. template <typename EncodingPolicy>
  209. charcount_t Scanner<EncodingPolicy>::UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd)
  210. {
  211. EncodedCharPtr p = start;
  212. charcount_t ich = ichStart;
  213. int32 current = line;
  214. charcount_t lastStart = ichStart;
  215. while (ich < ichEnd)
  216. {
  217. ich++;
  218. switch (this->template ReadFull<false>(p, last))
  219. {
  220. case kchRET:
  221. if (this->PeekFull(p, last) == kchNWL)
  222. {
  223. ich++;
  224. this->template ReadFull<false>(p, last);
  225. }
  226. // fall-through
  227. case kchNWL:
  228. case kchLS:
  229. case kchPS:
  230. current++;
  231. lastStart = ich;
  232. break;
  233. case kchNUL:
  234. goto done;
  235. }
  236. }
  237. done:
  238. line = current;
  239. return lastStart;
  240. }
  241. template <typename EncodingPolicy>
  242. bool Scanner<EncodingPolicy>::TryReadEscape(EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar)
  243. {
  244. Assert(outChar != nullptr);
  245. Assert(startingLocation <= endOfSource);
  246. EncodedCharPtr currentLocation = startingLocation;
  247. codepoint_t charToOutput = 0x0;
  248. // '\' is Assumed as there is only one caller
  249. // Read 'u' characters
  250. if (currentLocation >= endOfSource || this->ReadFirst(currentLocation, endOfSource) != 'u')
  251. {
  252. return false;
  253. }
  254. bool expectCurly = false;
  255. if (currentLocation < endOfSource && this->PeekFirst(currentLocation, endOfSource) == '{' && es6UnicodeMode)
  256. {
  257. expectCurly = true;
  258. // Move past the character
  259. this->ReadFirst(currentLocation, endOfSource);
  260. }
  261. uint i = 0;
  262. OLECHAR ch = 0;
  263. int hexValue = 0;
  264. uint maxHexDigits = (expectCurly ? MAXUINT32 : 4u);
  265. for(; i < maxHexDigits && currentLocation < endOfSource; i++)
  266. {
  267. if (!Js::NumberUtilities::FHexDigit(ch = this->ReadFirst(currentLocation, endOfSource), &hexValue))
  268. {
  269. break;
  270. }
  271. charToOutput = charToOutput * 0x10 + hexValue;
  272. if (charToOutput > 0x10FFFF)
  273. {
  274. return false;
  275. }
  276. }
  277. //At least 4 characters have to be read
  278. if (i == 0 || (i != 4 && !expectCurly))
  279. {
  280. return false;
  281. }
  282. Assert(expectCurly ? es6UnicodeMode : true);
  283. if (expectCurly && ch != '}')
  284. {
  285. return false;
  286. }
  287. *outChar = charToOutput;
  288. startingLocation = currentLocation;
  289. return true;
  290. }
  291. template <typename EncodingPolicy>
  292. template <bool bScan>
  293. bool Scanner<EncodingPolicy>::TryReadCodePointRest(codepoint_t lower, EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar)
  294. {
  295. Assert(outChar != nullptr);
  296. Assert(outContainsMultiUnitChar != nullptr);
  297. Assert(es6UnicodeMode);
  298. Assert(Js::NumberUtilities::IsSurrogateLowerPart(lower));
  299. EncodedCharPtr currentLocation = startingLocation;
  300. *outChar = lower;
  301. if (currentLocation < endOfSource)
  302. {
  303. size_t restorePoint = this->m_cMultiUnits;
  304. codepoint_t upper = this->template ReadFull<bScan>(currentLocation, endOfSource);
  305. if (Js::NumberUtilities::IsSurrogateUpperPart(upper))
  306. {
  307. *outChar = Js::NumberUtilities::SurrogatePairAsCodePoint(lower, upper);
  308. if (this->IsMultiUnitChar(static_cast<OLECHAR>(upper)))
  309. {
  310. *outContainsMultiUnitChar = true;
  311. }
  312. startingLocation = currentLocation;
  313. }
  314. else
  315. {
  316. this->RestoreMultiUnits(restorePoint);
  317. }
  318. }
  319. return true;
  320. }
  321. template <typename EncodingPolicy>
  322. template <bool bScan>
  323. inline bool Scanner<EncodingPolicy>::TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar)
  324. {
  325. Assert(outChar != nullptr);
  326. Assert(outContainsMultiUnitChar != nullptr);
  327. if (startingLocation >= endOfSource)
  328. {
  329. return false;
  330. }
  331. codepoint_t ch = this->template ReadFull<bScan>(startingLocation, endOfSource);
  332. if (FBigChar(ch))
  333. {
  334. if (this->IsMultiUnitChar(static_cast<OLECHAR>(ch)))
  335. {
  336. *outContainsMultiUnitChar = true;
  337. }
  338. if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
  339. {
  340. return TryReadCodePointRest<bScan>(ch, startingLocation, endOfSource, outChar, outContainsMultiUnitChar);
  341. }
  342. }
  343. else if (ch == '\\' && TryReadEscape(startingLocation, endOfSource, &ch))
  344. {
  345. *hasEscape = true;
  346. }
  347. *outChar = ch;
  348. return true;
  349. }
  350. template <typename EncodingPolicy>
  351. tokens Scanner<EncodingPolicy>::ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp)
  352. {
  353. EncodedCharPtr p = *pp;
  354. EncodedCharPtr pchMin = p;
  355. // JS6 allows unicode characters in the form of \uxxxx escape sequences
  356. // to be part of the identifier.
  357. bool fHasEscape = false;
  358. bool fHasMultiChar = false;
  359. codepoint_t codePoint = INVALID_CODEPOINT;
  360. size_t multiUnitsBeforeLast = this->m_cMultiUnits;
  361. // Check if we started the id
  362. if (!TryReadCodePoint<true>(p, m_pchLast, &codePoint, &fHasEscape, &fHasMultiChar))
  363. {
  364. // If no chars. could be scanned as part of the identifier, return error.
  365. return tkScanError;
  366. }
  367. Assert(codePoint < 0x110000u);
  368. if (!charClassifier->IsIdStart(codePoint))
  369. {
  370. // Put back the last character
  371. this->RestoreMultiUnits(multiUnitsBeforeLast);
  372. // If no chars. could be scanned as part of the identifier, return error.
  373. return tkScanError;
  374. }
  375. return ScanIdentifierContinue(identifyKwds, fHasEscape, fHasMultiChar, pchMin, p, pp);
  376. }
  377. template <typename EncodingPolicy>
  378. BOOL Scanner<EncodingPolicy>::FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last)
  379. {
  380. if (EncodingPolicy::MultiUnitEncoding)
  381. {
  382. while (p < last)
  383. {
  384. EncodedChar currentChar = *p;
  385. if (this->IsMultiUnitChar(currentChar))
  386. {
  387. // multi unit character, we may not have reach the end yet
  388. return FALSE;
  389. }
  390. Assert(currentChar != '\\' || !charClassifier->IsIdContinueFast<false>(currentChar));
  391. if (!charClassifier->IsIdContinueFast<false>(currentChar))
  392. {
  393. // only reach the end of the identifier if it is not the start of an escape sequence
  394. return currentChar != '\\';
  395. }
  396. p++;
  397. }
  398. // We have reach the end of the identifier.
  399. return TRUE;
  400. }
  401. // Not fast path for non multi unit encoding
  402. return false;
  403. }
  404. template <typename EncodingPolicy>
  405. tokens Scanner<EncodingPolicy>::ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar,
  406. EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp)
  407. {
  408. EncodedCharPtr last = m_pchLast;
  409. while (true)
  410. {
  411. // Fast path for utf8, non-multi unit char and not escape
  412. if (FastIdentifierContinue(p, last))
  413. {
  414. break;
  415. }
  416. // Slow path that has to deal with multi unit encoding
  417. codepoint_t codePoint = INVALID_CODEPOINT;
  418. EncodedCharPtr pchBeforeLast = p;
  419. size_t multiUnitsBeforeLast = this->m_cMultiUnits;
  420. if (TryReadCodePoint<true>(p, last, &codePoint, &fHasEscape, &fHasMultiChar))
  421. {
  422. Assert(codePoint < 0x110000u);
  423. if (charClassifier->IsIdContinue(codePoint))
  424. {
  425. continue;
  426. }
  427. }
  428. // Put back the last character
  429. p = pchBeforeLast;
  430. this->RestoreMultiUnits(multiUnitsBeforeLast);
  431. break;
  432. }
  433. Assert(p - pchMin > 0 && p - pchMin <= LONG_MAX);
  434. *pp = p;
  435. if (!identifyKwds)
  436. {
  437. return tkID;
  438. }
  439. // During syntax coloring, scanner doesn't need to convert the escape sequence to get actual characters, it just needs the classification information
  440. // So call up hashtables custom method to check if the string scanned is identifier or keyword.
  441. // Do the same for deferred parsing, but use a custom method that only tokenizes JS keywords.
  442. if ((m_DeferredParseFlags & ScanFlagSuppressIdPid) != 0)
  443. {
  444. m_ptoken->SetIdentifier(NULL);
  445. if (!fHasEscape)
  446. {
  447. // If there are no escape, that the main scan loop would have found the keyword already
  448. // So we can just assume it is an ID
  449. DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p));
  450. DebugOnly(tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode()));
  451. Assert(tk == tkID || (tk == tkYIELD && !m_fYieldIsKeyword) || (tk == tkAWAIT && !m_fAwaitIsKeyword));
  452. return tkID;
  453. }
  454. int32 cch = UnescapeToTempBuf(pchMin, p);
  455. tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode());
  456. return (!m_fYieldIsKeyword && tk == tkYIELD) || (!m_fAwaitIsKeyword && tk == tkAWAIT) ? tkID : tk;
  457. }
  458. else if (m_fSyntaxColor)
  459. {
  460. m_ptoken->SetIdentifier(NULL);
  461. // We always need to check TkFromNameLenColor because
  462. // the main Scan switch doesn't detect all non-keyword that needs coloring
  463. // (e.g. int)
  464. int32 cch = UnescapeToTempBuf(pchMin, p);
  465. return m_phtbl->TkFromNameLenColor(m_tempChBuf.m_prgch, cch);
  466. }
  467. // UTF16 Scanner are only for syntax coloring, so it shouldn't come here.
  468. if (EncodingPolicy::MultiUnitEncoding && !fHasMultiChar && !fHasEscape)
  469. {
  470. Assert(sizeof(EncodedChar) == 1);
  471. // If there are no escape, that the main scan loop would have found the keyword already
  472. // So we can just assume it is an ID
  473. DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p));
  474. DebugOnly(tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode()));
  475. Assert(tk == tkID || (tk == tkYIELD && !m_fYieldIsKeyword) || (tk == tkAWAIT && !m_fAwaitIsKeyword));
  476. m_ptoken->SetIdentifier(reinterpret_cast<const char *>(pchMin), (int32)(p - pchMin));
  477. return tkID;
  478. }
  479. IdentPtr pid = PidOfIdentiferAt(pchMin, p, fHasEscape, fHasMultiChar);
  480. m_ptoken->SetIdentifier(pid);
  481. if (!fHasEscape)
  482. {
  483. // If it doesn't have escape, then Scan() should have taken care of keywords (except
  484. // yield if m_fYieldIsKeyword is false, in which case yield is treated as an identifier, and except
  485. // await if m_fAwaitIsKeyword is false, in which case await is treated as an identifier).
  486. // We don't have to check if the name is reserved word and return it as an Identifier
  487. Assert(pid->Tk(IsStrictMode()) == tkID
  488. || (pid->Tk(IsStrictMode()) == tkYIELD && !m_fYieldIsKeyword)
  489. || (pid->Tk(IsStrictMode()) == tkAWAIT && !m_fAwaitIsKeyword));
  490. return tkID;
  491. }
  492. tokens tk = pid->Tk(IsStrictMode());
  493. return tk == tkID || (tk == tkYIELD && !m_fYieldIsKeyword) || (tk == tkAWAIT && !m_fAwaitIsKeyword) ? tkID : tkNone;
  494. }
  495. template <typename EncodingPolicy>
  496. IdentPtr Scanner<EncodingPolicy>::PidAt(size_t iecpMin, size_t iecpLim)
  497. {
  498. Assert(iecpMin < AdjustedLength() && iecpLim <= AdjustedLength() && iecpLim > iecpMin);
  499. return PidOfIdentiferAt(m_pchBase + iecpMin, m_pchBase + iecpLim);
  500. }
  501. template <typename EncodingPolicy>
  502. uint32 Scanner<EncodingPolicy>::UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last)
  503. {
  504. m_tempChBuf.Init();
  505. while( p < last )
  506. {
  507. codepoint_t codePoint;
  508. bool hasEscape, isMultiChar;
  509. bool gotCodePoint = TryReadCodePoint<false>(p, last, &codePoint, &hasEscape, &isMultiChar);
  510. Assert(gotCodePoint);
  511. Assert(codePoint < 0x110000);
  512. if (codePoint < 0x10000)
  513. {
  514. m_tempChBuf.AppendCh((OLECHAR)codePoint);
  515. }
  516. else
  517. {
  518. char16 lower, upper;
  519. Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &upper);
  520. m_tempChBuf.AppendCh(lower);
  521. m_tempChBuf.AppendCh(upper);
  522. }
  523. }
  524. return m_tempChBuf.m_ichCur;
  525. }
  526. template <typename EncodingPolicy>
  527. IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last)
  528. {
  529. int32 cch = UnescapeToTempBuf(p, last);
  530. return m_phtbl->PidHashNameLen(m_tempChBuf.m_prgch, cch);
  531. }
  532. template <typename EncodingPolicy>
  533. IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar)
  534. {
  535. // If there is an escape sequence in the JS6 identifier or it is a UTF8
  536. // source then we have to convert it to the equivalent char so we use a
  537. // buffer for translation.
  538. if ((EncodingPolicy::MultiUnitEncoding && fHasMultiChar) || fHadEscape)
  539. {
  540. return PidOfIdentiferAt(p, last);
  541. }
  542. else if (EncodingPolicy::MultiUnitEncoding)
  543. {
  544. Assert(sizeof(EncodedChar) == 1);
  545. return m_phtbl->PidHashNameLen(reinterpret_cast<const char *>(p), (int32)(last - p));
  546. }
  547. else
  548. {
  549. Assert(sizeof(EncodedChar) == 2);
  550. return m_phtbl->PidHashNameLen(reinterpret_cast< const char16 * >(p), (int32)(last - p));
  551. }
  552. }
  553. template <typename EncodingPolicy>
  554. typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt)
  555. {
  556. EncodedCharPtr last = m_pchLast;
  557. EncodedCharPtr pchT;
  558. likelyInt = true;
  559. // Reset
  560. m_OctOrLeadingZeroOnLastTKNumber = false;
  561. if ('0' == this->PeekFirst(p, last))
  562. {
  563. switch(this->PeekFirst(p + 1, last))
  564. {
  565. case '.':
  566. case 'e':
  567. case 'E':
  568. likelyInt = false;
  569. // Floating point
  570. goto LFloat;
  571. case 'x':
  572. case 'X':
  573. // Hex
  574. *pdbl = Js::NumberUtilities::DblFromHex(p + 2, &pchT);
  575. if (pchT == p + 2)
  576. {
  577. // "Octal zero token "0" followed by an identifier token beginning with character 'x'/'X'
  578. *pdbl = 0;
  579. return p + 1;
  580. }
  581. else
  582. return pchT;
  583. case 'o':
  584. case 'O':
  585. // Octal
  586. *pdbl = Js::NumberUtilities::DblFromOctal(p + 2, &pchT);
  587. if (pchT == p + 2)
  588. {
  589. // "Octal zero token "0" followed by an identifier token beginning with character 'o'/'O'
  590. *pdbl = 0;
  591. return p + 1;
  592. }
  593. return pchT;
  594. case 'b':
  595. case 'B':
  596. // Binary
  597. *pdbl = Js::NumberUtilities::DblFromBinary(p + 2, &pchT);
  598. if (pchT == p + 2)
  599. {
  600. // "Octal zero token "0" followed by an identifier token beginning with character 'b'/'B'
  601. *pdbl = 0;
  602. return p + 1;
  603. }
  604. return pchT;
  605. default:
  606. // Octal
  607. *pdbl = Js::NumberUtilities::DblFromOctal(p, &pchT);
  608. Assert(pchT > p);
  609. #if !SOURCERELEASE
  610. // If an octal literal is malformed then it is in fact a decimal literal.
  611. #endif // !SOURCERELEASE
  612. if(*pdbl != 0 || pchT > p + 1)
  613. m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok
  614. switch (*pchT)
  615. {
  616. case '8':
  617. case '9':
  618. // case 'e':
  619. // case 'E':
  620. // case '.':
  621. m_OctOrLeadingZeroOnLastTKNumber = false; //08... or 09....
  622. goto LFloat;
  623. }
  624. return pchT;
  625. }
  626. }
  627. else
  628. {
  629. LFloat:
  630. *pdbl = Js::NumberUtilities::StrToDbl(p, &pchT, likelyInt);
  631. Assert(pchT == p || !Js::NumberUtilities::IsNan(*pdbl));
  632. return pchT;
  633. }
  634. }
  635. template <typename EncodingPolicy>
  636. BOOL Scanner<EncodingPolicy>::oFScanNumber(double *pdbl, bool& likelyInt)
  637. {
  638. EncodedCharPtr pchT;
  639. m_OctOrLeadingZeroOnLastTKNumber = false;
  640. likelyInt = true;
  641. if ('0' == *m_currentCharacter)
  642. {
  643. switch (m_currentCharacter[1])
  644. {
  645. case '.':
  646. case 'e':
  647. case 'E':
  648. likelyInt = false;
  649. // Floating point.
  650. goto LFloat;
  651. case 'x':
  652. case 'X':
  653. // Hex.
  654. *pdbl = Js::NumberUtilities::DblFromHex<EncodedChar>(m_currentCharacter + 2, &pchT);
  655. if (pchT == m_currentCharacter + 2)
  656. {
  657. // "Octal zero token "0" followed by an identifier token beginning with character 'x'/'X'
  658. *pdbl = 0;
  659. m_currentCharacter++;
  660. }
  661. else
  662. m_currentCharacter = pchT;
  663. break;
  664. case 'o':
  665. case 'O':
  666. *pdbl = Js::NumberUtilities::DblFromOctal(m_currentCharacter + 2, &pchT);
  667. if (pchT == m_currentCharacter + 2)
  668. {
  669. // "Octal zero token "0" followed by an identifier token beginning with character 'o'/'O'
  670. *pdbl = 0;
  671. m_currentCharacter++;
  672. }
  673. else
  674. m_currentCharacter = pchT;
  675. break;
  676. case 'b':
  677. case 'B':
  678. *pdbl = Js::NumberUtilities::DblFromBinary(m_currentCharacter + 2, &pchT);
  679. if (pchT == m_currentCharacter + 2)
  680. {
  681. // "Octal zero token "0" followed by an identifier token beginning with character 'b'/'B'
  682. *pdbl = 0;
  683. m_currentCharacter++;
  684. }
  685. else
  686. m_currentCharacter = pchT;
  687. break;
  688. default:
  689. // Octal.
  690. *pdbl = Js::NumberUtilities::DblFromOctal(m_currentCharacter, &pchT);
  691. Assert(pchT > m_currentCharacter);
  692. #if !SOURCERELEASE
  693. // If an octal literal is malformed then it is in fact a decimal literal.
  694. #endif // !SOURCERELEASE
  695. if(*pdbl != 0 || pchT > m_currentCharacter + 1)
  696. m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok
  697. switch (*pchT)
  698. {
  699. case '8':
  700. case '9':
  701. // case 'e':
  702. // case 'E':
  703. // case '.':
  704. m_OctOrLeadingZeroOnLastTKNumber = false; //08... or 09....
  705. goto LFloat;
  706. }
  707. m_currentCharacter = pchT;
  708. break;
  709. }
  710. }
  711. else
  712. {
  713. LFloat:
  714. // Let StrToDbl do all the work.
  715. *pdbl = Js::NumberUtilities::StrToDbl(m_currentCharacter, &pchT, likelyInt);
  716. if (pchT == m_currentCharacter)
  717. return FALSE;
  718. m_currentCharacter = pchT;
  719. Assert(!Js::NumberUtilities::IsNan(*pdbl));
  720. }
  721. return TRUE;
  722. }
  723. template <typename EncodingPolicy>
  724. tokens Scanner<EncodingPolicy>::TryRescanRegExp()
  725. {
  726. EncodedCharPtr current = m_currentCharacter;
  727. tokens result = RescanRegExp();
  728. if (result == tkScanError)
  729. m_currentCharacter = current;
  730. return result;
  731. }
  732. template <typename EncodingPolicy>
  733. tokens Scanner<EncodingPolicy>::RescanRegExp()
  734. {
  735. #if DEBUG
  736. switch (m_ptoken->tk)
  737. {
  738. case tkDiv:
  739. Assert(m_currentCharacter == m_pchMinTok + 1);
  740. break;
  741. case tkAsgDiv:
  742. Assert(m_currentCharacter == m_pchMinTok + 2);
  743. break;
  744. default:
  745. AssertMsg(FALSE, "Who is calling RescanRegExp?");
  746. break;
  747. }
  748. #endif //DEBUG
  749. m_currentCharacter = m_pchMinTok;
  750. if (*m_currentCharacter != '/')
  751. Error(ERRnoSlash);
  752. m_currentCharacter++;
  753. tokens tk = tkNone;
  754. {
  755. ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc);
  756. tk = ScanRegExpConstant(&alloc);
  757. }
  758. return tk;
  759. }
  760. template <typename EncodingPolicy>
  761. tokens Scanner<EncodingPolicy>::RescanRegExpNoAST()
  762. {
  763. #if DEBUG
  764. switch (m_ptoken->tk)
  765. {
  766. case tkDiv:
  767. Assert(m_currentCharacter == m_pchMinTok + 1);
  768. break;
  769. case tkAsgDiv:
  770. Assert(m_currentCharacter == m_pchMinTok + 2);
  771. break;
  772. default:
  773. AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?");
  774. break;
  775. }
  776. #endif //DEBUG
  777. m_currentCharacter = m_pchMinTok;
  778. if (*m_currentCharacter != '/')
  779. Error(ERRnoSlash);
  780. m_currentCharacter++;
  781. tokens tk = tkNone;
  782. {
  783. ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc);
  784. {
  785. tk = ScanRegExpConstantNoAST(&alloc);
  786. }
  787. }
  788. return tk;
  789. }
  790. template <typename EncodingPolicy>
  791. tokens Scanner<EncodingPolicy>::RescanRegExpTokenizer()
  792. {
  793. #if DEBUG
  794. switch (m_ptoken->tk)
  795. {
  796. case tkDiv:
  797. Assert(m_currentCharacter == m_pchMinTok + 1);
  798. break;
  799. case tkAsgDiv:
  800. Assert(m_currentCharacter == m_pchMinTok + 2);
  801. break;
  802. default:
  803. AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?");
  804. break;
  805. }
  806. #endif //DEBUG
  807. m_currentCharacter = m_pchMinTok;
  808. if (*m_currentCharacter != '/')
  809. Error(ERRnoSlash);
  810. m_currentCharacter++;
  811. tokens tk = tkNone;
  812. ThreadContext *threadContext = ThreadContext::GetContextForCurrentThread();
  813. threadContext->EnsureRecycler();
  814. Js::TempArenaAllocatorObject *alloc = threadContext->GetTemporaryAllocator(_u("RescanRegExp"));
  815. TryFinally(
  816. [&]() /* try block */
  817. {
  818. tk = this->ScanRegExpConstantNoAST(alloc->GetAllocator());
  819. },
  820. [&](bool /* hasException */) /* finally block */
  821. {
  822. threadContext->ReleaseTemporaryAllocator(alloc);
  823. });
  824. return tk;
  825. }
  826. template <typename EncodingPolicy>
  827. tokens Scanner<EncodingPolicy>::ScanRegExpConstant(ArenaAllocator* alloc)
  828. {
  829. if (m_parser && m_parser->IsBackgroundParser())
  830. {
  831. PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);
  832. }
  833. else
  834. {
  835. PROBE_STACK(m_scriptContext, Js::Constants::MinStackRegex);
  836. }
  837. // SEE ALSO: RegexHelper::PrimCompileDynamic()
  838. #ifdef PROFILE_EXEC
  839. m_scriptContext->ProfileBegin(Js::RegexCompilePhase);
  840. #endif
  841. ArenaAllocator* ctAllocator = alloc;
  842. UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = m_scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0);
  843. UnifiedRegex::StandardChars<char16>* standardChars = m_scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
  844. #if ENABLE_REGEX_CONFIG_OPTIONS
  845. UnifiedRegex::DebugWriter *w = 0;
  846. if (REGEX_CONFIG_FLAG(RegexDebug))
  847. w = m_scriptContext->GetRegexDebugWriter();
  848. if (REGEX_CONFIG_FLAG(RegexProfile))
  849. m_scriptContext->GetRegexStatsDatabase()->BeginProfile();
  850. #endif
  851. UnifiedRegex::Node* root = 0;
  852. charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
  853. UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
  854. UnifiedRegex::Parser<EncodingPolicy, true> parser
  855. ( m_scriptContext
  856. , ctAllocator
  857. , standardEncodedChars
  858. , standardChars
  859. , this->IsFromExternalSource()
  860. #if ENABLE_REGEX_CONFIG_OPTIONS
  861. , w
  862. #endif
  863. );
  864. try
  865. {
  866. root = parser.ParseLiteral(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars, flags);
  867. }
  868. catch (UnifiedRegex::ParseError e)
  869. {
  870. #ifdef PROFILE_EXEC
  871. m_scriptContext->ProfileEnd(Js::RegexCompilePhase);
  872. #endif
  873. if (m_fSyntaxColor)
  874. return ScanError(m_currentCharacter + e.encodedPos, tkRegExp);
  875. m_currentCharacter += e.encodedPos;
  876. Error(e.error);
  877. }
  878. UnifiedRegex::RegexPattern* pattern;
  879. if (m_parser->IsBackgroundParser())
  880. {
  881. // Avoid allocating pattern from recycler on background thread. The main thread will create the pattern
  882. // and hook it to this parse node.
  883. pattern = parser.template CompileProgram<false>(root, m_currentCharacter, totalLen, bodyChars, totalChars, flags);
  884. }
  885. else
  886. {
  887. pattern = parser.template CompileProgram<true>(root, m_currentCharacter, totalLen, bodyChars, totalChars, flags);
  888. }
  889. this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits
  890. return m_ptoken->SetRegex(pattern, m_parser);
  891. }
  892. template<typename EncodingPolicy>
  893. tokens Scanner<EncodingPolicy>::ScanRegExpConstantNoAST(ArenaAllocator* alloc)
  894. {
  895. if (m_parser && m_parser->IsBackgroundParser())
  896. {
  897. PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);
  898. }
  899. else
  900. {
  901. PROBE_STACK(m_scriptContext, Js::Constants::MinStackRegex);
  902. }
  903. ThreadContext *threadContext = m_fSyntaxColor ? ThreadContext::GetContextForCurrentThread() : m_scriptContext->GetThreadContext();
  904. UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = threadContext->GetStandardChars((EncodedChar*)0);
  905. UnifiedRegex::StandardChars<char16>* standardChars = threadContext->GetStandardChars((char16*)0);
  906. charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
  907. UnifiedRegex::Parser<EncodingPolicy, true> parser
  908. ( m_scriptContext
  909. , alloc
  910. , standardEncodedChars
  911. , standardChars
  912. , this->IsFromExternalSource()
  913. #if ENABLE_REGEX_CONFIG_OPTIONS
  914. , 0
  915. #endif
  916. );
  917. try
  918. {
  919. parser.ParseLiteralNoAST(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars);
  920. }
  921. catch (UnifiedRegex::ParseError e)
  922. {
  923. if (m_fSyntaxColor)
  924. return ScanError(m_currentCharacter + e.encodedPos, tkRegExp);
  925. m_currentCharacter += e.encodedPos;
  926. Error(e.error);
  927. // never reached
  928. }
  929. UnifiedRegex::RegexPattern* pattern = parser.template CompileProgram<false>(nullptr, m_currentCharacter, totalLen, bodyChars, totalChars, UnifiedRegex::NoRegexFlags);
  930. Assert(pattern == nullptr); // BuildAST == false, CompileProgram should return nullptr
  931. this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits
  932. return (m_ptoken->tk = tkRegExp);
  933. }
  934. template<typename EncodingPolicy>
  935. tokens Scanner<EncodingPolicy>::ScanStringTemplateBegin(EncodedCharPtr *pp)
  936. {
  937. // String template must begin with a string constant followed by '`' or '${'
  938. ScanStringConstant<true, true>('`', pp);
  939. OLECHAR ch;
  940. EncodedCharPtr last = m_pchLast;
  941. ch = this->ReadFirst(*pp, last);
  942. if (ch == '`')
  943. {
  944. // Simple string template - no substitutions
  945. return tkStrTmplBasic;
  946. }
  947. else if (ch == '$')
  948. {
  949. ch = this->ReadFirst(*pp, last);
  950. if (ch == '{')
  951. {
  952. // Next token after expr should be tkStrTmplMid or tkStrTmplEnd.
  953. // In string template scanning mode, we expect the next char to be '}'
  954. // and will treat it as the beginning of tkStrTmplEnd or tkStrTmplMid
  955. m_fStringTemplateDepth++;
  956. // Regular string template begin - next is first substitution
  957. return tkStrTmplBegin;
  958. }
  959. }
  960. // Error - make sure pointer stays at the last character of the error token instead of after it in the error case
  961. (*pp)--;
  962. return ScanError(m_currentCharacter, tkStrTmplBegin);
  963. }
  964. template<typename EncodingPolicy>
  965. tokens Scanner<EncodingPolicy>::ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp)
  966. {
  967. // String template middle and end tokens must begin with a string constant
  968. ScanStringConstant<true, true>('`', pp);
  969. OLECHAR ch;
  970. EncodedCharPtr last = m_pchLast;
  971. ch = this->ReadFirst(*pp, last);
  972. if (ch == '`')
  973. {
  974. // No longer in string template scanning mode
  975. m_fStringTemplateDepth--;
  976. // This is the last part of the template ...`
  977. return tkStrTmplEnd;
  978. }
  979. else if (ch == '$')
  980. {
  981. ch = this->ReadFirst(*pp, last);
  982. if (ch == '{')
  983. {
  984. // This is just another middle part of the template }...${
  985. return tkStrTmplMid;
  986. }
  987. }
  988. // Error - make sure pointer stays at the last character of the error token instead of after it in the error case
  989. (*pp)--;
  990. return ScanError(m_currentCharacter, tkStrTmplEnd);
  991. }
  992. template<typename EncodingPolicy>
  993. tokens Scanner<EncodingPolicy>::ScanTypeAnnotationType(EncodedCharPtr *pp)
  994. {
  995. OLECHAR ch;
  996. EncodedCharPtr p = *pp;
  997. EncodedCharPtr last = m_pchLast;
  998. tokens token;
  999. switch (ch = this->ReadFirst(p, last))
  1000. {
  1001. case 'i':
  1002. if (p[0] == 'n' && p[1] == 't' && p[2] == '}')
  1003. {
  1004. p += 3;
  1005. token = tkTypeInt;
  1006. break;
  1007. }
  1008. Error(ERRsyntax);
  1009. case 'f':
  1010. if (p[0] == 'l' && p[1] == 'o' && p[2] == 'a' && p[3] == 't' && p[4] == '}')
  1011. {
  1012. p += 5;
  1013. token = tkTypeFloat;
  1014. break;
  1015. }
  1016. Error(ERRsyntax);
  1017. case 'b':
  1018. if (p[0] == 'o' && p[1] == 'o' && p[2] == 'l' && p[3] == '}')
  1019. {
  1020. p += 4;
  1021. token = tkTypeBool;
  1022. break;
  1023. }
  1024. Error(ERRsyntax);
  1025. default:
  1026. Error(ERRsyntax);
  1027. }
  1028. //Consume the rest of the multiline comment ...*/
  1029. for (;;)
  1030. {
  1031. switch (ch = this->ReadFirst(p, last))
  1032. {
  1033. case '*':
  1034. if (*p == '/') {
  1035. *pp = p + 1;
  1036. return token;
  1037. }
  1038. }
  1039. }
  1040. }
/*****************************************************************************
*
*  Parses a string constant. Note that the string value is stored in
*  a volatile buffer (or allocated on the heap if too long), and thus
*  the string should be saved off before the next token is scanned.
*
*  Template parameters:
*    stringTemplateMode - scanning a part of a template literal: line breaks are
*                         accepted, and scanning stops in front of '`' or "${".
*    createRawString    - additionally collect the raw (un-cooked) text in
*                         m_tempChBufSecondary. Must equal stringTemplateMode.
*  Parameters:
*    delim - the quote character that terminates an ordinary string.
*    pp    - in: position of the first character to scan;
*            out: position where scanning stopped.
*  Returns tkStrCon, or the result of ScanError in syntax-coloring mode. In
*  non-coloring mode malformed input is reported through Error().
*/
template<typename EncodingPolicy>
template<bool stringTemplateMode, bool createRawString>
tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
{
    static_assert((stringTemplateMode && createRawString) || (!stringTemplateMode && !createRawString), "stringTemplateMode and createRawString must have the same value");

    // ch is the cooked character, rawch the character recorded in the raw buffer,
    // c and wT are scratch values used while decoding escapes.
    OLECHAR ch, c, rawch;
    int wT;
    EncodedCharPtr p = *pp;
    EncodedCharPtr last = m_pchLast;

    // Reset
    m_OctOrLeadingZeroOnLastTKNumber = false;
    m_EscapeOnLastTkStrCon = FALSE;

    m_tempChBuf.Init();

    // Use template parameter to gate raw string creation.
    // If createRawString is false, all these operations should be no-ops
    if (createRawString)
    {
        m_tempChBufSecondary.Init();
    }

    // Each iteration decodes one source character (or escape sequence) and appends it to
    // the buffers at the bottom of the loop. 'continue' skips the append; 'goto LBreak'
    // ends the string.
    for (;;)
    {
        switch ((rawch = ch = this->ReadFirst(p, last)))
        {
        case kchRET:
            if (stringTemplateMode)
            {
                if (this->PeekFirst(p, last) == kchNWL)
                {
                    // Eat the <LF> char, ignore return
                    this->ReadFirst(p, last);
                }

                // Both <CR> and <CR><LF> are normalized to <LF> in template cooked and raw values
                ch = rawch = kchNWL;
            }

LEcmaLineBreak:
            // Fall through
        case kchNWL:
            if (stringTemplateMode)
            {
                // Notify the scanner to update current line, number of lines etc
                NotifyScannedNewLine();
                break;
            }

            // An unescaped line break inside an ordinary string is an error.
            m_currentCharacter = p - 1;
            if (m_fSyntaxColor)
            {
                *pp = p - 1;
                return ScanError(p - 1, tkStrCon);
            }
            Error(ERRnoStrEnd);

        case '"':
        case '\'':
            // Only the quote that opened the string closes it; the other quote is ordinary text.
            if (ch == delim)
                goto LBreak;
            break;

        case '`':
            // In string template scan mode, don't consume the '`' - we need to differentiate
            // between a closed string template and the expression open sequence - ${
            if (stringTemplateMode)
            {
                p--;
                goto LBreak;
            }

            // If we aren't scanning for a string template, do the default thing
            goto LMainDefault;

        case '$':
            // If we are parsing a string literal part of a string template, ${ indicates we need to switch
            // to parsing an expression.
            if (stringTemplateMode && this->PeekFirst(p, last) == '{')
            {
                // Rewind to the $ and return
                p--;
                goto LBreak;
            }

            // If we aren't scanning for a string template, do the default thing
            goto LMainDefault;

        case kchNUL:
            // A NUL at or past the end of the source means the string was never closed;
            // a NUL before the end is kept as an ordinary character.
            if (p >= last)
            {
                m_currentCharacter = p - 1;
                if (m_fSyntaxColor)
                {
                    *pp = p - 1;
                    return ScanError(p - 1, tkStrCon);
                }
                Error(ERRnoStrEnd);
            }
            break;

        default:
LMainDefault:
            if (this->IsMultiUnitChar(ch))
            {
                if ((ch == kchLS || ch == kchPS))
                {
                    goto LEcmaLineBreak;
                }

                // Decode the remaining units of the character, then re-check for the
                // Unicode line/paragraph separators.
                rawch = ch = this->template ReadRest<true>(ch, p, last);
                switch (ch)
                {
                case kchLS: // 0x2028, classifies as new line
                case kchPS: // 0x2029, classifies as new line
                    goto LEcmaLineBreak;
                }
            }
            break;

        case kchBSL:
            // In raw mode '\\' is not an escape character, just add the char into the raw buffer.
            m_tempChBufSecondary.template AppendCh<createRawString>(ch);

            m_EscapeOnLastTkStrCon=TRUE;

            // In raw mode, we append the raw char itself and not the escaped value so save the char.
            rawch = ch = this->ReadFirst(p, last);
            codepoint_t codePoint = 0;
            // Error reported by ReturnScanError unless a more specific one is set first.
            uint errorType = (uint)ERRbadHexDigit;
            switch (ch)
            {
            case 'b':
                ch = 0x08;
                break;
            case 't':
                ch = 0x09;
                break;
            case 'v':
                ch = 0x0B; //Only in ES5 mode
                break; //same as default
            case 'n':
                ch = 0x0A;
                break;
            case 'f':
                ch = 0x0C;
                break;
            case 'r':
                ch = 0x0D;
                break;
            case 'x':
                // Insert the 'x' here before jumping to parse the hex digits.
                m_tempChBufSecondary.template AppendCh<createRawString>(ch);

                // 2 hex digits
                ch = 0;
                goto LTwoHex;
            case 'u':
                // Raw string just inserts a 'u' here.
                m_tempChBufSecondary.template AppendCh<createRawString>(ch);

                ch = 0;
                // Either four hex digits follow (\uXXXX) or, in ES6 unicode mode, a
                // braced code point (\u{X...}).
                if (Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto LFourHex;
                else if (c != '{' || !this->es6UnicodeMode)
                    goto ReturnScanError;

                Assert(c == '{');
                // c should definitely be a '{' which should be appended to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                //At least one digit is expected
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                {
                    goto ReturnScanError;
                }

                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                codePoint = static_cast<codepoint_t>(wT);

                // Accumulate the remaining hex digits, rejecting values above 0x10FFFF.
                while(Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                {
                    m_tempChBufSecondary.template AppendCh<createRawString>(c);
                    codePoint <<= 4;
                    codePoint += static_cast<codepoint_t>(wT);

                    if (codePoint > 0x10FFFF)
                    {
                        errorType = (uint)ERRInvalidCodePoint;
                        goto ReturnScanError;
                    }
                }

                if (c != '}')
                {
                    errorType = (uint)ERRMissingCurlyBrace;
                    goto ReturnScanError;
                }

                Assert(codePoint <= 0x10FFFF);

                if (codePoint >= 0x10000)
                {
                    // Needs a surrogate pair: the first half is appended here, the second
                    // half (left in ch) is appended by the common code at the bottom of the loop.
                    OLECHAR lower = 0;
                    Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &ch);
                    m_tempChBuf.AppendCh(lower);
                }
                else
                {
                    ch = (char16)codePoint;
                }

                // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
                if (createRawString)
                    rawch = c;

                break;
LFourHex:
                // Entered with the first of four hex digits already read into c / wT.
                codePoint = 0x0;
                // Append first hex digit character to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                codePoint += static_cast<codepoint_t>(wT * 0x1000);
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                // Append the second hex digit character to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);
                codePoint += static_cast<codepoint_t>(wT * 0x0100);

LTwoHex:
                // Shared tail: the last two hex digits of \uXXXX, or both digits of \xXX.
                // This code path doesn't expect curly.
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                // Append this hex digit character to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                codePoint += static_cast<codepoint_t>(wT * 0x0010);

                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                codePoint += static_cast<codepoint_t>(wT);

                // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
                if (createRawString)
                    rawch = c;

                if (codePoint < 0x10000)
                {
                    ch = static_cast<OLECHAR>(codePoint);
                }
                else
                {
                    goto ReturnScanError;
                }
                break;
            case '0':
            case '1':
            case '2':
            case '3':
                // 1 to 3 octal digits

                ch -= '0';

                // Octal escape sequences are not allowed inside string template literals
                if (stringTemplateMode)
                {
                    // Only a lone \0 (not followed by another octal digit) is accepted.
                    c = this->PeekFirst(p, last);
                    if (ch != 0 || (c >= '0' && c <= '7'))
                    {
                        errorType = (uint)ERRES5NoOctal;
                        goto ReturnScanError;
                    }
                    break;
                }

                wT = (c = this->ReadFirst(p, last)) - '0';
                if ((char16)wT > 7)
                {
                    // Next character is not an octal digit: un-read it. The flag is set
                    // unless this was a plain \0 not followed by a decimal digit.
                    if (ch != 0 || ((char16)wT <= 9))
                    {
                        m_OctOrLeadingZeroOnLastTKNumber = true;
                    }
                    p--;
                    break;
                }

                m_OctOrLeadingZeroOnLastTKNumber = true;
                ch = static_cast< OLECHAR >(ch * 8 + wT);
                goto LOneOctal;
            case '4':
            case '5':
            case '6':
            case '7':
                // 1 to 2 octal digits

                // Octal escape sequences are not allowed inside string template literals
                if (stringTemplateMode)
                {
                    errorType = (uint)ERRES5NoOctal;
                    goto ReturnScanError;
                }

                ch -= '0';

                m_OctOrLeadingZeroOnLastTKNumber = true;

LOneOctal:
                // Optional final octal digit; anything else is un-read.
                wT = (c = this->ReadFirst(p, last)) - '0';
                if ((char16)wT > 7)
                {
                    p--;
                    break;
                }

                ch = static_cast< OLECHAR >(ch * 8 + wT);
                break;

            case kchRET:        // 0xD
                if (stringTemplateMode)
                {
                    // If this is \<CR><LF> we can eat the <LF> right now
                    if (this->PeekFirst(p, last) == kchNWL)
                    {
                        // Eat the <LF> char, ignore return
                        this->ReadFirst(p, last);
                    }

                    // Both \<CR> and \<CR><LF> are normalized to \<LF> in template raw string
                    rawch = kchNWL;
                }
            case kchLS:         // 0x2028, classifies as new line
            case kchPS:         // 0x2029, classifies as new line
            case kchNWL:        // 0xA
LEcmaEscapeLineBreak:
                // Line continuation: backslash followed by a line break.
                if (stringTemplateMode)
                {
                    // We're going to ignore the line continuation tokens for the cooked strings, but we need to append the token for raw strings
                    m_tempChBufSecondary.template AppendCh<createRawString>(rawch);

                    // Template literal strings ignore all escaped line continuation tokens
                    NotifyScannedNewLine();
                    continue;
                }

                m_currentCharacter = p;
                ScanNewLine(ch);
                p = m_currentCharacter;
                if (m_fSyntaxColor && *p == 0)
                {
                    // Special case for multi-line strings during colorization.
                    m_scanState = delim == '"' ? ScanStateMultiLineDoubleQuoteString : ScanStateMultiLineSingleQuoteString;
                    *pp = p;
                    return tkStrCon;
                }
                continue;

            case 0:
                if (p >= last)
                {
                    errorType = (uint)ERRnoStrEnd;

ReturnScanError:
                    // Common exit for malformed escapes: errorType holds the error to report.
                    m_currentCharacter = p - 1;
                    if (m_fSyntaxColor)
                    {
                        *pp = p - 1;
                        return ScanError(p - 1, tkStrCon);
                    }
                    Error(errorType);
                }
                else if (stringTemplateMode)
                {
                    // Escaped null character is translated into 0x0030 for raw template literals
                    rawch = 0x0030;
                }
                break;

            default:
                // Any other escaped character stands for itself.
                if (this->IsMultiUnitChar(ch))
                {
                    rawch = ch = this->template ReadRest<true>(ch, p, last);
                    switch (ch)
                    {
                    case kchLS:
                    case kchPS:
                        goto LEcmaEscapeLineBreak;
                    }
                }
                break;
            }
            break;
        }

        // Common tail: record the cooked character and (in raw mode) the raw character.
        m_tempChBuf.AppendCh(ch);
        m_tempChBufSecondary.template AppendCh<createRawString>(rawch);
    }

LBreak:
    // End of the string. Decide whether to create an identifier for its value.
    bool createPid = true;

    if (m_fSyntaxColor || (m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0)
    {
        createPid = false;

        // Even when string identifiers are suppressed, "use strict" still gets one.
        if ((m_tempChBuf.m_ichCur == 10) && (0 == memcmp(_u("use strict"), m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur * sizeof(OLECHAR))))
        {
            createPid = true;
        }
    }

    if (createPid)
    {
        m_ptoken->SetIdentifier(m_phtbl->PidHashNameLen(m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur));
    }
    else
    {
        m_ptoken->SetIdentifier(NULL);
    }

    m_scanState = ScanStateNormal;
    m_doubleQuoteOnLastTkStrCon = '"' == delim;

    *pp = p;
    return tkStrCon;
}
  1414. template<typename EncodingPolicy>
  1415. tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
  1416. {
  1417. return ScanStringConstant<false, false>(delim, pp);
  1418. }
  1419. /*****************************************************************************
  1420. *
  1421. * Scan annotations used for type hints and function ids.
  1422. * Pointer is expected to be inside a multiline comment
  1423. * Return tkNone if there are no annotations inside this comment
  1424. */
  1425. template<typename EncodingPolicy>
  1426. tokens Scanner<EncodingPolicy>::ScanAnnotations(EncodedCharPtr *pp)
  1427. {
  1428. EncodedCharPtr p = *pp;
  1429. EncodedCharPtr last = m_pchLast;
  1430. OLECHAR ch;
  1431. //FCASTE: IMPORTANT
  1432. for (;;)
  1433. {
  1434. switch ((ch = this->ReadFirst(p, last)))
  1435. {
  1436. case '*':
  1437. if (*p == '/')
  1438. {
  1439. *pp = p + 1;
  1440. if (m_fSyntaxColor)
  1441. {
  1442. m_scanState = ScanStateNormal;
  1443. return tkComment;
  1444. }
  1445. return tkNone;
  1446. }
  1447. break;
  1448. case '@':
  1449. switch (p[0])
  1450. {
  1451. case 't':
  1452. if (p[1] == 'y' && p[2] == 'p' && p[3] == 'e' && !IsIdContinueNext(p + 4, last))
  1453. {
  1454. //+5 so we skip the space (change that)
  1455. p += 5;//FCASTE: don't like this code, revisit
  1456. switch ((ch = this->ReadFirst(p, last)))
  1457. {
  1458. case '{':
  1459. *pp = p;
  1460. return tkTypeAnnBegin;
  1461. }
  1462. }
  1463. Error(ERRsyntax);
  1464. default:
  1465. Error(ERRsyntax);
  1466. }
  1467. case kchLS: // 0x2028, classifies as new line
  1468. case kchPS: // 0x2029, classifies as new line
  1469. LEcmaLineBreak:
  1470. goto LLineBreak;
  1471. case kchRET:
  1472. case kchNWL:
  1473. LLineBreak:
  1474. m_fHadEol = TRUE;
  1475. m_currentCharacter = p;
  1476. ScanNewLine(ch);
  1477. p = m_currentCharacter;
  1478. break;
  1479. case kchNUL:
  1480. if (p >= last)
  1481. {
  1482. m_currentCharacter = p - 1;
  1483. *pp = p - 1;
  1484. if (m_fSyntaxColor)
  1485. {
  1486. m_scanState = ScanStateMultiLineComment;
  1487. return tkComment;
  1488. }
  1489. Error(ERRnoCmtEnd);
  1490. }
  1491. break;
  1492. default:
  1493. if (this->IsMultiUnitChar(ch))
  1494. {
  1495. ch = this->template ReadRest<true>(ch, p, last);
  1496. switch (ch)
  1497. {
  1498. case kchLS:
  1499. case kchPS:
  1500. goto LEcmaLineBreak;
  1501. }
  1502. }
  1503. break;
  1504. }
  1505. }
  1506. }
  1507. /*****************************************************************************
  1508. *
  1509. * Consume a C-style comment.
  1510. */
  1511. template<typename EncodingPolicy>
  1512. tokens Scanner<EncodingPolicy>::SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef)
  1513. {
  1514. Assert(containTypeDef != nullptr);
  1515. EncodedCharPtr p = *pp;
  1516. *containTypeDef = false;
  1517. EncodedCharPtr last = m_pchLast;
  1518. OLECHAR ch;
  1519. //FCASTE: IMPORTANT
  1520. for (;;)
  1521. {
  1522. switch((ch = this->ReadFirst(p, last)))
  1523. {
  1524. case '*':
  1525. if (*p == '/')
  1526. {
  1527. *pp = p + 1;
  1528. if (m_fSyntaxColor)
  1529. {
  1530. m_scanState = ScanStateNormal;
  1531. return tkComment;
  1532. }
  1533. return tkNone;
  1534. }
  1535. break;
  1536. // ES 2015 11.3 Line Terminators
  1537. case kchLS: // 0x2028, classifies as new line
  1538. case kchPS: // 0x2029, classifies as new line
  1539. LEcmaLineBreak:
  1540. goto LLineBreak;
  1541. case kchRET:
  1542. case kchNWL:
  1543. LLineBreak:
  1544. m_fHadEol = TRUE;
  1545. m_currentCharacter = p;
  1546. ScanNewLine(ch);
  1547. p = m_currentCharacter;
  1548. break;
  1549. case kchNUL:
  1550. if (p >= last)
  1551. {
  1552. m_currentCharacter = p - 1;
  1553. *pp = p - 1;
  1554. if (m_fSyntaxColor)
  1555. {
  1556. m_scanState = ScanStateMultiLineComment;
  1557. return tkComment;
  1558. }
  1559. Error(ERRnoCmtEnd);
  1560. }
  1561. break;
  1562. default:
  1563. if (this->IsMultiUnitChar(ch))
  1564. {
  1565. ch = this->template ReadRest<true>(ch, p, last);
  1566. switch (ch)
  1567. {
  1568. case kchLS:
  1569. case kchPS:
  1570. goto LEcmaLineBreak;
  1571. }
  1572. }
  1573. break;
  1574. }
  1575. }
  1576. }
  1577. /*****************************************************************************
  1578. *
  1579. * We've encountered a newline - update various counters and things.
  1580. */
  1581. template<typename EncodingPolicy>
  1582. void Scanner<EncodingPolicy>::ScanNewLine(uint ch)
  1583. {
  1584. if (ch == '\r' && PeekNextChar() == '\n')
  1585. {
  1586. ReadNextChar();
  1587. }
  1588. NotifyScannedNewLine();
  1589. }
  1590. /*****************************************************************************
  1591. *
  1592. * We've encountered a newline - update various counters and things.
  1593. */
  1594. template<typename EncodingPolicy>
  1595. void Scanner<EncodingPolicy>::NotifyScannedNewLine()
  1596. {
  1597. // update in scanner: previous line, current line, number of lines.
  1598. m_line++;
  1599. m_pchPrevLine = m_pchMinLine;
  1600. m_pchMinLine = m_currentCharacter;
  1601. m_cMinLineMultiUnits = this->m_cMultiUnits;
  1602. }
  1603. /*****************************************************************************
  1604. *
  1605. * Delivers a token stream.
  1606. */
  1607. template<typename EncodingPolicy>
  1608. tokens Scanner<EncodingPolicy>::ScanForcingPid()
  1609. {
  1610. if (m_DeferredParseFlags != ScanFlagNone)
  1611. {
  1612. BYTE deferredParseFlagsSave = m_DeferredParseFlags;
  1613. m_DeferredParseFlags = ScanFlagNone;
  1614. tokens result = tkEOF;
  1615. TryFinally(
  1616. [&]() /* try block */
  1617. {
  1618. result = this->Scan();
  1619. },
  1620. [&](bool) /* finally block */
  1621. {
  1622. this->m_DeferredParseFlags = deferredParseFlagsSave;
  1623. });
  1624. return result;
  1625. }
  1626. return Scan();
  1627. }
  1628. template<typename EncodingPolicy>
  1629. tokens Scanner<EncodingPolicy>::Scan()
  1630. {
  1631. return ScanCore(true);
  1632. }
  1633. template<typename EncodingPolicy>
  1634. tokens Scanner<EncodingPolicy>::ScanNoKeywords()
  1635. {
  1636. return ScanCore(false);
  1637. }
  1638. template<typename EncodingPolicy>
  1639. tokens Scanner<EncodingPolicy>::ScanAhead()
  1640. {
  1641. return ScanNoKeywords();
  1642. }
  1643. template<typename EncodingPolicy>
  1644. tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)
  1645. {
  1646. codepoint_t ch;
  1647. OLECHAR firstChar;
  1648. OLECHAR secondChar;
  1649. EncodedCharPtr pchT;
  1650. size_t multiUnits = 0;
  1651. EncodedCharPtr p = m_currentCharacter;
  1652. EncodedCharPtr last = m_pchLast;
  1653. bool seenDelimitedCommentEnd = false;
  1654. // store the last token
  1655. m_tkPrevious = m_ptoken->tk;
  1656. m_iecpLimTokPrevious = IecpLimTok(); // Introduced for use by lambda parsing to find correct span of expression lambdas
  1657. if (p >= last)
  1658. {
  1659. m_pchMinTok = p;
  1660. m_cMinTokMultiUnits = this->m_cMultiUnits;
  1661. goto LEof;
  1662. }
  1663. tokens token;
  1664. m_fHadEol = FALSE;
  1665. CharTypes chType;
  1666. charcount_t commentStartLine;
  1667. if (m_scanState && *p != 0)
  1668. {
  1669. if (m_fSyntaxColor)
  1670. {
  1671. firstChar = 0;
  1672. secondChar = 0;
  1673. m_pchMinTok = p;
  1674. m_cMinTokMultiUnits = this->m_cMultiUnits;
  1675. switch (m_scanState)
  1676. {
  1677. case ScanStateMultiLineComment:
  1678. goto LMultiLineComment;
  1679. case ScanStateMultiLineSingleQuoteString:
  1680. ch = '\'';
  1681. m_scanState = ScanStateNormal;
  1682. goto LScanStringConstant;
  1683. case ScanStateMultiLineDoubleQuoteString:
  1684. ch = '"';
  1685. m_scanState = ScanStateNormal;
  1686. goto LScanStringConstant;
  1687. }
  1688. }
  1689. switch (m_scanState)
  1690. {
  1691. case ScanStateStringTemplateMiddleOrEnd:
  1692. AssertMsg(m_fStringTemplateDepth > 0,
  1693. "Shouldn't be trying to parse a string template end or middle token if we aren't scanning a string template");
  1694. m_scanState = ScanStateNormal;
  1695. pchT = p;
  1696. token = ScanStringTemplateMiddleOrEnd(&pchT);
  1697. p = pchT;
  1698. goto LDone;
  1699. case ScanStateTypeAnnotationMiddle:
  1700. m_scanState = ScanStateNormal;
  1701. pchT = p;
  1702. token = ScanTypeAnnotationType(&pchT);
  1703. p = pchT;
  1704. goto LDone;
  1705. }
  1706. }
  1707. for (;;)
  1708. {
  1709. LLoop:
  1710. m_pchMinTok = p;
  1711. m_cMinTokMultiUnits = this->m_cMultiUnits;
  1712. ch = this->ReadFirst(p, last);
  1713. #if DEBUG
  1714. chType = this->charClassifier->GetCharType((OLECHAR)ch);
  1715. #endif
  1716. switch (ch)
  1717. {
  1718. default:
  1719. if (ch == kchLS ||
  1720. ch == kchPS )
  1721. {
  1722. goto LNewLine;
  1723. }
  1724. {
  1725. BOOL isMultiUnit = this->IsMultiUnitChar((OLECHAR)ch);
  1726. if (isMultiUnit)
  1727. {
  1728. ch = this->template ReadRest<true>((OLECHAR)ch, p, last);
  1729. }
  1730. if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
  1731. {
  1732. codepoint_t upper = this->PeekFull(p, last);
  1733. if (Js::NumberUtilities::IsSurrogateUpperPart(upper))
  1734. {
  1735. // Consume the rest of the utf8 bytes for the codepoint
  1736. OLECHAR decodedUpper = this->ReadSurrogatePairUpper(p, last);
  1737. Assert(decodedUpper == (OLECHAR) upper);
  1738. ch = Js::NumberUtilities::SurrogatePairAsCodePoint(ch, upper);
  1739. }
  1740. }
  1741. if (this->charClassifier->IsIdStart(ch))
  1742. {
  1743. // We treat IDContinue as an error.
  1744. token = ScanIdentifierContinue(identifyKwds, false, !!isMultiUnit, m_pchMinTok, p, &p);
  1745. break;
  1746. }
  1747. }
  1748. chType = this->charClassifier->GetCharType(ch);
  1749. switch (chType)
  1750. {
  1751. case _C_WSP: continue;
  1752. case _C_NWL: goto LNewLine;
  1753. // All other types (except errors) are handled by the outer switch.
  1754. }
  1755. Assert(chType == _C_LET || chType == _C_ERR || chType == _C_UNK || chType == _C_BKQ || chType == _C_SHP || chType == _C_AT || chType == _C_DIG);
  1756. if (m_fSyntaxColor)
  1757. {
  1758. // No need to decrement the current position pointer as scanner will continue with scan next character onwards
  1759. return ScanError(p, tkID);
  1760. }
  1761. m_currentCharacter = p - 1;
  1762. Error(ERRillegalChar);
  1763. continue;
  1764. case '\0':
  1765. // Put back the null in case we get called again.
  1766. p--;
  1767. LEof:
  1768. token = tkEOF;
  1769. if (p + 1 < last)
  1770. {
  1771. if (m_fSyntaxColor)
  1772. {
  1773. return ScanError(p + 1, tkID);
  1774. }
  1775. // A \0 prior to the end of the text is an invalid character.
  1776. Error(ERRillegalChar);
  1777. }
  1778. break;
  1779. case 0x0009:
  1780. case 0x000B:
  1781. case 0x000C:
  1782. case 0x0020:
  1783. Assert(chType == _C_WSP);
  1784. continue;
  1785. case '.':
  1786. if (!Js::NumberUtilities::IsDigit(*p))
  1787. {
  1788. // Not a double
  1789. if (m_scriptContext->GetConfig()->IsES6SpreadEnabled() &&
  1790. this->PeekFirst(p, last) == '.' &&
  1791. this->PeekFirst(p + 1, last) == '.')
  1792. {
  1793. token = tkEllipsis;
  1794. p += 2;
  1795. }
  1796. else
  1797. {
  1798. token = tkDot;
  1799. }
  1800. break;
  1801. }
  1802. // May be a double, fall through
  1803. case '0': case '1': case '2': case '3': case '4':
  1804. case '5': case '6': case '7': case '8': case '9':
  1805. {
  1806. double dbl;
  1807. Assert(chType == _C_DIG || chType == _C_DOT);
  1808. p = m_pchMinTok;
  1809. this->RestoreMultiUnits(m_cMinTokMultiUnits);
  1810. bool likelyInt = true;
  1811. pchT = FScanNumber(p, &dbl, likelyInt);
  1812. if (p == pchT)
  1813. {
  1814. Assert(this->PeekFirst(p, last) != '.');
  1815. if (m_fSyntaxColor)
  1816. {
  1817. return ScanError(m_currentCharacter + 1, tkFltCon);
  1818. }
  1819. Error(ERRbadNumber);
  1820. }
  1821. Assert(!Js::NumberUtilities::IsNan(dbl));
  1822. p = pchT;
  1823. int32 value;
  1824. if (likelyInt && Js::NumberUtilities::FDblIsInt32(dbl, &value))
  1825. {
  1826. m_ptoken->SetLong(value);
  1827. token = tkIntCon;
  1828. }
  1829. else
  1830. {
  1831. token = tkFltCon;
  1832. m_ptoken->SetDouble(dbl, likelyInt);
  1833. }
  1834. break;
  1835. }
  1836. case '(': Assert(chType == _C_LPR); token = tkLParen; break;
  1837. case ')': Assert(chType == _C_RPR); token = tkRParen; break;
  1838. case ',': Assert(chType == _C_CMA); token = tkComma; break;
  1839. case ';': Assert(chType == _C_SMC); token = tkSColon; break;
  1840. case '[': Assert(chType == _C_LBR); token = tkLBrack; break;
  1841. case ']': Assert(chType == _C_RBR); token = tkRBrack; break;
  1842. case '~': Assert(chType == _C_TIL); token = tkTilde; break;
  1843. case '?': Assert(chType == _C_QUE); token = tkQMark; break;
  1844. case '{': Assert(chType == _C_LC); token = tkLCurly; break;
  1845. // ES 2015 11.3 Line Terminators
  1846. case '\r':
  1847. case '\n':
  1848. // kchLS:
  1849. // kchPS:
  1850. LNewLine:
  1851. m_currentCharacter = p;
  1852. ScanNewLine(ch);
  1853. p = m_currentCharacter;
  1854. m_fHadEol = TRUE;
  1855. continue;
  1856. LReserved:
  1857. {
  1858. // We will derive the PID from the token
  1859. Assert(token < tkID);
  1860. m_ptoken->SetIdentifier(NULL);
  1861. goto LDone;
  1862. }
  1863. LEval:
  1864. {
  1865. token = tkID;
  1866. if (!this->m_parser) goto LIdentifier;
  1867. m_ptoken->SetIdentifier(this->m_parser->GetEvalPid());
  1868. goto LDone;
  1869. }
  1870. LArguments:
  1871. {
  1872. token = tkID;
  1873. if (!this->m_parser) goto LIdentifier;
  1874. m_ptoken->SetIdentifier(this->m_parser->GetArgumentsPid());
  1875. goto LDone;
  1876. }
  1877. LTarget:
  1878. {
  1879. token = tkID;
  1880. if (!this->m_parser) goto LIdentifier;
  1881. m_ptoken->SetIdentifier(this->m_parser->GetTargetPid());
  1882. goto LDone;
  1883. }
  1884. #include "kwd-swtch.h"
  1885. case 'A': case 'B': case 'C': case 'D': case 'E':
  1886. case 'F': case 'G': case 'H': case 'I': case 'J':
  1887. case 'K': case 'L': case 'M': case 'N': case 'O':
  1888. case 'P': case 'Q': case 'R': case 'S': case 'T':
  1889. case 'U': case 'V': case 'W': case 'X': case 'Y':
  1890. case 'Z':
  1891. // Lower-case letters handled in kwd-swtch.h above during reserved word recognition.
  1892. case '$': case '_':
  1893. LIdentifier:
  1894. Assert(this->charClassifier->IsIdStart(ch));
  1895. Assert(ch < 0x10000 && !this->IsMultiUnitChar((OLECHAR)ch));
  1896. token = ScanIdentifierContinue(identifyKwds, false, false, m_pchMinTok, p, &p);
  1897. break;
  1898. case '`':
  1899. Assert(chType == _C_BKQ);
  1900. pchT = p;
  1901. token = ScanStringTemplateBegin(&pchT);
  1902. p = pchT;
  1903. break;
  1904. case '}':
  1905. Assert(chType == _C_RC);
  1906. token = tkRCurly;
  1907. break;
  1908. case '\\':
  1909. pchT = p - 1;
  1910. token = ScanIdentifier(identifyKwds, &pchT);
  1911. if (tkScanError == token)
  1912. {
  1913. m_currentCharacter = p;
  1914. if (m_fSyntaxColor)
  1915. return ScanError(p, tkID);
  1916. Error(ERRillegalChar);
  1917. }
  1918. p = pchT;
  1919. break;
  1920. case ':':
  1921. token = tkColon;
  1922. break;
  1923. case '=':
  1924. token = tkAsg;
  1925. switch (this->PeekFirst(p, last))
  1926. {
  1927. case '=':
  1928. p++;
  1929. token = tkEQ;
  1930. if (this->PeekFirst(p, last) == '=')
  1931. {
  1932. p++;
  1933. token = tkEqv;
  1934. }
  1935. break;
  1936. case '>':
  1937. p++;
  1938. token = tkDArrow;
  1939. break;
  1940. }
  1941. break;
  1942. case '!':
  1943. token = tkBang;
  1944. if (this->PeekFirst(p, last) == '=')
  1945. {
  1946. p++;
  1947. token = tkNE;
  1948. if (this->PeekFirst(p, last) == '=')
  1949. {
  1950. p++;
  1951. token = tkNEqv;
  1952. }
  1953. }
  1954. break;
  1955. case '+':
  1956. token = tkAdd;
  1957. switch (this->PeekFirst(p, last))
  1958. {
  1959. case '=':
  1960. p++;
  1961. token = tkAsgAdd;
  1962. break;
  1963. case '+':
  1964. p++;
  1965. token = tkInc;
  1966. break;
  1967. }
  1968. break;
  1969. case '-':
  1970. token = tkSub;
  1971. switch (this->PeekFirst(p, last))
  1972. {
  1973. case '=':
  1974. p++;
  1975. token = tkAsgSub;
  1976. break;
  1977. case '-':
  1978. p++;
  1979. token = tkDec;
  1980. if (!m_fIsModuleCode)
  1981. {
  1982. if ('>' == this->PeekFirst(p, last) && (m_fHadEol || seenDelimitedCommentEnd)) // --> HTMLCloseComment
  1983. {
  1984. goto LSkipLineComment;
  1985. }
  1986. }
  1987. break;
  1988. }
  1989. break;
  1990. case '*':
  1991. token = tkStar;
  1992. switch(this->PeekFirst(p, last))
  1993. {
  1994. case '=' :
  1995. p++;
  1996. token = tkAsgMul;
  1997. break;
  1998. case '*' :
  1999. if (!m_scriptContext->GetConfig()->IsES7ExponentiationOperatorEnabled())
  2000. {
  2001. break;
  2002. }
  2003. p++;
  2004. token = tkExpo;
  2005. if (this->PeekFirst(p, last) == '=')
  2006. {
  2007. p++;
  2008. token = tkAsgExpo;
  2009. }
  2010. }
  2011. break;
  2012. case '/':
  2013. token = tkDiv;
  2014. switch(this->PeekFirst(p, last))
  2015. {
  2016. case '=':
  2017. p++;
  2018. token = tkAsgDiv;
  2019. break;
  2020. case '/':
  2021. if (p >= last)
  2022. {
  2023. AssertMsg(!m_fIsModuleCode, "Do we have other line comment cases scanning pass last?");
  2024. // Effective source length may have excluded HTMLCommentSuffix "//... -->". If we are scanning
  2025. // those, we have passed "last" already. Move back and return EOF.
  2026. p = last;
  2027. goto LEof;
  2028. }
  2029. ch = *++p;
  2030. firstChar = (OLECHAR)ch;
  2031. LSkipLineComment:
  2032. pchT = NULL;
  2033. for (;;)
  2034. {
  2035. switch ((ch = this->ReadFirst(p, last)))
  2036. {
  2037. case kchLS: // 0x2028, classifies as new line
  2038. case kchPS: // 0x2029, classifies as new line
  2039. LEcmaCommentLineBreak:
  2040. // kchPS and kchLS are more than one unit in UTF-8.
  2041. if (pchT)
  2042. {
  2043. // kchPS and kchLS are more than one unit in UTF-8.
  2044. p = pchT;
  2045. }
  2046. else
  2047. {
  2048. // But only a single code unit in UTF16
  2049. p--;
  2050. }
  2051. this->RestoreMultiUnits(multiUnits);
  2052. goto LCommentLineBreak;
  2053. case kchNWL:
  2054. case kchRET:
  2055. p--;
  2056. LCommentLineBreak:
  2057. if (m_fSyntaxColor)
  2058. {
  2059. token = tkComment;
  2060. goto LDone;
  2061. }
  2062. // Subtract the comment length from the total char count for the purpose
  2063. // of deciding whether to defer AST and byte code generation.
  2064. m_parser->ReduceDeferredScriptLength((ULONG)(p - m_pchMinTok));
  2065. break;
  2066. case kchNUL:
  2067. if (p >= last)
  2068. {
  2069. p--;
  2070. goto LCommentLineBreak;
  2071. }
  2072. continue;
  2073. default:
  2074. if (this->IsMultiUnitChar((OLECHAR)ch))
  2075. {
  2076. pchT = p - 1;
  2077. multiUnits = this->m_cMultiUnits;
  2078. switch (ch = this->template ReadRest<true>((OLECHAR)ch, p, last))
  2079. {
  2080. case kchLS:
  2081. case kchPS:
  2082. goto LEcmaCommentLineBreak;
  2083. }
  2084. }
  2085. continue;
  2086. }
  2087. break;
  2088. }
  2089. continue;
  2090. case '*':
  2091. ch = *++p;
  2092. firstChar = (OLECHAR)ch;
  2093. if ((p + 1) < last)
  2094. {
  2095. secondChar = (OLECHAR)(*(p + 1));
  2096. }
  2097. else
  2098. {
  2099. secondChar = '\0';
  2100. }
  2101. LMultiLineComment:
  2102. pchT = p;
  2103. commentStartLine = m_line;
  2104. bool containTypeDef;
  2105. //FCASTE: Add parsing of @type here
  2106. token = m_typeAnnotationsOn ? ScanAnnotations(&pchT) : SkipComment(&pchT, &containTypeDef);
  2107. if (token == tkNone) {
  2108. // Subtract the comment length from the total char count for the purpose
  2109. // of deciding whether to defer AST and byte code generation.
  2110. m_parser->ReduceDeferredScriptLength((ULONG)(pchT - m_pchMinTok));
  2111. p = pchT;
  2112. seenDelimitedCommentEnd = true;
  2113. goto LLoop;
  2114. }
  2115. p = pchT;
  2116. break;
  2117. }
  2118. break;
  2119. case '%':
  2120. Assert(chType == _C_PCT);
  2121. token = tkPct;
  2122. if (this->PeekFirst(p, last) == '=')
  2123. {
  2124. p++;
  2125. token = tkAsgMod;
  2126. }
  2127. break;
  2128. case '<':
  2129. Assert(chType == _C_LT);
  2130. token = tkLT;
  2131. switch (this->PeekFirst(p, last))
  2132. {
  2133. case '=':
  2134. p++;
  2135. token = tkLE;
  2136. break;
  2137. case '<':
  2138. p++;
  2139. token = tkLsh;
  2140. if (this->PeekFirst(p, last) == '=')
  2141. {
  2142. p++;
  2143. token = tkAsgLsh;
  2144. break;
  2145. }
  2146. break;
  2147. case '!':
  2148. // ES 2015 B.1.3 - HTML comments are only allowed when parsing non-module code.
  2149. if (!m_fIsModuleCode && this->PeekFirst(p + 1, last) == '-' && this->PeekFirst(p + 2, last) == '-')
  2150. {
  2151. // This is a "<!--" comment - treat as //
  2152. if (p >= last)
  2153. {
  2154. // Effective source length may have excluded HTMLCommentSuffix "<!-- ... -->". If we are scanning
  2155. // those, we have passed "last" already. Move back and return EOF.
  2156. p = last;
  2157. goto LEof;
  2158. }
  2159. firstChar = '!';
  2160. goto LSkipLineComment;
  2161. }
  2162. break;
  2163. }
  2164. break;
  2165. case '>':
  2166. Assert(chType == _C_GT);
  2167. token = tkGT;
  2168. switch (this->PeekFirst(p, last))
  2169. {
  2170. case '=':
  2171. p++;
  2172. token = tkGE;
  2173. break;
  2174. case '>':
  2175. p++;
  2176. token = tkRsh;
  2177. switch (this->PeekFirst(p, last))
  2178. {
  2179. case '=':
  2180. p++;
  2181. token = tkAsgRsh;
  2182. break;
  2183. case '>':
  2184. p++;
  2185. token = tkRs2;
  2186. if (*p == '=')
  2187. {
  2188. p++;
  2189. token = tkAsgRs2;
  2190. }
  2191. break;
  2192. }
  2193. break;
  2194. }
  2195. break;
  2196. case '^':
  2197. Assert(chType == _C_XOR);
  2198. token = tkXor;
  2199. if (this->PeekFirst(p, last) == '=')
  2200. {
  2201. p++;
  2202. token = tkAsgXor;
  2203. }
  2204. break;
  2205. case '|':
  2206. Assert(chType == _C_BAR);
  2207. token = tkOr;
  2208. switch (this->PeekFirst(p, last))
  2209. {
  2210. case '=':
  2211. p++;
  2212. token = tkAsgOr;
  2213. break;
  2214. case '|':
  2215. p++;
  2216. token = tkLogOr;
  2217. break;
  2218. }
  2219. break;
  2220. case '&':
  2221. Assert(chType == _C_AMP);
  2222. token = tkAnd;
  2223. switch (this->PeekFirst(p, last))
  2224. {
  2225. case '=':
  2226. p++;
  2227. token = tkAsgAnd;
  2228. break;
  2229. case '&':
  2230. p++;
  2231. token = tkLogAnd;
  2232. break;
  2233. }
  2234. break;
  2235. case '\'':
  2236. case '"':
  2237. Assert(chType == _C_QUO || chType == _C_APO);
  2238. LScanStringConstant:
  2239. pchT = p;
  2240. token = this->ScanStringConstant((OLECHAR)ch, &pchT);
  2241. p = pchT;
  2242. break;
  2243. }
  2244. break;
  2245. }
  2246. LDone:
  2247. m_currentCharacter = p;
  2248. return (m_ptoken->tk = token);
  2249. }
  2250. template <typename EncodingPolicy>
  2251. IdentPtr Scanner<EncodingPolicy>::GetSecondaryBufferAsPid()
  2252. {
  2253. bool createPid = true;
  2254. if (m_fSyntaxColor || (m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0)
  2255. {
  2256. createPid = false;
  2257. }
  2258. if (createPid)
  2259. {
  2260. return m_phtbl->PidHashNameLen(m_tempChBufSecondary.m_prgch, m_tempChBufSecondary.m_ichCur);
  2261. }
  2262. else
  2263. {
  2264. return nullptr;
  2265. }
  2266. }
  2267. template <typename EncodingPolicy>
  2268. LPCOLESTR Scanner<EncodingPolicy>::StringFromLong(int32 lw)
  2269. {
  2270. _ltow_s(lw, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax, 10);
  2271. return m_tempChBuf.m_prgch;
  2272. }
  2273. template <typename EncodingPolicy>
  2274. IdentPtr Scanner<EncodingPolicy>::PidFromLong(int32 lw)
  2275. {
  2276. return m_phtbl->PidHashName(StringFromLong(lw));
  2277. }
  2278. template <typename EncodingPolicy>
  2279. LPCOLESTR Scanner<EncodingPolicy>::StringFromDbl(double dbl)
  2280. {
  2281. if (!Js::NumberUtilities::FDblToStr(dbl, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax))
  2282. {
  2283. Error(ERRnoMemory);
  2284. }
  2285. return m_tempChBuf.m_prgch;
  2286. }
  2287. template <typename EncodingPolicy>
  2288. IdentPtr Scanner<EncodingPolicy>::PidFromDbl(double dbl)
  2289. {
  2290. return m_phtbl->PidHashName(StringFromDbl(dbl));
  2291. }
  2292. template <typename EncodingPolicy>
  2293. void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint)
  2294. {
  2295. Capture(restorePoint, 0, 0);
  2296. }
  2297. template <typename EncodingPolicy>
  2298. void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr)
  2299. {
  2300. restorePoint->m_ichMinTok = this->IchMinTok();
  2301. restorePoint->m_ichMinLine = this->IchMinLine();
  2302. restorePoint->m_cMinTokMultiUnits = this->m_cMinTokMultiUnits;
  2303. restorePoint->m_cMinLineMultiUnits = this->m_cMinLineMultiUnits;
  2304. restorePoint->m_line = this->m_line;
  2305. restorePoint->m_fHadEol = this->m_fHadEol;
  2306. restorePoint->functionIdIncrement = functionIdIncrement;
  2307. restorePoint->lengthDecr = lengthDecr;
  2308. #ifdef DEBUG
  2309. restorePoint->m_cMultiUnits = this->m_cMultiUnits;
  2310. #endif
  2311. }
  2312. template <typename EncodingPolicy>
  2313. void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint)
  2314. {
  2315. SeekAndScan<false>(restorePoint);
  2316. }
  2317. template <typename EncodingPolicy>
  2318. void Scanner<EncodingPolicy>::SeekToForcingPid(const RestorePoint& restorePoint)
  2319. {
  2320. SeekAndScan<true>(restorePoint);
  2321. }
  2322. template <typename EncodingPolicy>
  2323. template <bool forcePid>
  2324. void Scanner<EncodingPolicy>::SeekAndScan(const RestorePoint& restorePoint)
  2325. {
  2326. this->m_currentCharacter = this->m_pchBase + restorePoint.m_ichMinTok + restorePoint.m_cMinTokMultiUnits;
  2327. this->m_pchMinLine = this->m_pchBase + restorePoint.m_ichMinLine + restorePoint.m_cMinLineMultiUnits;
  2328. this->m_cMinLineMultiUnits = restorePoint.m_cMinLineMultiUnits;
  2329. this->RestoreMultiUnits(restorePoint.m_cMinTokMultiUnits);
  2330. if (forcePid)
  2331. {
  2332. this->ScanForcingPid();
  2333. }
  2334. else
  2335. {
  2336. this->Scan();
  2337. }
  2338. this->m_line = restorePoint.m_line;
  2339. this->m_fHadEol = restorePoint.m_fHadEol;
  2340. this->m_parser->ReduceDeferredScriptLength(restorePoint.lengthDecr);
  2341. Assert(this->m_cMultiUnits == restorePoint.m_cMultiUnits);
  2342. }
  2343. template <typename EncodingPolicy>
  2344. void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId)
  2345. {
  2346. SeekTo(restorePoint);
  2347. *nextFunctionId += restorePoint.functionIdIncrement;
  2348. }
  2349. // Called by CompileScriptException::ProcessError to retrieve a BSTR for the line on which an error occurred.
  2350. template<typename EncodingPolicy>
  2351. HRESULT Scanner<EncodingPolicy>::SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine)
  2352. {
  2353. if( !pbstrLine )
  2354. {
  2355. return E_POINTER;
  2356. }
  2357. // If we overflow the string, we have a serious problem...
  2358. if (ichMinLine < 0 || static_cast<size_t>(ichMinLine) > AdjustedLength() )
  2359. {
  2360. return E_UNEXPECTED;
  2361. }
  2362. typename EncodingPolicy::EncodedCharPtr pStart = static_cast<size_t>(ichMinLine) == IchMinLine() ? m_pchMinLine : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, ichMinLine);
  2363. typename EncodingPolicy::EncodedCharPtr pEnd = AdjustedLast();
  2364. // Determine the length by scanning for the next newline
  2365. charcount_t cch = LineLength(pStart, pEnd);
  2366. Assert(cch <= LONG_MAX);
  2367. *pbstrLine = SysAllocStringLen(NULL, cch);
  2368. if (!*pbstrLine)
  2369. {
  2370. return E_OUTOFMEMORY;
  2371. }
  2372. this->ConvertToUnicode(*pbstrLine, cch, pStart);
  2373. return S_OK;
  2374. }
  // Explicit instantiation definitions of Scanner, one for each of the three
  // encoding policies named below (null-terminated UTF-16, null-terminated
  // UTF-8 and non-null-terminated UTF-8, going by the policy names).
  // NOTE(review): presumably needed because the member templates are defined
  // in this source file rather than in a header - confirm.
  2375. template class Scanner<NullTerminatedUnicodeEncodingPolicy>;
  2376. template class Scanner<NullTerminatedUTF8EncodingPolicy>;
  2377. template class Scanner<NotNullTerminatedUTF8EncodingPolicy>;