Scan.cpp 73 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. /*****************************************************************************
  7. *
  8. * The following table speeds various tests of characters, such as whether
  9. * a given character can be part of an identifier, and so on.
  10. */
  11. int CountNewlines(LPCOLESTR psz, int cch)
  12. {
  13. int cln = 0;
  14. while (0 != *psz && 0 != cch--)
  15. {
  16. switch (*psz++)
  17. {
  18. case _u('\xD'):
  19. if (*psz == _u('\xA'))
  20. {
  21. ++psz;
  22. if (0 == cch--)
  23. break;
  24. }
  25. // fall-through
  26. case _u('\xA'):
  27. cln++;
  28. break;
  29. }
  30. }
  31. return cln;
  32. }
  33. BOOL Token::IsKeyword() const
  34. {
  35. // keywords (but not future reserved words)
  36. return (tk <= tkYIELD);
  37. }
  38. tokens Token::SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser)
  39. {
  40. Assert(parser);
  41. if(pattern)
  42. parser->RegisterRegexPattern(pattern);
  43. this->u.pattern = pattern;
  44. return tk = tkRegExp;
  45. }
  46. IdentPtr Token::CreateIdentifier(HashTbl * hashTbl)
  47. {
  48. Assert(this->u.pid == nullptr);
  49. if (this->u.pchMin)
  50. {
  51. Assert(IsIdentifier());
  52. IdentPtr pid = hashTbl->PidHashNameLen(this->u.pchMin, this->u.pchMin + this->u.length, this->u.length);
  53. this->u.pid = pid;
  54. return pid;
  55. }
  56. Assert(IsReservedWord());
  57. IdentPtr pid = hashTbl->PidFromTk(tk);
  58. this->u.pid = pid;
  59. return pid;
  60. }
  61. template <typename EncodingPolicy>
  62. Scanner<EncodingPolicy>::Scanner(Parser* parser, HashTbl *phtbl, Token *ptoken, Js::ScriptContext* scriptContext)
  63. {
  64. AssertMem(phtbl);
  65. AssertMem(ptoken);
  66. m_parser = parser;
  67. m_phtbl = phtbl;
  68. m_ptoken = ptoken;
  69. m_cMinLineMultiUnits = 0;
  70. m_fHadEol = FALSE;
  71. m_doubleQuoteOnLastTkStrCon = FALSE;
  72. m_OctOrLeadingZeroOnLastTKNumber = false;
  73. m_fStringTemplateDepth = 0;
  74. m_scanState = ScanStateNormal;
  75. m_scriptContext = scriptContext;
  76. m_line = 0;
  77. m_startLine = 0;
  78. m_pchStartLine = NULL;
  79. m_ichMinError = 0;
  80. m_ichLimError = 0;
  81. m_tempChBuf.m_pscanner = this;
  82. m_tempChBufSecondary.m_pscanner = this;
  83. m_iecpLimTokPrevious = (size_t)-1;
  84. this->charClassifier = scriptContext->GetCharClassifier();
  85. this->es6UnicodeMode = scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled();
  86. m_fYieldIsKeywordRegion = false;
  87. m_fAwaitIsKeywordRegion = false;
  88. }
  89. template <typename EncodingPolicy>
  90. Scanner<EncodingPolicy>::~Scanner(void)
  91. {
  92. }
  93. /*****************************************************************************
  94. *
  95. * Initializes the scanner to prepare to scan the given source text.
  96. */
  97. template <typename EncodingPolicy>
  98. void Scanner<EncodingPolicy>::SetText(EncodedCharPtr pszSrc, size_t offset, size_t length, charcount_t charOffset, ULONG grfscr, ULONG lineNumber)
  99. {
  100. // Save the start of the script and add the offset to get the point where we should start scanning.
  101. m_pchBase = pszSrc;
  102. m_pchLast = m_pchBase + offset + length;
  103. m_pchPrevLine = m_currentCharacter = m_pchMinLine = m_pchMinTok = pszSrc + offset;
  104. this->RestoreMultiUnits(offset - charOffset);
  105. // Absorb any byte order mark at the start
  106. if(offset == 0)
  107. {
  108. switch( this->PeekFull(m_currentCharacter, m_pchLast) )
  109. {
  110. case 0xFFEE: // "Opposite" endian BOM
  111. // We do not support big-endian encodings
  112. // fall-through
  113. case 0xFEFF: // "Correct" BOM
  114. this->template ReadFull<true>(m_currentCharacter, m_pchLast);
  115. break;
  116. }
  117. }
  118. m_line = lineNumber;
  119. m_startLine = lineNumber;
  120. m_pchStartLine = m_currentCharacter;
  121. m_ptoken->tk = tkNone;
  122. m_fIsModuleCode = (grfscr & fscrIsModuleCode) != 0;
  123. m_fHadEol = FALSE;
  124. m_DeferredParseFlags = ScanFlagNone;
  125. }
  126. template <typename EncodingPolicy>
  127. void Scanner<EncodingPolicy>::PrepareForBackgroundParse(Js::ScriptContext *scriptContext)
  128. {
  129. scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0);
  130. scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
  131. }
  132. //-----------------------------------------------------------------------------
  133. // Number of code points from 'first' up to, but not including the next
  134. // newline character, embedded NUL, or 'last', depending on which comes first.
  135. //
  136. // This is used to determine a length of BSTR, which can't contain a NUL character.
  137. //-----------------------------------------------------------------------------
  138. template <typename EncodingPolicy>
  139. charcount_t Scanner<EncodingPolicy>::LineLength(EncodedCharPtr first, EncodedCharPtr last)
  140. {
  141. charcount_t result = 0;
  142. EncodedCharPtr p = first;
  143. for (;;)
  144. {
  145. switch( this->template ReadFull<false>(p, last) )
  146. {
  147. case kchNWL: // _C_NWL
  148. case kchRET:
  149. case kchLS:
  150. case kchPS:
  151. case kchNUL: // _C_NUL
  152. return result;
  153. }
  154. result++;
  155. }
  156. }
  157. template <typename EncodingPolicy>
  158. charcount_t Scanner<EncodingPolicy>::UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd)
  159. {
  160. EncodedCharPtr p = start;
  161. charcount_t ich = ichStart;
  162. int32 current = line;
  163. charcount_t lastStart = ichStart;
  164. while (ich < ichEnd)
  165. {
  166. ich++;
  167. switch (this->template ReadFull<false>(p, last))
  168. {
  169. case kchRET:
  170. if (this->PeekFull(p, last) == kchNWL)
  171. {
  172. ich++;
  173. this->template ReadFull<false>(p, last);
  174. }
  175. // fall-through
  176. case kchNWL:
  177. case kchLS:
  178. case kchPS:
  179. current++;
  180. lastStart = ich;
  181. break;
  182. case kchNUL:
  183. goto done;
  184. }
  185. }
  186. done:
  187. line = current;
  188. return lastStart;
  189. }
  190. template <typename EncodingPolicy>
  191. bool Scanner<EncodingPolicy>::TryReadEscape(EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar)
  192. {
  193. Assert(outChar != nullptr);
  194. Assert(startingLocation <= endOfSource);
  195. EncodedCharPtr currentLocation = startingLocation;
  196. codepoint_t charToOutput = 0x0;
  197. // '\' is Assumed as there is only one caller
  198. // Read 'u' characters
  199. if (currentLocation >= endOfSource || this->ReadFirst(currentLocation, endOfSource) != 'u')
  200. {
  201. return false;
  202. }
  203. bool expectCurly = false;
  204. if (currentLocation < endOfSource && this->PeekFirst(currentLocation, endOfSource) == '{' && es6UnicodeMode)
  205. {
  206. expectCurly = true;
  207. // Move past the character
  208. this->ReadFirst(currentLocation, endOfSource);
  209. }
  210. uint i = 0;
  211. OLECHAR ch = 0;
  212. int hexValue = 0;
  213. uint maxHexDigits = (expectCurly ? MAXUINT32 : 4u);
  214. for(; i < maxHexDigits && currentLocation < endOfSource; i++)
  215. {
  216. if (!Js::NumberUtilities::FHexDigit(ch = this->ReadFirst(currentLocation, endOfSource), &hexValue))
  217. {
  218. break;
  219. }
  220. charToOutput = charToOutput * 0x10 + hexValue;
  221. if (charToOutput > 0x10FFFF)
  222. {
  223. return false;
  224. }
  225. }
  226. //At least 4 characters have to be read
  227. if (i == 0 || (i != 4 && !expectCurly))
  228. {
  229. return false;
  230. }
  231. Assert(expectCurly ? es6UnicodeMode : true);
  232. if (expectCurly && ch != '}')
  233. {
  234. return false;
  235. }
  236. *outChar = charToOutput;
  237. startingLocation = currentLocation;
  238. return true;
  239. }
  240. template <typename EncodingPolicy>
  241. template <bool bScan>
  242. bool Scanner<EncodingPolicy>::TryReadCodePointRest(codepoint_t lower, EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar)
  243. {
  244. Assert(outChar != nullptr);
  245. Assert(outContainsMultiUnitChar != nullptr);
  246. Assert(es6UnicodeMode);
  247. Assert(Js::NumberUtilities::IsSurrogateLowerPart(lower));
  248. EncodedCharPtr currentLocation = startingLocation;
  249. *outChar = lower;
  250. if (currentLocation < endOfSource)
  251. {
  252. size_t restorePoint = this->m_cMultiUnits;
  253. codepoint_t upper = this->template ReadFull<bScan>(currentLocation, endOfSource);
  254. if (Js::NumberUtilities::IsSurrogateUpperPart(upper))
  255. {
  256. *outChar = Js::NumberUtilities::SurrogatePairAsCodePoint(lower, upper);
  257. if (this->IsMultiUnitChar(static_cast<OLECHAR>(upper)))
  258. {
  259. *outContainsMultiUnitChar = true;
  260. }
  261. startingLocation = currentLocation;
  262. }
  263. else
  264. {
  265. this->RestoreMultiUnits(restorePoint);
  266. }
  267. }
  268. return true;
  269. }
  270. template <typename EncodingPolicy>
  271. template <bool bScan>
  272. inline bool Scanner<EncodingPolicy>::TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar)
  273. {
  274. Assert(outChar != nullptr);
  275. Assert(outContainsMultiUnitChar != nullptr);
  276. if (startingLocation >= endOfSource)
  277. {
  278. return false;
  279. }
  280. codepoint_t ch = this->template ReadFull<bScan>(startingLocation, endOfSource);
  281. if (FBigChar(ch))
  282. {
  283. if (this->IsMultiUnitChar(static_cast<OLECHAR>(ch)))
  284. {
  285. *outContainsMultiUnitChar = true;
  286. }
  287. if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
  288. {
  289. return TryReadCodePointRest<bScan>(ch, startingLocation, endOfSource, outChar, outContainsMultiUnitChar);
  290. }
  291. }
  292. else if (ch == '\\' && TryReadEscape(startingLocation, endOfSource, &ch))
  293. {
  294. *hasEscape = true;
  295. }
  296. *outChar = ch;
  297. return true;
  298. }
  299. template <typename EncodingPolicy>
  300. tokens Scanner<EncodingPolicy>::ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp)
  301. {
  302. EncodedCharPtr p = *pp;
  303. EncodedCharPtr pchMin = p;
  304. // JS6 allows unicode characters in the form of \uxxxx escape sequences
  305. // to be part of the identifier.
  306. bool fHasEscape = false;
  307. bool fHasMultiChar = false;
  308. codepoint_t codePoint = INVALID_CODEPOINT;
  309. size_t multiUnitsBeforeLast = this->m_cMultiUnits;
  310. // Check if we started the id
  311. if (!TryReadCodePoint<true>(p, m_pchLast, &codePoint, &fHasEscape, &fHasMultiChar))
  312. {
  313. // If no chars. could be scanned as part of the identifier, return error.
  314. return tkScanError;
  315. }
  316. Assert(codePoint < 0x110000u);
  317. if (!charClassifier->IsIdStart(codePoint))
  318. {
  319. // Put back the last character
  320. this->RestoreMultiUnits(multiUnitsBeforeLast);
  321. // If no chars. could be scanned as part of the identifier, return error.
  322. return tkScanError;
  323. }
  324. return ScanIdentifierContinue(identifyKwds, fHasEscape, fHasMultiChar, pchMin, p, pp);
  325. }
  326. template <typename EncodingPolicy>
  327. BOOL Scanner<EncodingPolicy>::FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last)
  328. {
  329. if (EncodingPolicy::MultiUnitEncoding)
  330. {
  331. while (p < last)
  332. {
  333. EncodedChar currentChar = *p;
  334. if (this->IsMultiUnitChar(currentChar))
  335. {
  336. // multi unit character, we may not have reach the end yet
  337. return FALSE;
  338. }
  339. Assert(currentChar != '\\' || !charClassifier->IsIdContinueFast<false>(currentChar));
  340. if (!charClassifier->IsIdContinueFast<false>(currentChar))
  341. {
  342. // only reach the end of the identifier if it is not the start of an escape sequence
  343. return currentChar != '\\';
  344. }
  345. p++;
  346. }
  347. // We have reach the end of the identifier.
  348. return TRUE;
  349. }
  350. // Not fast path for non multi unit encoding
  351. return false;
  352. }
  353. template <typename EncodingPolicy>
  354. tokens Scanner<EncodingPolicy>::ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar,
  355. EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp)
  356. {
  357. EncodedCharPtr last = m_pchLast;
  358. while (true)
  359. {
  360. // Fast path for utf8, non-multi unit char and not escape
  361. if (FastIdentifierContinue(p, last))
  362. {
  363. break;
  364. }
  365. // Slow path that has to deal with multi unit encoding
  366. codepoint_t codePoint = INVALID_CODEPOINT;
  367. EncodedCharPtr pchBeforeLast = p;
  368. size_t multiUnitsBeforeLast = this->m_cMultiUnits;
  369. if (TryReadCodePoint<true>(p, last, &codePoint, &fHasEscape, &fHasMultiChar))
  370. {
  371. Assert(codePoint < 0x110000u);
  372. if (charClassifier->IsIdContinue(codePoint))
  373. {
  374. continue;
  375. }
  376. }
  377. // Put back the last character
  378. p = pchBeforeLast;
  379. this->RestoreMultiUnits(multiUnitsBeforeLast);
  380. break;
  381. }
  382. Assert(p - pchMin > 0 && p - pchMin <= LONG_MAX);
  383. *pp = p;
  384. if (!identifyKwds)
  385. {
  386. return tkID;
  387. }
  388. // During syntax coloring, scanner doesn't need to convert the escape sequence to get actual characters, it just needs the classification information
  389. // So call up hashtables custom method to check if the string scanned is identifier or keyword.
  390. // Do the same for deferred parsing, but use a custom method that only tokenizes JS keywords.
  391. if ((m_DeferredParseFlags & ScanFlagSuppressIdPid) != 0)
  392. {
  393. m_ptoken->SetIdentifier(NULL);
  394. if (!fHasEscape)
  395. {
  396. // If there are no escape, that the main scan loop would have found the keyword already
  397. // So we can just assume it is an ID
  398. DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p));
  399. DebugOnly(tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode()));
  400. Assert(tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword()));
  401. return tkID;
  402. }
  403. int32 cch = UnescapeToTempBuf(pchMin, p);
  404. tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode());
  405. return (!this->YieldIsKeyword() && tk == tkYIELD) || (!this->AwaitIsKeyword() && tk == tkAWAIT) ? tkID : tk;
  406. }
  407. // UTF16 Scanner are only for syntax coloring, so it shouldn't come here.
  408. if (EncodingPolicy::MultiUnitEncoding && !fHasMultiChar && !fHasEscape)
  409. {
  410. Assert(sizeof(EncodedChar) == 1);
  411. // If there are no escape, that the main scan loop would have found the keyword already
  412. // So we can just assume it is an ID
  413. DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p));
  414. DebugOnly(tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode()));
  415. Assert(tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword()));
  416. m_ptoken->SetIdentifier(reinterpret_cast<const char *>(pchMin), (int32)(p - pchMin));
  417. return tkID;
  418. }
  419. IdentPtr pid = PidOfIdentiferAt(pchMin, p, fHasEscape, fHasMultiChar);
  420. m_ptoken->SetIdentifier(pid);
  421. if (!fHasEscape)
  422. {
  423. // If it doesn't have escape, then Scan() should have taken care of keywords (except
  424. // yield if this->YieldIsKeyword() is false, in which case yield is treated as an identifier, and except
  425. // await if this->AwaitIsKeyword() is false, in which case await is treated as an identifier).
  426. // We don't have to check if the name is reserved word and return it as an Identifier
  427. Assert(pid->Tk(IsStrictMode()) == tkID
  428. || (pid->Tk(IsStrictMode()) == tkYIELD && !this->YieldIsKeyword())
  429. || (pid->Tk(IsStrictMode()) == tkAWAIT && !this->AwaitIsKeyword()));
  430. return tkID;
  431. }
  432. tokens tk = pid->Tk(IsStrictMode());
  433. return tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword()) ? tkID : tkNone;
  434. }
  435. template <typename EncodingPolicy>
  436. IdentPtr Scanner<EncodingPolicy>::PidAt(size_t iecpMin, size_t iecpLim)
  437. {
  438. Assert(iecpMin < AdjustedLength() && iecpLim <= AdjustedLength() && iecpLim > iecpMin);
  439. return PidOfIdentiferAt(m_pchBase + iecpMin, m_pchBase + iecpLim);
  440. }
  441. template <typename EncodingPolicy>
  442. uint32 Scanner<EncodingPolicy>::UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last)
  443. {
  444. m_tempChBuf.Init();
  445. while( p < last )
  446. {
  447. codepoint_t codePoint;
  448. bool hasEscape, isMultiChar;
  449. bool gotCodePoint = TryReadCodePoint<false>(p, last, &codePoint, &hasEscape, &isMultiChar);
  450. Assert(gotCodePoint);
  451. Assert(codePoint < 0x110000);
  452. if (codePoint < 0x10000)
  453. {
  454. m_tempChBuf.AppendCh((OLECHAR)codePoint);
  455. }
  456. else
  457. {
  458. char16 lower, upper;
  459. Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &upper);
  460. m_tempChBuf.AppendCh(lower);
  461. m_tempChBuf.AppendCh(upper);
  462. }
  463. }
  464. return m_tempChBuf.m_ichCur;
  465. }
  466. template <typename EncodingPolicy>
  467. IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last)
  468. {
  469. int32 cch = UnescapeToTempBuf(p, last);
  470. return m_phtbl->PidHashNameLen(m_tempChBuf.m_prgch, cch);
  471. }
  472. template <typename EncodingPolicy>
  473. IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar)
  474. {
  475. // If there is an escape sequence in the JS6 identifier or it is a UTF8
  476. // source then we have to convert it to the equivalent char so we use a
  477. // buffer for translation.
  478. if ((EncodingPolicy::MultiUnitEncoding && fHasMultiChar) || fHadEscape)
  479. {
  480. return PidOfIdentiferAt(p, last);
  481. }
  482. else if (EncodingPolicy::MultiUnitEncoding)
  483. {
  484. Assert(sizeof(EncodedChar) == 1);
  485. return m_phtbl->PidHashNameLen(reinterpret_cast<const char *>(p), reinterpret_cast<const char *>(last), (int32)(last - p));
  486. }
  487. else
  488. {
  489. Assert(sizeof(EncodedChar) == 2);
  490. return m_phtbl->PidHashNameLen(reinterpret_cast< const char16 * >(p), (int32)(last - p));
  491. }
  492. }
  493. template <typename EncodingPolicy>
  494. typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt)
  495. {
  496. EncodedCharPtr last = m_pchLast;
  497. EncodedCharPtr pchT;
  498. likelyInt = true;
  499. // Reset
  500. m_OctOrLeadingZeroOnLastTKNumber = false;
  501. if ('0' == this->PeekFirst(p, last))
  502. {
  503. switch(this->PeekFirst(p + 1, last))
  504. {
  505. case '.':
  506. case 'e':
  507. case 'E':
  508. likelyInt = false;
  509. // Floating point
  510. goto LFloat;
  511. case 'x':
  512. case 'X':
  513. // Hex
  514. *pdbl = Js::NumberUtilities::DblFromHex(p + 2, &pchT);
  515. if (pchT == p + 2)
  516. {
  517. // "Octal zero token "0" followed by an identifier token beginning with character 'x'/'X'
  518. *pdbl = 0;
  519. return p + 1;
  520. }
  521. else
  522. return pchT;
  523. case 'o':
  524. case 'O':
  525. // Octal
  526. *pdbl = Js::NumberUtilities::DblFromOctal(p + 2, &pchT);
  527. if (pchT == p + 2)
  528. {
  529. // "Octal zero token "0" followed by an identifier token beginning with character 'o'/'O'
  530. *pdbl = 0;
  531. return p + 1;
  532. }
  533. return pchT;
  534. case 'b':
  535. case 'B':
  536. // Binary
  537. *pdbl = Js::NumberUtilities::DblFromBinary(p + 2, &pchT);
  538. if (pchT == p + 2)
  539. {
  540. // "Octal zero token "0" followed by an identifier token beginning with character 'b'/'B'
  541. *pdbl = 0;
  542. return p + 1;
  543. }
  544. return pchT;
  545. default:
  546. // Octal
  547. *pdbl = Js::NumberUtilities::DblFromOctal(p, &pchT);
  548. Assert(pchT > p);
  549. #if !SOURCERELEASE
  550. // If an octal literal is malformed then it is in fact a decimal literal.
  551. #endif // !SOURCERELEASE
  552. if(*pdbl != 0 || pchT > p + 1)
  553. m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok
  554. switch (*pchT)
  555. {
  556. case '8':
  557. case '9':
  558. // case 'e':
  559. // case 'E':
  560. // case '.':
  561. m_OctOrLeadingZeroOnLastTKNumber = false; //08... or 09....
  562. goto LFloat;
  563. }
  564. return pchT;
  565. }
  566. }
  567. else
  568. {
  569. LFloat:
  570. *pdbl = Js::NumberUtilities::StrToDbl(p, &pchT, likelyInt);
  571. Assert(pchT == p || !Js::NumberUtilities::IsNan(*pdbl));
  572. return pchT;
  573. }
  574. }
  575. template <typename EncodingPolicy>
  576. BOOL Scanner<EncodingPolicy>::oFScanNumber(double *pdbl, bool& likelyInt)
  577. {
  578. EncodedCharPtr pchT;
  579. m_OctOrLeadingZeroOnLastTKNumber = false;
  580. likelyInt = true;
  581. if ('0' == *m_currentCharacter)
  582. {
  583. switch (m_currentCharacter[1])
  584. {
  585. case '.':
  586. case 'e':
  587. case 'E':
  588. likelyInt = false;
  589. // Floating point.
  590. goto LFloat;
  591. case 'x':
  592. case 'X':
  593. // Hex.
  594. *pdbl = Js::NumberUtilities::DblFromHex<EncodedChar>(m_currentCharacter + 2, &pchT);
  595. if (pchT == m_currentCharacter + 2)
  596. {
  597. // "Octal zero token "0" followed by an identifier token beginning with character 'x'/'X'
  598. *pdbl = 0;
  599. m_currentCharacter++;
  600. }
  601. else
  602. m_currentCharacter = pchT;
  603. break;
  604. case 'o':
  605. case 'O':
  606. *pdbl = Js::NumberUtilities::DblFromOctal(m_currentCharacter + 2, &pchT);
  607. if (pchT == m_currentCharacter + 2)
  608. {
  609. // "Octal zero token "0" followed by an identifier token beginning with character 'o'/'O'
  610. *pdbl = 0;
  611. m_currentCharacter++;
  612. }
  613. else
  614. m_currentCharacter = pchT;
  615. break;
  616. case 'b':
  617. case 'B':
  618. *pdbl = Js::NumberUtilities::DblFromBinary(m_currentCharacter + 2, &pchT);
  619. if (pchT == m_currentCharacter + 2)
  620. {
  621. // "Octal zero token "0" followed by an identifier token beginning with character 'b'/'B'
  622. *pdbl = 0;
  623. m_currentCharacter++;
  624. }
  625. else
  626. m_currentCharacter = pchT;
  627. break;
  628. default:
  629. // Octal.
  630. *pdbl = Js::NumberUtilities::DblFromOctal(m_currentCharacter, &pchT);
  631. Assert(pchT > m_currentCharacter);
  632. #if !SOURCERELEASE
  633. // If an octal literal is malformed then it is in fact a decimal literal.
  634. #endif // !SOURCERELEASE
  635. if(*pdbl != 0 || pchT > m_currentCharacter + 1)
  636. m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok
  637. switch (*pchT)
  638. {
  639. case '8':
  640. case '9':
  641. // case 'e':
  642. // case 'E':
  643. // case '.':
  644. m_OctOrLeadingZeroOnLastTKNumber = false; //08... or 09....
  645. goto LFloat;
  646. }
  647. m_currentCharacter = pchT;
  648. break;
  649. }
  650. }
  651. else
  652. {
  653. LFloat:
  654. // Let StrToDbl do all the work.
  655. *pdbl = Js::NumberUtilities::StrToDbl(m_currentCharacter, &pchT, likelyInt);
  656. if (pchT == m_currentCharacter)
  657. return FALSE;
  658. m_currentCharacter = pchT;
  659. Assert(!Js::NumberUtilities::IsNan(*pdbl));
  660. }
  661. return TRUE;
  662. }
  663. template <typename EncodingPolicy>
  664. tokens Scanner<EncodingPolicy>::TryRescanRegExp()
  665. {
  666. EncodedCharPtr current = m_currentCharacter;
  667. tokens result = RescanRegExp();
  668. if (result == tkScanError)
  669. m_currentCharacter = current;
  670. return result;
  671. }
  672. template <typename EncodingPolicy>
  673. tokens Scanner<EncodingPolicy>::RescanRegExp()
  674. {
  675. #if DEBUG
  676. switch (m_ptoken->tk)
  677. {
  678. case tkDiv:
  679. Assert(m_currentCharacter == m_pchMinTok + 1);
  680. break;
  681. case tkAsgDiv:
  682. Assert(m_currentCharacter == m_pchMinTok + 2);
  683. break;
  684. default:
  685. AssertMsg(FALSE, "Who is calling RescanRegExp?");
  686. break;
  687. }
  688. #endif //DEBUG
  689. m_currentCharacter = m_pchMinTok;
  690. if (*m_currentCharacter != '/')
  691. Error(ERRnoSlash);
  692. m_currentCharacter++;
  693. tokens tk = tkNone;
  694. {
  695. ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc);
  696. tk = ScanRegExpConstant(&alloc);
  697. }
  698. return tk;
  699. }
  700. template <typename EncodingPolicy>
  701. tokens Scanner<EncodingPolicy>::RescanRegExpNoAST()
  702. {
  703. #if DEBUG
  704. switch (m_ptoken->tk)
  705. {
  706. case tkDiv:
  707. Assert(m_currentCharacter == m_pchMinTok + 1);
  708. break;
  709. case tkAsgDiv:
  710. Assert(m_currentCharacter == m_pchMinTok + 2);
  711. break;
  712. default:
  713. AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?");
  714. break;
  715. }
  716. #endif //DEBUG
  717. m_currentCharacter = m_pchMinTok;
  718. if (*m_currentCharacter != '/')
  719. Error(ERRnoSlash);
  720. m_currentCharacter++;
  721. tokens tk = tkNone;
  722. {
  723. ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc);
  724. {
  725. tk = ScanRegExpConstantNoAST(&alloc);
  726. }
  727. }
  728. return tk;
  729. }
  730. template <typename EncodingPolicy>
  731. tokens Scanner<EncodingPolicy>::RescanRegExpTokenizer()
  732. {
  733. #if DEBUG
  734. switch (m_ptoken->tk)
  735. {
  736. case tkDiv:
  737. Assert(m_currentCharacter == m_pchMinTok + 1);
  738. break;
  739. case tkAsgDiv:
  740. Assert(m_currentCharacter == m_pchMinTok + 2);
  741. break;
  742. default:
  743. AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?");
  744. break;
  745. }
  746. #endif //DEBUG
  747. m_currentCharacter = m_pchMinTok;
  748. if (*m_currentCharacter != '/')
  749. Error(ERRnoSlash);
  750. m_currentCharacter++;
  751. tokens tk = tkNone;
  752. ThreadContext *threadContext = ThreadContext::GetContextForCurrentThread();
  753. threadContext->EnsureRecycler();
  754. Js::TempArenaAllocatorObject *alloc = threadContext->GetTemporaryAllocator(_u("RescanRegExp"));
  755. TryFinally(
  756. [&]() /* try block */
  757. {
  758. tk = this->ScanRegExpConstantNoAST(alloc->GetAllocator());
  759. },
  760. [&](bool /* hasException */) /* finally block */
  761. {
  762. threadContext->ReleaseTemporaryAllocator(alloc);
  763. });
  764. return tk;
  765. }
  766. template <typename EncodingPolicy>
  767. tokens Scanner<EncodingPolicy>::ScanRegExpConstant(ArenaAllocator* alloc)
  768. {
  769. PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);
  770. // SEE ALSO: RegexHelper::PrimCompileDynamic()
  771. #ifdef PROFILE_EXEC
  772. m_scriptContext->ProfileBegin(Js::RegexCompilePhase);
  773. #endif
  774. ArenaAllocator* ctAllocator = alloc;
  775. UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = m_scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0);
  776. UnifiedRegex::StandardChars<char16>* standardChars = m_scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
  777. #if ENABLE_REGEX_CONFIG_OPTIONS
  778. UnifiedRegex::DebugWriter *w = 0;
  779. if (REGEX_CONFIG_FLAG(RegexDebug))
  780. w = m_scriptContext->GetRegexDebugWriter();
  781. if (REGEX_CONFIG_FLAG(RegexProfile))
  782. m_scriptContext->GetRegexStatsDatabase()->BeginProfile();
  783. #endif
  784. UnifiedRegex::Node* root = 0;
  785. charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
  786. UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
  787. UnifiedRegex::Parser<EncodingPolicy, true> parser
  788. ( m_scriptContext
  789. , ctAllocator
  790. , standardEncodedChars
  791. , standardChars
  792. , this->IsFromExternalSource()
  793. #if ENABLE_REGEX_CONFIG_OPTIONS
  794. , w
  795. #endif
  796. );
  797. try
  798. {
  799. root = parser.ParseLiteral(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars, flags);
  800. }
  801. catch (UnifiedRegex::ParseError e)
  802. {
  803. #ifdef PROFILE_EXEC
  804. m_scriptContext->ProfileEnd(Js::RegexCompilePhase);
  805. #endif
  806. m_currentCharacter += e.encodedPos;
  807. Error(e.error);
  808. }
  809. UnifiedRegex::RegexPattern* pattern;
  810. if (m_parser->IsBackgroundParser())
  811. {
  812. // Avoid allocating pattern from recycler on background thread. The main thread will create the pattern
  813. // and hook it to this parse node.
  814. pattern = parser.template CompileProgram<false>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags);
  815. }
  816. else
  817. {
  818. pattern = parser.template CompileProgram<true>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags);
  819. }
  820. this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits
  821. return m_ptoken->SetRegex(pattern, m_parser);
  822. }
  823. template<typename EncodingPolicy>
  824. tokens Scanner<EncodingPolicy>::ScanRegExpConstantNoAST(ArenaAllocator* alloc)
  825. {
  826. PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);
  827. ThreadContext *threadContext = m_scriptContext->GetThreadContext();
  828. UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = threadContext->GetStandardChars((EncodedChar*)0);
  829. UnifiedRegex::StandardChars<char16>* standardChars = threadContext->GetStandardChars((char16*)0);
  830. charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
  831. UnifiedRegex::Parser<EncodingPolicy, true> parser
  832. ( m_scriptContext
  833. , alloc
  834. , standardEncodedChars
  835. , standardChars
  836. , this->IsFromExternalSource()
  837. #if ENABLE_REGEX_CONFIG_OPTIONS
  838. , 0
  839. #endif
  840. );
  841. try
  842. {
  843. parser.ParseLiteralNoAST(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars);
  844. }
  845. catch (UnifiedRegex::ParseError e)
  846. {
  847. m_currentCharacter += e.encodedPos;
  848. Error(e.error);
  849. // never reached
  850. }
  851. UnifiedRegex::RegexPattern* pattern = parser.template CompileProgram<false>(nullptr, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, UnifiedRegex::NoRegexFlags);
  852. Assert(pattern == nullptr); // BuildAST == false, CompileProgram should return nullptr
  853. this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits
  854. return (m_ptoken->tk = tkRegExp);
  855. }
  856. template<typename EncodingPolicy>
  857. tokens Scanner<EncodingPolicy>::ScanStringTemplateBegin(EncodedCharPtr *pp)
  858. {
  859. // String template must begin with a string constant followed by '`' or '${'
  860. ScanStringConstant<true, true>('`', pp);
  861. OLECHAR ch;
  862. EncodedCharPtr last = m_pchLast;
  863. ch = this->ReadFirst(*pp, last);
  864. if (ch == '`')
  865. {
  866. // Simple string template - no substitutions
  867. return tkStrTmplBasic;
  868. }
  869. else if (ch == '$')
  870. {
  871. ch = this->ReadFirst(*pp, last);
  872. if (ch == '{')
  873. {
  874. // Next token after expr should be tkStrTmplMid or tkStrTmplEnd.
  875. // In string template scanning mode, we expect the next char to be '}'
  876. // and will treat it as the beginning of tkStrTmplEnd or tkStrTmplMid
  877. m_fStringTemplateDepth++;
  878. // Regular string template begin - next is first substitution
  879. return tkStrTmplBegin;
  880. }
  881. }
  882. // Error - make sure pointer stays at the last character of the error token instead of after it in the error case
  883. (*pp)--;
  884. return ScanError(m_currentCharacter, tkStrTmplBegin);
  885. }
  886. template<typename EncodingPolicy>
  887. tokens Scanner<EncodingPolicy>::ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp)
  888. {
  889. // String template middle and end tokens must begin with a string constant
  890. ScanStringConstant<true, true>('`', pp);
  891. OLECHAR ch;
  892. EncodedCharPtr last = m_pchLast;
  893. ch = this->ReadFirst(*pp, last);
  894. if (ch == '`')
  895. {
  896. // No longer in string template scanning mode
  897. m_fStringTemplateDepth--;
  898. // This is the last part of the template ...`
  899. return tkStrTmplEnd;
  900. }
  901. else if (ch == '$')
  902. {
  903. ch = this->ReadFirst(*pp, last);
  904. if (ch == '{')
  905. {
  906. // This is just another middle part of the template }...${
  907. return tkStrTmplMid;
  908. }
  909. }
  910. // Error - make sure pointer stays at the last character of the error token instead of after it in the error case
  911. (*pp)--;
  912. return ScanError(m_currentCharacter, tkStrTmplEnd);
  913. }
/*****************************************************************************
*
*  Parses a string constant. Note that the string value is stored in
*  a volatile buffer (or allocated on the heap if too long), and thus
*  the string should be saved off before the next token is scanned.
*
*  Template parameters (must have the same value, see the static_assert):
*    stringTemplateMode - scan the literal text of a template string: line
*                         breaks are accepted, an unescaped '`' or "${" stops
*                         the scan without being consumed, and octal escapes
*                         are reported as ERRES5NoOctal.
*    createRawString    - additionally collect the raw text (escapes not
*                         translated) in m_tempChBufSecondary.
*
*  Parameters:
*    delim - quote character that ends a non-template literal.
*    pp    - in: first character of the string body; out: position where
*            scanning stopped (past the closing quote, or on the unconsumed
*            '`' / '$' in template mode).
*
*  Returns tkStrCon. Malformed input is reported through Error().
*/
template<typename EncodingPolicy>
template<bool stringTemplateMode, bool createRawString>
tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
{
    static_assert((stringTemplateMode && createRawString) || (!stringTemplateMode && !createRawString), "stringTemplateMode and createRawString must have the same value");

    // ch    - cooked character appended to m_tempChBuf
    // rawch - raw character appended to m_tempChBufSecondary
    // c     - scratch character used while parsing escape sequences
    OLECHAR ch, c, rawch;
    // Numeric value of the most recently parsed hex or octal digit.
    int wT;
    EncodedCharPtr p = *pp;
    EncodedCharPtr last = m_pchLast;

    // Reset
    m_OctOrLeadingZeroOnLastTKNumber = false;
    m_EscapeOnLastTkStrCon = FALSE;

    m_tempChBuf.Init();

    // Use template parameter to gate raw string creation.
    // If createRawString is false, all these operations should be no-ops
    if (createRawString)
    {
        m_tempChBufSecondary.Init();
    }

    // One iteration per source character (or escape sequence). `break` out of the
    // switch appends ch/rawch at the bottom of the loop; `continue` skips the append;
    // `goto LBreak` ends the string.
    for (;;)
    {
        switch ((rawch = ch = this->ReadFirst(p, last)))
        {
        case kchRET:
            if (stringTemplateMode)
            {
                if (this->PeekFirst(p, last) == kchNWL)
                {
                    // Eat the <LF> char, ignore return
                    this->ReadFirst(p, last);
                }

                // Both <CR> and <CR><LF> are normalized to <LF> in template cooked and raw values
                ch = rawch = kchNWL;
            }

LEcmaLineBreak:
            // Fall through
        case kchNWL:
            if (stringTemplateMode)
            {
                // Notify the scanner to update current line, number of lines etc
                NotifyScannedNewLine();
                break;
            }

            // Outside template mode an unescaped line break is reported as an unterminated string.
            m_currentCharacter = p - 1;
            Error(ERRnoStrEnd);

        case '"':
        case '\'':
            // Only the quote kind that opened the string closes it; the other one is ordinary text.
            if (ch == delim)
                goto LBreak;
            break;

        case '`':
            // In string template scan mode, don't consume the '`' - we need to differentiate
            // between a closed string template and the expression open sequence - ${
            if (stringTemplateMode)
            {
                p--;
                goto LBreak;
            }

            // If we aren't scanning for a string template, do the default thing
            goto LMainDefault;

        case '$':
            // If we are parsing a string literal part of a string template, ${ indicates we need to switch
            // to parsing an expression.
            if (stringTemplateMode && this->PeekFirst(p, last) == '{')
            {
                // Rewind to the $ and return
                p--;
                goto LBreak;
            }

            // If we aren't scanning for a string template, do the default thing
            goto LMainDefault;

        case kchNUL:
            // A NUL read with p beyond `last` is reported as an unterminated string;
            // otherwise it is kept as a character of the string.
            if (p > last)
            {
                m_currentCharacter = p - 1;
                Error(ERRnoStrEnd);
            }
            break;

        default:
LMainDefault:
            if (this->IsMultiUnitChar(ch))
            {
                if ((ch == kchLS || ch == kchPS))
                {
                    goto LEcmaLineBreak;
                }

                // Read the remaining units of the character.
                rawch = ch = this->template ReadRest<true>(ch, p, last);
                switch (ch)
                {
                case kchLS: // 0x2028, classifies as new line
                case kchPS: // 0x2029, classifies as new line
                    goto LEcmaLineBreak;
                }
            }
            break;

        case kchBSL:
            // In raw mode '\\' is not an escape character, just add the char into the raw buffer.
            m_tempChBufSecondary.template AppendCh<createRawString>(ch);

            m_EscapeOnLastTkStrCon=TRUE;

            // In raw mode, we append the raw char itself and not the escaped value so save the char.
            rawch = ch = this->ReadFirst(p, last);
            // Accumulates the value of \x, \u and \u{...} escapes.
            codepoint_t codePoint = 0;
            // Error reported at ReturnScanError unless a more specific one is set first.
            uint errorType = (uint)ERRbadHexDigit;
            switch (ch)
            {
            case 'b':
                ch = 0x08;
                break;
            case 't':
                ch = 0x09;
                break;
            case 'v':
                ch = 0x0B; //Only in ES5 mode
                break; //same as default
            case 'n':
                ch = 0x0A;
                break;
            case 'f':
                ch = 0x0C;
                break;
            case 'r':
                ch = 0x0D;
                break;
            case 'x':
                // Insert the 'x' here before jumping to parse the hex digits.
                m_tempChBufSecondary.template AppendCh<createRawString>(ch);

                // 2 hex digits
                ch = 0;
                goto LTwoHex;
            case 'u':
                // Raw string just inserts a 'u' here.
                m_tempChBufSecondary.template AppendCh<createRawString>(ch);

                ch = 0;
                // A hex digit right after 'u' starts the four-digit form; otherwise only
                // '{' (with es6UnicodeMode on) is accepted.
                if (Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto LFourHex;
                else if (c != '{' || !this->es6UnicodeMode)
                    goto ReturnScanError;

                Assert(c == '{');

                // c should definitely be a '{' which should be appended to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                //At least one digit is expected
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                {
                    goto ReturnScanError;
                }

                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                codePoint = static_cast<codepoint_t>(wT);

                // Consume further hex digits, rejecting values above 0x10FFFF as soon as they occur.
                while(Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                {
                    m_tempChBufSecondary.template AppendCh<createRawString>(c);
                    codePoint <<= 4;
                    codePoint += static_cast<codepoint_t>(wT);

                    if (codePoint > 0x10FFFF)
                    {
                        errorType = (uint)ERRInvalidCodePoint;
                        goto ReturnScanError;
                    }
                }

                if (c != '}')
                {
                    errorType = (uint)ERRMissingCurlyBrace;
                    goto ReturnScanError;
                }

                Assert(codePoint <= 0x10FFFF);

                if (codePoint >= 0x10000)
                {
                    // Needs a surrogate pair: the first half is appended here, the second
                    // half is left in ch and appended by the common code after the switch.
                    OLECHAR lower = 0;
                    Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &ch);
                    m_tempChBuf.AppendCh(lower);
                }
                else
                {
                    ch = (char16)codePoint;
                }

                // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
                if (createRawString)
                    rawch = c;

                break;
LFourHex:
                // Four-digit \uXXXX form. c/wT already hold the first digit.
                codePoint = 0x0;
                // Append the first hex digit character to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                codePoint += static_cast<codepoint_t>(wT * 0x1000);
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                // Append the second hex digit character to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);
                codePoint += static_cast<codepoint_t>(wT * 0x0100);

LTwoHex:
                // Shared tail: the last two digits of \uXXXX, or both digits of \xXX.
                // This code path doesn't expect curly.
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                // Append this hex digit (third of \uXXXX, first of \xXX) to the raw string.
                m_tempChBufSecondary.template AppendCh<createRawString>(c);

                codePoint += static_cast<codepoint_t>(wT * 0x0010);
                if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                    goto ReturnScanError;

                codePoint += static_cast<codepoint_t>(wT);

                // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
                // (It is appended to the raw buffer by the common code after the switch.)
                if (createRawString)
                    rawch = c;

                if (codePoint < 0x10000)
                {
                    ch = static_cast<OLECHAR>(codePoint);
                }
                else
                {
                    goto ReturnScanError;
                }
                break;
            case '0':
            case '1':
            case '2':
            case '3':
                // 1 to 3 octal digits

                ch -= '0';

                // Octal escape sequences are not allowed inside string template literals
                if (stringTemplateMode)
                {
                    // Only "\0" not followed by an octal digit is accepted here.
                    c = this->PeekFirst(p, last);
                    if (ch != 0 || (c >= '0' && c <= '7'))
                    {
                        errorType = (uint)ERRES5NoOctal;
                        goto ReturnScanError;
                    }
                    break;
                }

                wT = (c = this->ReadFirst(p, last)) - '0';
                if ((char16)wT > 7)
                {
                    // Next character is not an octal digit: flag the escape unless it was
                    // a "\0" that is not followed by '8' or '9', then put the character back.
                    if (ch != 0 || ((char16)wT <= 9))
                    {
                        m_OctOrLeadingZeroOnLastTKNumber = true;
                    }
                    p--;
                    break;
                }

                m_OctOrLeadingZeroOnLastTKNumber = true;
                ch = static_cast< OLECHAR >(ch * 8 + wT);
                goto LOneOctal;
            case '4':
            case '5':
            case '6':
            case '7':
                // 1 to 2 octal digits

                // Octal escape sequences are not allowed inside string template literals
                if (stringTemplateMode)
                {
                    errorType = (uint)ERRES5NoOctal;
                    goto ReturnScanError;
                }

                ch -= '0';

                m_OctOrLeadingZeroOnLastTKNumber = true;

LOneOctal:
                // Optional final octal digit; a non-octal character is put back.
                wT = (c = this->ReadFirst(p, last)) - '0';
                if ((char16)wT > 7)
                {
                    p--;
                    break;
                }

                ch = static_cast< OLECHAR >(ch * 8 + wT);
                break;

            case kchRET:        // 0xD
                if (stringTemplateMode)
                {
                    // If this is \<CR><LF> we can eat the <LF> right now
                    if (this->PeekFirst(p, last) == kchNWL)
                    {
                        // Eat the <LF> char, ignore return
                        this->ReadFirst(p, last);
                    }

                    // Both \<CR> and \<CR><LF> are normalized to \<LF> in template raw string
                    rawch = kchNWL;
                }
                // Falls through to the shared escaped-line-break handling.
            case kchLS:         // 0x2028, classifies as new line
            case kchPS:         // 0x2029, classifies as new line
            case kchNWL:        // 0xA
LEcmaEscapeLineBreak:
                if (stringTemplateMode)
                {
                    // We're going to ignore the line continuation tokens for the cooked strings, but we need to append the token for raw strings
                    m_tempChBufSecondary.template AppendCh<createRawString>(rawch);

                    // Template literal strings ignore all escaped line continuation tokens
                    NotifyScannedNewLine();
                    continue;
                }

                // Non-template line continuation: let ScanNewLine consume the break
                // (it works on m_currentCharacter) and append nothing to the string.
                m_currentCharacter = p;
                ScanNewLine(ch);
                p = m_currentCharacter;
                continue;

            case 0:
                if (p >= last)
                {
                    errorType = (uint)ERRnoStrEnd;

                    // Shared error exit for every malformed escape above; errorType selects the message.
ReturnScanError:
                    m_currentCharacter = p - 1;
                    Error(errorType);
                }
                else if (stringTemplateMode)
                {
                    // Escaped null character is translated into 0x0030 for raw template literals
                    rawch = 0x0030;
                }
                break;

            default:
                // Any other escaped character stands for itself.
                if (this->IsMultiUnitChar(ch))
                {
                    rawch = ch = this->template ReadRest<true>(ch, p, last);
                    switch (ch)
                    {
                    case kchLS:
                    case kchPS:
                        goto LEcmaEscapeLineBreak;
                    }
                }
                break;
            }
            break;
        }

        // Common append: cooked character always, raw character only when createRawString.
        m_tempChBuf.AppendCh(ch);
        m_tempChBufSecondary.template AppendCh<createRawString>(rawch);
    }

LBreak:
    // When ScanFlagSuppressStrPid is set, an identifier is created only for the
    // exact text "use strict" (10 characters); any other string gets a NULL identifier.
    bool createPid = true;

    if ((m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0)
    {
        createPid = false;

        if ((m_tempChBuf.m_ichCur == 10) && (0 == memcmp(_u("use strict"), m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur * sizeof(OLECHAR))))
        {
            createPid = true;
        }
    }

    if (createPid)
    {
        m_ptoken->SetIdentifier(m_phtbl->PidHashNameLen(m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur));
    }
    else
    {
        m_ptoken->SetIdentifier(NULL);
    }

    m_scanState = ScanStateNormal;
    // Record whether the literal was delimited by double quotes.
    m_doubleQuoteOnLastTkStrCon = '"' == delim;

    *pp = p;

    return tkStrCon;
}
  1265. template<typename EncodingPolicy>
  1266. tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
  1267. {
  1268. return ScanStringConstant<false, false>(delim, pp);
  1269. }
  1270. /*****************************************************************************
  1271. *
  1272. * Consume a C-style comment.
  1273. */
  1274. template<typename EncodingPolicy>
  1275. tokens Scanner<EncodingPolicy>::SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef)
  1276. {
  1277. Assert(containTypeDef != nullptr);
  1278. EncodedCharPtr p = *pp;
  1279. *containTypeDef = false;
  1280. EncodedCharPtr last = m_pchLast;
  1281. OLECHAR ch;
  1282. for (;;)
  1283. {
  1284. switch((ch = this->ReadFirst(p, last)))
  1285. {
  1286. case '*':
  1287. if (*p == '/')
  1288. {
  1289. *pp = p + 1;
  1290. return tkNone;
  1291. }
  1292. break;
  1293. // ES 2015 11.3 Line Terminators
  1294. case kchLS: // 0x2028, classifies as new line
  1295. case kchPS: // 0x2029, classifies as new line
  1296. LEcmaLineBreak:
  1297. goto LLineBreak;
  1298. case kchRET:
  1299. case kchNWL:
  1300. LLineBreak:
  1301. m_fHadEol = TRUE;
  1302. m_currentCharacter = p;
  1303. ScanNewLine(ch);
  1304. p = m_currentCharacter;
  1305. break;
  1306. case kchNUL:
  1307. if (p >= last)
  1308. {
  1309. m_currentCharacter = p - 1;
  1310. *pp = p - 1;
  1311. Error(ERRnoCmtEnd);
  1312. }
  1313. break;
  1314. default:
  1315. if (this->IsMultiUnitChar(ch))
  1316. {
  1317. ch = this->template ReadRest<true>(ch, p, last);
  1318. switch (ch)
  1319. {
  1320. case kchLS:
  1321. case kchPS:
  1322. goto LEcmaLineBreak;
  1323. }
  1324. }
  1325. break;
  1326. }
  1327. }
  1328. }
  1329. /*****************************************************************************
  1330. *
  1331. * We've encountered a newline - update various counters and things.
  1332. */
  1333. template<typename EncodingPolicy>
  1334. void Scanner<EncodingPolicy>::ScanNewLine(uint ch)
  1335. {
  1336. if (ch == '\r' && PeekNextChar() == '\n')
  1337. {
  1338. ReadNextChar();
  1339. }
  1340. NotifyScannedNewLine();
  1341. }
  1342. /*****************************************************************************
  1343. *
  1344. * We've encountered a newline - update various counters and things.
  1345. */
  1346. template<typename EncodingPolicy>
  1347. void Scanner<EncodingPolicy>::NotifyScannedNewLine()
  1348. {
  1349. // update in scanner: previous line, current line, number of lines.
  1350. m_line++;
  1351. m_pchPrevLine = m_pchMinLine;
  1352. m_pchMinLine = m_currentCharacter;
  1353. m_cMinLineMultiUnits = this->m_cMultiUnits;
  1354. }
  1355. /*****************************************************************************
  1356. *
  1357. * Delivers a token stream.
  1358. */
  1359. template<typename EncodingPolicy>
  1360. tokens Scanner<EncodingPolicy>::ScanForcingPid()
  1361. {
  1362. if (m_DeferredParseFlags != ScanFlagNone)
  1363. {
  1364. BYTE deferredParseFlagsSave = m_DeferredParseFlags;
  1365. m_DeferredParseFlags = ScanFlagNone;
  1366. tokens result = tkEOF;
  1367. TryFinally(
  1368. [&]() /* try block */
  1369. {
  1370. result = this->Scan();
  1371. },
  1372. [&](bool) /* finally block */
  1373. {
  1374. this->m_DeferredParseFlags = deferredParseFlagsSave;
  1375. });
  1376. return result;
  1377. }
  1378. return Scan();
  1379. }
  1380. template<typename EncodingPolicy>
  1381. tokens Scanner<EncodingPolicy>::Scan()
  1382. {
  1383. return ScanCore(true);
  1384. }
  1385. template<typename EncodingPolicy>
  1386. tokens Scanner<EncodingPolicy>::ScanNoKeywords()
  1387. {
  1388. return ScanCore(false);
  1389. }
  1390. template<typename EncodingPolicy>
  1391. tokens Scanner<EncodingPolicy>::ScanAhead()
  1392. {
  1393. return ScanNoKeywords();
  1394. }
  1395. template<typename EncodingPolicy>
  1396. tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)
  1397. {
  1398. codepoint_t ch;
  1399. OLECHAR firstChar;
  1400. OLECHAR secondChar;
  1401. EncodedCharPtr pchT;
  1402. size_t multiUnits = 0;
  1403. EncodedCharPtr p = m_currentCharacter;
  1404. EncodedCharPtr last = m_pchLast;
  1405. bool seenDelimitedCommentEnd = false;
  1406. // store the last token
  1407. m_tkPrevious = m_ptoken->tk;
  1408. m_iecpLimTokPrevious = IecpLimTok(); // Introduced for use by lambda parsing to find correct span of expression lambdas
  1409. if (p >= last)
  1410. {
  1411. m_pchMinTok = p;
  1412. m_cMinTokMultiUnits = this->m_cMultiUnits;
  1413. goto LEof;
  1414. }
  1415. tokens token;
  1416. m_fHadEol = FALSE;
  1417. CharTypes chType;
  1418. charcount_t commentStartLine;
  1419. if (m_scanState && *p != 0)
  1420. {
  1421. if (m_scanState == ScanStateStringTemplateMiddleOrEnd)
  1422. {
  1423. AssertMsg(m_fStringTemplateDepth > 0,
  1424. "Shouldn't be trying to parse a string template end or middle token if we aren't scanning a string template");
  1425. m_scanState = ScanStateNormal;
  1426. pchT = p;
  1427. token = ScanStringTemplateMiddleOrEnd(&pchT);
  1428. p = pchT;
  1429. goto LDone;
  1430. }
  1431. }
  1432. for (;;)
  1433. {
  1434. LLoop:
  1435. m_pchMinTok = p;
  1436. m_cMinTokMultiUnits = this->m_cMultiUnits;
  1437. ch = this->ReadFirst(p, last);
  1438. #if DEBUG
  1439. chType = this->charClassifier->GetCharType((OLECHAR)ch);
  1440. #endif
  1441. switch (ch)
  1442. {
  1443. default:
  1444. if (ch == kchLS ||
  1445. ch == kchPS )
  1446. {
  1447. goto LNewLine;
  1448. }
  1449. {
  1450. BOOL isMultiUnit = this->IsMultiUnitChar((OLECHAR)ch);
  1451. if (isMultiUnit)
  1452. {
  1453. ch = this->template ReadRest<true>((OLECHAR)ch, p, last);
  1454. }
  1455. if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
  1456. {
  1457. codepoint_t upper = this->PeekFull(p, last);
  1458. if (Js::NumberUtilities::IsSurrogateUpperPart(upper))
  1459. {
  1460. // Consume the rest of the utf8 bytes for the codepoint
  1461. OLECHAR decodedUpper = this->ReadSurrogatePairUpper(p, last);
  1462. Assert(decodedUpper == (OLECHAR) upper);
  1463. ch = Js::NumberUtilities::SurrogatePairAsCodePoint(ch, upper);
  1464. }
  1465. }
  1466. if (this->charClassifier->IsIdStart(ch))
  1467. {
  1468. // We treat IDContinue as an error.
  1469. token = ScanIdentifierContinue(identifyKwds, false, !!isMultiUnit, m_pchMinTok, p, &p);
  1470. break;
  1471. }
  1472. }
  1473. chType = this->charClassifier->GetCharType(ch);
  1474. switch (chType)
  1475. {
  1476. case _C_WSP: continue;
  1477. case _C_NWL: goto LNewLine;
  1478. // All other types (except errors) are handled by the outer switch.
  1479. }
  1480. Assert(chType == _C_LET || chType == _C_ERR || chType == _C_UNK || chType == _C_BKQ || chType == _C_SHP || chType == _C_AT || chType == _C_DIG);
  1481. m_currentCharacter = p - 1;
  1482. Error(ERRillegalChar);
  1483. continue;
  1484. case '\0':
  1485. // Put back the null in case we get called again.
  1486. p--;
  1487. if (p < last)
  1488. {
  1489. // A \0 prior to the end of the text is an invalid character.
  1490. Error(ERRillegalChar);
  1491. }
  1492. LEof:
  1493. Assert(p >= last);
  1494. token = tkEOF;
  1495. break;
  1496. case 0x0009:
  1497. case 0x000B:
  1498. case 0x000C:
  1499. case 0x0020:
  1500. Assert(chType == _C_WSP);
  1501. continue;
  1502. case '.':
  1503. if (!Js::NumberUtilities::IsDigit(*p))
  1504. {
  1505. // Not a double
  1506. if (m_scriptContext->GetConfig()->IsES6SpreadEnabled() &&
  1507. this->PeekFirst(p, last) == '.' &&
  1508. this->PeekFirst(p + 1, last) == '.')
  1509. {
  1510. token = tkEllipsis;
  1511. p += 2;
  1512. }
  1513. else
  1514. {
  1515. token = tkDot;
  1516. }
  1517. break;
  1518. }
  1519. // May be a double, fall through
  1520. case '0': case '1': case '2': case '3': case '4':
  1521. case '5': case '6': case '7': case '8': case '9':
  1522. {
  1523. double dbl;
  1524. Assert(chType == _C_DIG || chType == _C_DOT);
  1525. p = m_pchMinTok;
  1526. this->RestoreMultiUnits(m_cMinTokMultiUnits);
  1527. bool likelyInt = true;
  1528. pchT = FScanNumber(p, &dbl, likelyInt);
  1529. if (p == pchT)
  1530. {
  1531. Assert(this->PeekFirst(p, last) != '.');
  1532. Error(ERRbadNumber);
  1533. }
  1534. Assert(!Js::NumberUtilities::IsNan(dbl));
  1535. p = pchT;
  1536. int32 value;
  1537. if (likelyInt && Js::NumberUtilities::FDblIsInt32(dbl, &value))
  1538. {
  1539. m_ptoken->SetLong(value);
  1540. token = tkIntCon;
  1541. }
  1542. else
  1543. {
  1544. token = tkFltCon;
  1545. m_ptoken->SetDouble(dbl, likelyInt);
  1546. }
  1547. break;
  1548. }
  1549. case '(': Assert(chType == _C_LPR); token = tkLParen; break;
  1550. case ')': Assert(chType == _C_RPR); token = tkRParen; break;
  1551. case ',': Assert(chType == _C_CMA); token = tkComma; break;
  1552. case ';': Assert(chType == _C_SMC); token = tkSColon; break;
  1553. case '[': Assert(chType == _C_LBR); token = tkLBrack; break;
  1554. case ']': Assert(chType == _C_RBR); token = tkRBrack; break;
  1555. case '~': Assert(chType == _C_TIL); token = tkTilde; break;
  1556. case '?': Assert(chType == _C_QUE); token = tkQMark; break;
  1557. case '{': Assert(chType == _C_LC); token = tkLCurly; break;
  1558. // ES 2015 11.3 Line Terminators
  1559. case '\r':
  1560. case '\n':
  1561. // kchLS:
  1562. // kchPS:
  1563. LNewLine:
  1564. m_currentCharacter = p;
  1565. ScanNewLine(ch);
  1566. p = m_currentCharacter;
  1567. m_fHadEol = TRUE;
  1568. continue;
  1569. LReserved:
  1570. {
  1571. // We will derive the PID from the token
  1572. Assert(token < tkID);
  1573. m_ptoken->SetIdentifier(NULL);
  1574. goto LDone;
  1575. }
  1576. LEval:
  1577. {
  1578. token = tkID;
  1579. if (!this->m_parser) goto LIdentifier;
  1580. m_ptoken->SetIdentifier(this->m_parser->GetEvalPid());
  1581. goto LDone;
  1582. }
  1583. LArguments:
  1584. {
  1585. token = tkID;
  1586. if (!this->m_parser) goto LIdentifier;
  1587. m_ptoken->SetIdentifier(this->m_parser->GetArgumentsPid());
  1588. goto LDone;
  1589. }
  1590. LTarget:
  1591. {
  1592. token = tkID;
  1593. if (!this->m_parser) goto LIdentifier;
  1594. m_ptoken->SetIdentifier(this->m_parser->GetTargetPid());
  1595. goto LDone;
  1596. }
  1597. #include "kwd-swtch.h"
  1598. case 'A': case 'B': case 'C': case 'D': case 'E':
  1599. case 'F': case 'G': case 'H': case 'I': case 'J':
  1600. case 'K': case 'L': case 'M': case 'N': case 'O':
  1601. case 'P': case 'Q': case 'R': case 'S': case 'T':
  1602. case 'U': case 'V': case 'W': case 'X': case 'Y':
  1603. case 'Z':
  1604. // Lower-case letters handled in kwd-swtch.h above during reserved word recognition.
  1605. case '$': case '_':
  1606. LIdentifier:
  1607. Assert(this->charClassifier->IsIdStart(ch));
  1608. Assert(ch < 0x10000 && !this->IsMultiUnitChar((OLECHAR)ch));
  1609. token = ScanIdentifierContinue(identifyKwds, false, false, m_pchMinTok, p, &p);
  1610. break;
  1611. case '`':
  1612. Assert(chType == _C_BKQ);
  1613. pchT = p;
  1614. token = ScanStringTemplateBegin(&pchT);
  1615. p = pchT;
  1616. break;
  1617. case '}':
  1618. Assert(chType == _C_RC);
  1619. token = tkRCurly;
  1620. break;
  1621. case '\\':
  1622. pchT = p - 1;
  1623. token = ScanIdentifier(identifyKwds, &pchT);
  1624. if (tkScanError == token)
  1625. {
  1626. m_currentCharacter = p;
  1627. Error(ERRillegalChar);
  1628. }
  1629. p = pchT;
  1630. break;
  1631. case ':':
  1632. token = tkColon;
  1633. break;
  1634. case '=':
  1635. token = tkAsg;
  1636. switch (this->PeekFirst(p, last))
  1637. {
  1638. case '=':
  1639. p++;
  1640. token = tkEQ;
  1641. if (this->PeekFirst(p, last) == '=')
  1642. {
  1643. p++;
  1644. token = tkEqv;
  1645. }
  1646. break;
  1647. case '>':
  1648. p++;
  1649. token = tkDArrow;
  1650. break;
  1651. }
  1652. break;
  1653. case '!':
  1654. token = tkBang;
  1655. if (this->PeekFirst(p, last) == '=')
  1656. {
  1657. p++;
  1658. token = tkNE;
  1659. if (this->PeekFirst(p, last) == '=')
  1660. {
  1661. p++;
  1662. token = tkNEqv;
  1663. }
  1664. }
  1665. break;
  1666. case '+':
  1667. token = tkAdd;
  1668. switch (this->PeekFirst(p, last))
  1669. {
  1670. case '=':
  1671. p++;
  1672. token = tkAsgAdd;
  1673. break;
  1674. case '+':
  1675. p++;
  1676. token = tkInc;
  1677. break;
  1678. }
  1679. break;
  1680. case '-':
  1681. token = tkSub;
  1682. switch (this->PeekFirst(p, last))
  1683. {
  1684. case '=':
  1685. p++;
  1686. token = tkAsgSub;
  1687. break;
  1688. case '-':
  1689. p++;
  1690. token = tkDec;
  1691. if (!m_fIsModuleCode)
  1692. {
  1693. // https://tc39.github.io/ecma262/#prod-annexB-MultiLineComment
  1694. // If there was a new line in the multi-line comment, the text after --> is a comment.
  1695. if ('>' == this->PeekFirst(p, last) && m_fHadEol)
  1696. {
  1697. goto LSkipLineComment;
  1698. }
  1699. }
  1700. break;
  1701. }
  1702. break;
  1703. case '*':
  1704. token = tkStar;
  1705. switch(this->PeekFirst(p, last))
  1706. {
  1707. case '=' :
  1708. p++;
  1709. token = tkAsgMul;
  1710. break;
  1711. case '*' :
  1712. if (!m_scriptContext->GetConfig()->IsES7ExponentiationOperatorEnabled())
  1713. {
  1714. break;
  1715. }
  1716. p++;
  1717. token = tkExpo;
  1718. if (this->PeekFirst(p, last) == '=')
  1719. {
  1720. p++;
  1721. token = tkAsgExpo;
  1722. }
  1723. }
  1724. break;
  1725. case '/':
  1726. token = tkDiv;
  1727. switch(this->PeekFirst(p, last))
  1728. {
  1729. case '=':
  1730. p++;
  1731. token = tkAsgDiv;
  1732. break;
  1733. case '/':
  1734. if (p >= last)
  1735. {
  1736. AssertMsg(!m_fIsModuleCode, "Do we have other line comment cases scanning pass last?");
  1737. // Effective source length may have excluded HTMLCommentSuffix "//... -->". If we are scanning
  1738. // those, we have passed "last" already. Move back and return EOF.
  1739. p = last;
  1740. goto LEof;
  1741. }
  1742. ch = *++p;
  1743. firstChar = (OLECHAR)ch;
  1744. LSkipLineComment:
  1745. pchT = NULL;
  1746. for (;;)
  1747. {
  1748. switch ((ch = this->ReadFirst(p, last)))
  1749. {
  1750. case kchLS: // 0x2028, classifies as new line
  1751. case kchPS: // 0x2029, classifies as new line
  1752. LEcmaCommentLineBreak:
  1753. // kchPS and kchLS are more than one unit in UTF-8.
  1754. if (pchT)
  1755. {
  1756. // kchPS and kchLS are more than one unit in UTF-8.
  1757. p = pchT;
  1758. }
  1759. else
  1760. {
  1761. // But only a single code unit in UTF16
  1762. p--;
  1763. }
  1764. this->RestoreMultiUnits(multiUnits);
  1765. goto LCommentLineBreak;
  1766. case kchNWL:
  1767. case kchRET:
  1768. p--;
  1769. LCommentLineBreak:
  1770. // Subtract the comment length from the total char count for the purpose
  1771. // of deciding whether to defer AST and byte code generation.
  1772. m_parser->ReduceDeferredScriptLength((ULONG)(p - m_pchMinTok));
  1773. break;
  1774. case kchNUL:
  1775. // Because we used ReadFirst, we have advanced p. The character that we are looking at is actually is p - 1.
  1776. // If p == last, we are looking at p - 1, it is still within the source buffer, and we need to consider it part of the comment
  1777. // Only if p > last that we have pass the source buffer and consider it a line break
  1778. if (p > last)
  1779. {
  1780. p--;
  1781. goto LCommentLineBreak;
  1782. }
  1783. continue;
  1784. default:
  1785. if (this->IsMultiUnitChar((OLECHAR)ch))
  1786. {
  1787. pchT = p - 1;
  1788. multiUnits = this->m_cMultiUnits;
  1789. switch (ch = this->template ReadRest<true>((OLECHAR)ch, p, last))
  1790. {
  1791. case kchLS:
  1792. case kchPS:
  1793. goto LEcmaCommentLineBreak;
  1794. }
  1795. }
  1796. continue;
  1797. }
  1798. break;
  1799. }
  1800. continue;
  1801. case '*':
  1802. ch = *++p;
  1803. firstChar = (OLECHAR)ch;
  1804. if ((p + 1) < last)
  1805. {
  1806. secondChar = (OLECHAR)(*(p + 1));
  1807. }
  1808. else
  1809. {
  1810. secondChar = '\0';
  1811. }
  1812. pchT = p;
  1813. commentStartLine = m_line;
  1814. bool containTypeDef;
  1815. if (tkNone == (token = SkipComment(&pchT, &containTypeDef)))
  1816. {
  1817. // Subtract the comment length from the total char count for the purpose
  1818. // of deciding whether to defer AST and byte code generation.
  1819. m_parser->ReduceDeferredScriptLength((ULONG)(pchT - m_pchMinTok));
  1820. p = pchT;
  1821. seenDelimitedCommentEnd = true;
  1822. goto LLoop;
  1823. }
  1824. p = pchT;
  1825. break;
  1826. }
  1827. break;
  1828. case '%':
  1829. Assert(chType == _C_PCT);
  1830. token = tkPct;
  1831. if (this->PeekFirst(p, last) == '=')
  1832. {
  1833. p++;
  1834. token = tkAsgMod;
  1835. }
  1836. break;
  1837. case '<':
  1838. Assert(chType == _C_LT);
  1839. token = tkLT;
  1840. switch (this->PeekFirst(p, last))
  1841. {
  1842. case '=':
  1843. p++;
  1844. token = tkLE;
  1845. break;
  1846. case '<':
  1847. p++;
  1848. token = tkLsh;
  1849. if (this->PeekFirst(p, last) == '=')
  1850. {
  1851. p++;
  1852. token = tkAsgLsh;
  1853. break;
  1854. }
  1855. break;
  1856. case '!':
  1857. // ES 2015 B.1.3 - HTML comments are only allowed when parsing non-module code.
  1858. if (!m_fIsModuleCode && this->PeekFirst(p + 1, last) == '-' && this->PeekFirst(p + 2, last) == '-')
  1859. {
  1860. // This is a "<!--" comment - treat as //
  1861. if (p >= last)
  1862. {
  1863. // Effective source length may have excluded HTMLCommentSuffix "<!-- ... -->". If we are scanning
  1864. // those, we have passed "last" already. Move back and return EOF.
  1865. p = last;
  1866. goto LEof;
  1867. }
  1868. firstChar = '!';
  1869. goto LSkipLineComment;
  1870. }
  1871. break;
  1872. }
  1873. break;
  1874. case '>':
  1875. Assert(chType == _C_GT);
  1876. token = tkGT;
  1877. switch (this->PeekFirst(p, last))
  1878. {
  1879. case '=':
  1880. p++;
  1881. token = tkGE;
  1882. break;
  1883. case '>':
  1884. p++;
  1885. token = tkRsh;
  1886. switch (this->PeekFirst(p, last))
  1887. {
  1888. case '=':
  1889. p++;
  1890. token = tkAsgRsh;
  1891. break;
  1892. case '>':
  1893. p++;
  1894. token = tkRs2;
  1895. if (*p == '=')
  1896. {
  1897. p++;
  1898. token = tkAsgRs2;
  1899. }
  1900. break;
  1901. }
  1902. break;
  1903. }
  1904. break;
  1905. case '^':
  1906. Assert(chType == _C_XOR);
  1907. token = tkXor;
  1908. if (this->PeekFirst(p, last) == '=')
  1909. {
  1910. p++;
  1911. token = tkAsgXor;
  1912. }
  1913. break;
  1914. case '|':
  1915. Assert(chType == _C_BAR);
  1916. token = tkOr;
  1917. switch (this->PeekFirst(p, last))
  1918. {
  1919. case '=':
  1920. p++;
  1921. token = tkAsgOr;
  1922. break;
  1923. case '|':
  1924. p++;
  1925. token = tkLogOr;
  1926. break;
  1927. }
  1928. break;
  1929. case '&':
  1930. Assert(chType == _C_AMP);
  1931. token = tkAnd;
  1932. switch (this->PeekFirst(p, last))
  1933. {
  1934. case '=':
  1935. p++;
  1936. token = tkAsgAnd;
  1937. break;
  1938. case '&':
  1939. p++;
  1940. token = tkLogAnd;
  1941. break;
  1942. }
  1943. break;
  1944. case '\'':
  1945. case '"':
  1946. Assert(chType == _C_QUO || chType == _C_APO);
  1947. pchT = p;
  1948. token = this->ScanStringConstant((OLECHAR)ch, &pchT);
  1949. p = pchT;
  1950. break;
  1951. }
  1952. break;
  1953. }
  1954. LDone:
  1955. m_currentCharacter = p;
  1956. return (m_ptoken->tk = token);
  1957. }
  1958. template <typename EncodingPolicy>
  1959. IdentPtr Scanner<EncodingPolicy>::GetSecondaryBufferAsPid()
  1960. {
  1961. bool createPid = true;
  1962. if ((m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0)
  1963. {
  1964. createPid = false;
  1965. }
  1966. if (createPid)
  1967. {
  1968. return m_phtbl->PidHashNameLen(m_tempChBufSecondary.m_prgch, m_tempChBufSecondary.m_ichCur);
  1969. }
  1970. else
  1971. {
  1972. return nullptr;
  1973. }
  1974. }
  1975. template <typename EncodingPolicy>
  1976. LPCOLESTR Scanner<EncodingPolicy>::StringFromLong(int32 lw)
  1977. {
  1978. _ltow_s(lw, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax, 10);
  1979. return m_tempChBuf.m_prgch;
  1980. }
  1981. template <typename EncodingPolicy>
  1982. IdentPtr Scanner<EncodingPolicy>::PidFromLong(int32 lw)
  1983. {
  1984. return m_phtbl->PidHashName(StringFromLong(lw));
  1985. }
  1986. template <typename EncodingPolicy>
  1987. LPCOLESTR Scanner<EncodingPolicy>::StringFromDbl(double dbl)
  1988. {
  1989. if (!Js::NumberUtilities::FDblToStr(dbl, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax))
  1990. {
  1991. Error(ERRnoMemory);
  1992. }
  1993. return m_tempChBuf.m_prgch;
  1994. }
  1995. template <typename EncodingPolicy>
  1996. IdentPtr Scanner<EncodingPolicy>::PidFromDbl(double dbl)
  1997. {
  1998. return m_phtbl->PidHashName(StringFromDbl(dbl));
  1999. }
  2000. template <typename EncodingPolicy>
  2001. void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint)
  2002. {
  2003. Capture(restorePoint, 0, 0);
  2004. }
  2005. template <typename EncodingPolicy>
  2006. void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr)
  2007. {
  2008. restorePoint->m_ichMinTok = this->IchMinTok();
  2009. restorePoint->m_ichMinLine = this->IchMinLine();
  2010. restorePoint->m_cMinTokMultiUnits = this->m_cMinTokMultiUnits;
  2011. restorePoint->m_cMinLineMultiUnits = this->m_cMinLineMultiUnits;
  2012. restorePoint->m_line = this->m_line;
  2013. restorePoint->m_fHadEol = this->m_fHadEol;
  2014. restorePoint->functionIdIncrement = functionIdIncrement;
  2015. restorePoint->lengthDecr = lengthDecr;
  2016. #ifdef DEBUG
  2017. restorePoint->m_cMultiUnits = this->m_cMultiUnits;
  2018. #endif
  2019. }
  2020. template <typename EncodingPolicy>
  2021. void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint)
  2022. {
  2023. SeekAndScan<false>(restorePoint);
  2024. }
  2025. template <typename EncodingPolicy>
  2026. void Scanner<EncodingPolicy>::SeekToForcingPid(const RestorePoint& restorePoint)
  2027. {
  2028. SeekAndScan<true>(restorePoint);
  2029. }
  2030. template <typename EncodingPolicy>
  2031. template <bool forcePid>
  2032. void Scanner<EncodingPolicy>::SeekAndScan(const RestorePoint& restorePoint)
  2033. {
  2034. this->m_currentCharacter = this->m_pchBase + restorePoint.m_ichMinTok + restorePoint.m_cMinTokMultiUnits;
  2035. this->m_pchMinLine = this->m_pchBase + restorePoint.m_ichMinLine + restorePoint.m_cMinLineMultiUnits;
  2036. this->m_cMinLineMultiUnits = restorePoint.m_cMinLineMultiUnits;
  2037. this->RestoreMultiUnits(restorePoint.m_cMinTokMultiUnits);
  2038. if (forcePid)
  2039. {
  2040. this->ScanForcingPid();
  2041. }
  2042. else
  2043. {
  2044. this->Scan();
  2045. }
  2046. this->m_line = restorePoint.m_line;
  2047. this->m_fHadEol = restorePoint.m_fHadEol;
  2048. this->m_parser->ReduceDeferredScriptLength(restorePoint.lengthDecr);
  2049. Assert(this->m_cMultiUnits == restorePoint.m_cMultiUnits);
  2050. }
  2051. template <typename EncodingPolicy>
  2052. void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId)
  2053. {
  2054. SeekTo(restorePoint);
  2055. *nextFunctionId += restorePoint.functionIdIncrement;
  2056. }
  2057. // Called by CompileScriptException::ProcessError to retrieve a BSTR for the line on which an error occurred.
  2058. template<typename EncodingPolicy>
  2059. HRESULT Scanner<EncodingPolicy>::SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine)
  2060. {
  2061. if( !pbstrLine )
  2062. {
  2063. return E_POINTER;
  2064. }
  2065. // If we overflow the string, we have a serious problem...
  2066. if (ichMinLine < 0 || static_cast<size_t>(ichMinLine) > AdjustedLength() )
  2067. {
  2068. return E_UNEXPECTED;
  2069. }
  2070. typename EncodingPolicy::EncodedCharPtr pStart = static_cast<size_t>(ichMinLine) == IchMinLine() ? m_pchMinLine : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, ichMinLine);
  2071. // Determine the length by scanning for the next newline
  2072. charcount_t cch = LineLength(pStart, m_pchLast);
  2073. Assert(cch <= LONG_MAX);
  2074. typename EncodingPolicy::EncodedCharPtr pEnd = static_cast<size_t>(ichMinLine) == IchMinLine() ? m_pchMinLine + cch : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, cch);
  2075. *pbstrLine = SysAllocStringLen(NULL, cch);
  2076. if (!*pbstrLine)
  2077. {
  2078. return E_OUTOFMEMORY;
  2079. }
  2080. this->ConvertToUnicode(*pbstrLine, cch, pStart, pEnd);
  2081. return S_OK;
  2082. }
  // Explicit instantiation of the Scanner class template for
  // NotNullTerminatedUTF8EncodingPolicy, so the member definitions in this
  // translation unit are emitted for that encoding policy.
  2083. template class Scanner<NotNullTerminatedUTF8EncodingPolicy>;