2
0

Scan.cpp 77 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. /*****************************************************************************
  7. *
  8. * The following table speeds various tests of characters, such as whether
  9. * a given character can be part of an identifier, and so on.
  10. */
  11. int CountNewlines(LPCOLESTR psz, int cch)
  12. {
  13. int cln = 0;
  14. while (0 != *psz && 0 != cch--)
  15. {
  16. switch (*psz++)
  17. {
  18. case _u('\xD'):
  19. if (*psz == _u('\xA'))
  20. {
  21. ++psz;
  22. if (0 == cch--)
  23. break;
  24. }
  25. // fall-through
  26. case _u('\xA'):
  27. cln++;
  28. break;
  29. }
  30. }
  31. return cln;
  32. }
  33. template< typename CharT >
  34. struct AorW
  35. {
  36. };
  37. // Specialization for UTF8Char
  38. template<>
  39. struct AorW< UTF8Char >
  40. {
  41. // Expressing the args as "arrays of size N" ensures that the both args
  42. // are the same length. If not, we get a compile time error.
  43. template< size_t N >
  44. static const UTF8Char* Choose( const char (&a)[N], const char16 (&w)[N] )
  45. {
  46. // The reinterpret_cast is necessary to go from signed to unsigned char
  47. return reinterpret_cast< const UTF8Char* >(a);
  48. }
  49. template< size_t N >
  50. static const bool Test(const char (&a)[N], const char16 (&w)[N], LPCUTF8 value)
  51. {
  52. return 0 == memcmp(a, value, (N - 1) * sizeof(utf8char_t));
  53. }
  54. template< size_t N >
  55. static const bool Test(const char (&a)[N], const char16 (&w)[N], LPCUTF8 start, LPCUTF8 end)
  56. {
  57. return (end - start == N - 1) && (0 == memcmp(a, start, (N - 1) * sizeof(utf8char_t)));
  58. }
  59. };
  60. // Specialization for OLECHAR
  61. template<>
  62. struct AorW< OLECHAR >
  63. {
  64. template< size_t N >
  65. static const char16* Choose( const char (&a)[N], const char16 (&w)[N] )
  66. {
  67. return w;
  68. }
  69. template < size_t N >
  70. static bool Test(const char (&a)[N], const char16 (&w)[N], const char16 *value)
  71. {
  72. return 0 == memcmp(w, value, (N - 1) * sizeof(char16));
  73. }
  74. template < size_t N >
  75. static bool Test(const char (&a)[N], const char16 (&w)[N], const char16 *start, const char16 *end)
  76. {
  77. return (end - start == N - 1) && (0 == memcmp(w, start, (N - 1) * sizeof(char16)));
  78. }
  79. };
  80. BOOL Token::IsKeyword() const
  81. {
  82. // keywords (but not future reserved words)
  83. return (tk <= tkYIELD);
  84. }
  85. tokens Token::SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser)
  86. {
  87. Assert(parser);
  88. if(pattern)
  89. parser->RegisterRegexPattern(pattern);
  90. this->u.pattern = pattern;
  91. return tk = tkRegExp;
  92. }
  93. IdentPtr Token::CreateIdentifier(HashTbl * hashTbl)
  94. {
  95. Assert(this->u.pid == nullptr);
  96. if (this->u.pchMin)
  97. {
  98. Assert(IsIdentifier());
  99. IdentPtr pid = hashTbl->PidHashNameLen(this->u.pchMin, this->u.pchMin + this->u.length, this->u.length);
  100. this->u.pid = pid;
  101. return pid;
  102. }
  103. Assert(IsReservedWord());
  104. IdentPtr pid = hashTbl->PidFromTk(tk);
  105. this->u.pid = pid;
  106. return pid;
  107. }
  108. template <typename EncodingPolicy>
  109. Scanner<EncodingPolicy>::Scanner(Parser* parser, HashTbl *phtbl, Token *ptoken, Js::ScriptContext* scriptContext)
  110. {
  111. AssertMem(phtbl);
  112. AssertMem(ptoken);
  113. m_parser = parser;
  114. m_phtbl = phtbl;
  115. m_ptoken = ptoken;
  116. m_cMinLineMultiUnits = 0;
  117. m_fHadEol = FALSE;
  118. m_doubleQuoteOnLastTkStrCon = FALSE;
  119. m_OctOrLeadingZeroOnLastTKNumber = false;
  120. m_fStringTemplateDepth = 0;
  121. m_scanState = ScanStateNormal;
  122. m_scriptContext = scriptContext;
  123. m_line = 0;
  124. m_startLine = 0;
  125. m_pchStartLine = NULL;
  126. m_ichMinError = 0;
  127. m_ichLimError = 0;
  128. m_tempChBuf.m_pscanner = this;
  129. m_tempChBufSecondary.m_pscanner = this;
  130. m_iecpLimTokPrevious = (size_t)-1;
  131. this->charClassifier = scriptContext->GetCharClassifier();
  132. this->es6UnicodeMode = scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled();
  133. m_fYieldIsKeyword = false;
  134. m_fAwaitIsKeyword = false;
  135. }
  136. template <typename EncodingPolicy>
  137. Scanner<EncodingPolicy>::~Scanner(void)
  138. {
  139. }
  140. /*****************************************************************************
  141. *
  142. * Initializes the scanner to prepare to scan the given source text.
  143. */
  144. template <typename EncodingPolicy>
  145. void Scanner<EncodingPolicy>::SetText(EncodedCharPtr pszSrc, size_t offset, size_t length, charcount_t charOffset, ULONG grfscr, ULONG lineNumber)
  146. {
  147. // Save the start of the script and add the offset to get the point where we should start scanning.
  148. m_pchBase = pszSrc;
  149. m_pchLast = m_pchBase + offset + length;
  150. m_pchPrevLine = m_currentCharacter = m_pchMinLine = m_pchMinTok = pszSrc + offset;
  151. this->RestoreMultiUnits(offset - charOffset);
  152. // Absorb any byte order mark at the start
  153. if(offset == 0)
  154. {
  155. switch( this->PeekFull(m_currentCharacter, m_pchLast) )
  156. {
  157. case 0xFFEE: // "Opposite" endian BOM
  158. // We do not support big-endian encodings
  159. // fall-through
  160. case 0xFEFF: // "Correct" BOM
  161. this->template ReadFull<true>(m_currentCharacter, m_pchLast);
  162. break;
  163. }
  164. }
  165. m_line = lineNumber;
  166. m_startLine = lineNumber;
  167. m_pchStartLine = m_currentCharacter;
  168. m_ptoken->tk = tkNone;
  169. m_fIsModuleCode = (grfscr & fscrIsModuleCode) != 0;
  170. m_fHadEol = FALSE;
  171. m_fSyntaxColor = (grfscr & fscrSyntaxColor) != 0;
  172. m_DeferredParseFlags = ScanFlagNone;
  173. }
  174. template <typename EncodingPolicy>
  175. void Scanner<EncodingPolicy>::PrepareForBackgroundParse(Js::ScriptContext *scriptContext)
  176. {
  177. scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0);
  178. scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
  179. }
  180. //-----------------------------------------------------------------------------
  181. // Number of code points from 'first' up to, but not including the next
  182. // newline character, embedded NUL, or 'last', depending on which comes first.
  183. //
  184. // This is used to determine a length of BSTR, which can't contain a NUL character.
  185. //-----------------------------------------------------------------------------
  186. template <typename EncodingPolicy>
  187. charcount_t Scanner<EncodingPolicy>::LineLength(EncodedCharPtr first, EncodedCharPtr last)
  188. {
  189. charcount_t result = 0;
  190. EncodedCharPtr p = first;
  191. for (;;)
  192. {
  193. switch( this->template ReadFull<false>(p, last) )
  194. {
  195. case kchNWL: // _C_NWL
  196. case kchRET:
  197. case kchLS:
  198. case kchPS:
  199. case kchNUL: // _C_NUL
  200. return result;
  201. }
  202. result++;
  203. }
  204. }
  205. template <typename EncodingPolicy>
  206. charcount_t Scanner<EncodingPolicy>::UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd)
  207. {
  208. EncodedCharPtr p = start;
  209. charcount_t ich = ichStart;
  210. int32 current = line;
  211. charcount_t lastStart = ichStart;
  212. while (ich < ichEnd)
  213. {
  214. ich++;
  215. switch (this->template ReadFull<false>(p, last))
  216. {
  217. case kchRET:
  218. if (this->PeekFull(p, last) == kchNWL)
  219. {
  220. ich++;
  221. this->template ReadFull<false>(p, last);
  222. }
  223. // fall-through
  224. case kchNWL:
  225. case kchLS:
  226. case kchPS:
  227. current++;
  228. lastStart = ich;
  229. break;
  230. case kchNUL:
  231. goto done;
  232. }
  233. }
  234. done:
  235. line = current;
  236. return lastStart;
  237. }
  238. template <typename EncodingPolicy>
  239. bool Scanner<EncodingPolicy>::TryReadEscape(EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar)
  240. {
  241. Assert(outChar != nullptr);
  242. Assert(startingLocation <= endOfSource);
  243. EncodedCharPtr currentLocation = startingLocation;
  244. codepoint_t charToOutput = 0x0;
  245. // '\' is Assumed as there is only one caller
  246. // Read 'u' characters
  247. if (currentLocation >= endOfSource || this->ReadFirst(currentLocation, endOfSource) != 'u')
  248. {
  249. return false;
  250. }
  251. bool expectCurly = false;
  252. if (currentLocation < endOfSource && this->PeekFirst(currentLocation, endOfSource) == '{' && es6UnicodeMode)
  253. {
  254. expectCurly = true;
  255. // Move past the character
  256. this->ReadFirst(currentLocation, endOfSource);
  257. }
  258. uint i = 0;
  259. OLECHAR ch = 0;
  260. int hexValue = 0;
  261. uint maxHexDigits = (expectCurly ? MAXUINT32 : 4u);
  262. for(; i < maxHexDigits && currentLocation < endOfSource; i++)
  263. {
  264. if (!Js::NumberUtilities::FHexDigit(ch = this->ReadFirst(currentLocation, endOfSource), &hexValue))
  265. {
  266. break;
  267. }
  268. charToOutput = charToOutput * 0x10 + hexValue;
  269. if (charToOutput > 0x10FFFF)
  270. {
  271. return false;
  272. }
  273. }
  274. //At least 4 characters have to be read
  275. if (i == 0 || (i != 4 && !expectCurly))
  276. {
  277. return false;
  278. }
  279. Assert(expectCurly ? es6UnicodeMode : true);
  280. if (expectCurly && ch != '}')
  281. {
  282. return false;
  283. }
  284. *outChar = charToOutput;
  285. startingLocation = currentLocation;
  286. return true;
  287. }
  288. template <typename EncodingPolicy>
  289. template <bool bScan>
  290. bool Scanner<EncodingPolicy>::TryReadCodePointRest(codepoint_t lower, EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar)
  291. {
  292. Assert(outChar != nullptr);
  293. Assert(outContainsMultiUnitChar != nullptr);
  294. Assert(es6UnicodeMode);
  295. Assert(Js::NumberUtilities::IsSurrogateLowerPart(lower));
  296. EncodedCharPtr currentLocation = startingLocation;
  297. *outChar = lower;
  298. if (currentLocation < endOfSource)
  299. {
  300. size_t restorePoint = this->m_cMultiUnits;
  301. codepoint_t upper = this->template ReadFull<bScan>(currentLocation, endOfSource);
  302. if (Js::NumberUtilities::IsSurrogateUpperPart(upper))
  303. {
  304. *outChar = Js::NumberUtilities::SurrogatePairAsCodePoint(lower, upper);
  305. if (this->IsMultiUnitChar(static_cast<OLECHAR>(upper)))
  306. {
  307. *outContainsMultiUnitChar = true;
  308. }
  309. startingLocation = currentLocation;
  310. }
  311. else
  312. {
  313. this->RestoreMultiUnits(restorePoint);
  314. }
  315. }
  316. return true;
  317. }
  318. template <typename EncodingPolicy>
  319. template <bool bScan>
  320. inline bool Scanner<EncodingPolicy>::TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar)
  321. {
  322. Assert(outChar != nullptr);
  323. Assert(outContainsMultiUnitChar != nullptr);
  324. if (startingLocation >= endOfSource)
  325. {
  326. return false;
  327. }
  328. codepoint_t ch = this->template ReadFull<bScan>(startingLocation, endOfSource);
  329. if (FBigChar(ch))
  330. {
  331. if (this->IsMultiUnitChar(static_cast<OLECHAR>(ch)))
  332. {
  333. *outContainsMultiUnitChar = true;
  334. }
  335. if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
  336. {
  337. return TryReadCodePointRest<bScan>(ch, startingLocation, endOfSource, outChar, outContainsMultiUnitChar);
  338. }
  339. }
  340. else if (ch == '\\' && TryReadEscape(startingLocation, endOfSource, &ch))
  341. {
  342. *hasEscape = true;
  343. }
  344. *outChar = ch;
  345. return true;
  346. }
  347. template <typename EncodingPolicy>
  348. tokens Scanner<EncodingPolicy>::ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp)
  349. {
  350. EncodedCharPtr p = *pp;
  351. EncodedCharPtr pchMin = p;
  352. // JS6 allows unicode characters in the form of \uxxxx escape sequences
  353. // to be part of the identifier.
  354. bool fHasEscape = false;
  355. bool fHasMultiChar = false;
  356. codepoint_t codePoint = INVALID_CODEPOINT;
  357. size_t multiUnitsBeforeLast = this->m_cMultiUnits;
  358. // Check if we started the id
  359. if (!TryReadCodePoint<true>(p, m_pchLast, &codePoint, &fHasEscape, &fHasMultiChar))
  360. {
  361. // If no chars. could be scanned as part of the identifier, return error.
  362. return tkScanError;
  363. }
  364. Assert(codePoint < 0x110000u);
  365. if (!charClassifier->IsIdStart(codePoint))
  366. {
  367. // Put back the last character
  368. this->RestoreMultiUnits(multiUnitsBeforeLast);
  369. // If no chars. could be scanned as part of the identifier, return error.
  370. return tkScanError;
  371. }
  372. return ScanIdentifierContinue(identifyKwds, fHasEscape, fHasMultiChar, pchMin, p, pp);
  373. }
  374. template <typename EncodingPolicy>
  375. BOOL Scanner<EncodingPolicy>::FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last)
  376. {
  377. if (EncodingPolicy::MultiUnitEncoding)
  378. {
  379. while (p < last)
  380. {
  381. EncodedChar currentChar = *p;
  382. if (this->IsMultiUnitChar(currentChar))
  383. {
  384. // multi unit character, we may not have reach the end yet
  385. return FALSE;
  386. }
  387. Assert(currentChar != '\\' || !charClassifier->IsIdContinueFast<false>(currentChar));
  388. if (!charClassifier->IsIdContinueFast<false>(currentChar))
  389. {
  390. // only reach the end of the identifier if it is not the start of an escape sequence
  391. return currentChar != '\\';
  392. }
  393. p++;
  394. }
  395. // We have reach the end of the identifier.
  396. return TRUE;
  397. }
  398. // Not fast path for non multi unit encoding
  399. return false;
  400. }
  401. template <typename EncodingPolicy>
  402. tokens Scanner<EncodingPolicy>::ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar,
  403. EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp)
  404. {
  405. EncodedCharPtr last = m_pchLast;
  406. while (true)
  407. {
  408. // Fast path for utf8, non-multi unit char and not escape
  409. if (FastIdentifierContinue(p, last))
  410. {
  411. break;
  412. }
  413. // Slow path that has to deal with multi unit encoding
  414. codepoint_t codePoint = INVALID_CODEPOINT;
  415. EncodedCharPtr pchBeforeLast = p;
  416. size_t multiUnitsBeforeLast = this->m_cMultiUnits;
  417. if (TryReadCodePoint<true>(p, last, &codePoint, &fHasEscape, &fHasMultiChar))
  418. {
  419. Assert(codePoint < 0x110000u);
  420. if (charClassifier->IsIdContinue(codePoint))
  421. {
  422. continue;
  423. }
  424. }
  425. // Put back the last character
  426. p = pchBeforeLast;
  427. this->RestoreMultiUnits(multiUnitsBeforeLast);
  428. break;
  429. }
  430. Assert(p - pchMin > 0 && p - pchMin <= LONG_MAX);
  431. *pp = p;
  432. if (!identifyKwds)
  433. {
  434. return tkID;
  435. }
  436. // During syntax coloring, scanner doesn't need to convert the escape sequence to get actual characters, it just needs the classification information
  437. // So call up hashtables custom method to check if the string scanned is identifier or keyword.
  438. // Do the same for deferred parsing, but use a custom method that only tokenizes JS keywords.
  439. if ((m_DeferredParseFlags & ScanFlagSuppressIdPid) != 0)
  440. {
  441. m_ptoken->SetIdentifier(NULL);
  442. if (!fHasEscape)
  443. {
  444. // If there are no escape, that the main scan loop would have found the keyword already
  445. // So we can just assume it is an ID
  446. DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p));
  447. DebugOnly(tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode()));
  448. Assert(tk == tkID || (tk == tkYIELD && !m_fYieldIsKeyword) || (tk == tkAWAIT && !m_fAwaitIsKeyword));
  449. return tkID;
  450. }
  451. int32 cch = UnescapeToTempBuf(pchMin, p);
  452. tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode());
  453. return (!m_fYieldIsKeyword && tk == tkYIELD) || (!m_fAwaitIsKeyword && tk == tkAWAIT) ? tkID : tk;
  454. }
  455. else if (m_fSyntaxColor)
  456. {
  457. m_ptoken->SetIdentifier(NULL);
  458. // We always need to check TkFromNameLenColor because
  459. // the main Scan switch doesn't detect all non-keyword that needs coloring
  460. // (e.g. int)
  461. int32 cch = UnescapeToTempBuf(pchMin, p);
  462. return m_phtbl->TkFromNameLenColor(m_tempChBuf.m_prgch, cch);
  463. }
  464. // UTF16 Scanner are only for syntax coloring, so it shouldn't come here.
  465. if (EncodingPolicy::MultiUnitEncoding && !fHasMultiChar && !fHasEscape)
  466. {
  467. Assert(sizeof(EncodedChar) == 1);
  468. // If there are no escape, that the main scan loop would have found the keyword already
  469. // So we can just assume it is an ID
  470. DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p));
  471. DebugOnly(tokens tk = m_phtbl->TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode()));
  472. Assert(tk == tkID || (tk == tkYIELD && !m_fYieldIsKeyword) || (tk == tkAWAIT && !m_fAwaitIsKeyword));
  473. m_ptoken->SetIdentifier(reinterpret_cast<const char *>(pchMin), (int32)(p - pchMin));
  474. return tkID;
  475. }
  476. IdentPtr pid = PidOfIdentiferAt(pchMin, p, fHasEscape, fHasMultiChar);
  477. m_ptoken->SetIdentifier(pid);
  478. if (!fHasEscape)
  479. {
  480. // If it doesn't have escape, then Scan() should have taken care of keywords (except
  481. // yield if m_fYieldIsKeyword is false, in which case yield is treated as an identifier, and except
  482. // await if m_fAwaitIsKeyword is false, in which case await is treated as an identifier).
  483. // We don't have to check if the name is reserved word and return it as an Identifier
  484. Assert(pid->Tk(IsStrictMode()) == tkID
  485. || (pid->Tk(IsStrictMode()) == tkYIELD && !m_fYieldIsKeyword)
  486. || (pid->Tk(IsStrictMode()) == tkAWAIT && !m_fAwaitIsKeyword));
  487. return tkID;
  488. }
  489. tokens tk = pid->Tk(IsStrictMode());
  490. return tk == tkID || (tk == tkYIELD && !m_fYieldIsKeyword) || (tk == tkAWAIT && !m_fAwaitIsKeyword) ? tkID : tkNone;
  491. }
  492. template <typename EncodingPolicy>
  493. IdentPtr Scanner<EncodingPolicy>::PidAt(size_t iecpMin, size_t iecpLim)
  494. {
  495. Assert(iecpMin < AdjustedLength() && iecpLim <= AdjustedLength() && iecpLim > iecpMin);
  496. return PidOfIdentiferAt(m_pchBase + iecpMin, m_pchBase + iecpLim);
  497. }
  498. template <typename EncodingPolicy>
  499. uint32 Scanner<EncodingPolicy>::UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last)
  500. {
  501. m_tempChBuf.Init();
  502. while( p < last )
  503. {
  504. codepoint_t codePoint;
  505. bool hasEscape, isMultiChar;
  506. bool gotCodePoint = TryReadCodePoint<false>(p, last, &codePoint, &hasEscape, &isMultiChar);
  507. Assert(gotCodePoint);
  508. Assert(codePoint < 0x110000);
  509. if (codePoint < 0x10000)
  510. {
  511. m_tempChBuf.AppendCh((OLECHAR)codePoint);
  512. }
  513. else
  514. {
  515. char16 lower, upper;
  516. Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &upper);
  517. m_tempChBuf.AppendCh(lower);
  518. m_tempChBuf.AppendCh(upper);
  519. }
  520. }
  521. return m_tempChBuf.m_ichCur;
  522. }
  523. template <typename EncodingPolicy>
  524. IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last)
  525. {
  526. int32 cch = UnescapeToTempBuf(p, last);
  527. return m_phtbl->PidHashNameLen(m_tempChBuf.m_prgch, cch);
  528. }
  529. template <typename EncodingPolicy>
  530. IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar)
  531. {
  532. // If there is an escape sequence in the JS6 identifier or it is a UTF8
  533. // source then we have to convert it to the equivalent char so we use a
  534. // buffer for translation.
  535. if ((EncodingPolicy::MultiUnitEncoding && fHasMultiChar) || fHadEscape)
  536. {
  537. return PidOfIdentiferAt(p, last);
  538. }
  539. else if (EncodingPolicy::MultiUnitEncoding)
  540. {
  541. Assert(sizeof(EncodedChar) == 1);
  542. return m_phtbl->PidHashNameLen(reinterpret_cast<const char *>(p), reinterpret_cast<const char *>(last), (int32)(last - p));
  543. }
  544. else
  545. {
  546. Assert(sizeof(EncodedChar) == 2);
  547. return m_phtbl->PidHashNameLen(reinterpret_cast< const char16 * >(p), (int32)(last - p));
  548. }
  549. }
  550. template <typename EncodingPolicy>
  551. typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt)
  552. {
  553. EncodedCharPtr last = m_pchLast;
  554. EncodedCharPtr pchT;
  555. likelyInt = true;
  556. // Reset
  557. m_OctOrLeadingZeroOnLastTKNumber = false;
  558. if ('0' == this->PeekFirst(p, last))
  559. {
  560. switch(this->PeekFirst(p + 1, last))
  561. {
  562. case '.':
  563. case 'e':
  564. case 'E':
  565. likelyInt = false;
  566. // Floating point
  567. goto LFloat;
  568. case 'x':
  569. case 'X':
  570. // Hex
  571. *pdbl = Js::NumberUtilities::DblFromHex(p + 2, &pchT);
  572. if (pchT == p + 2)
  573. {
  574. // "Octal zero token "0" followed by an identifier token beginning with character 'x'/'X'
  575. *pdbl = 0;
  576. return p + 1;
  577. }
  578. else
  579. return pchT;
  580. case 'o':
  581. case 'O':
  582. // Octal
  583. *pdbl = Js::NumberUtilities::DblFromOctal(p + 2, &pchT);
  584. if (pchT == p + 2)
  585. {
  586. // "Octal zero token "0" followed by an identifier token beginning with character 'o'/'O'
  587. *pdbl = 0;
  588. return p + 1;
  589. }
  590. return pchT;
  591. case 'b':
  592. case 'B':
  593. // Binary
  594. *pdbl = Js::NumberUtilities::DblFromBinary(p + 2, &pchT);
  595. if (pchT == p + 2)
  596. {
  597. // "Octal zero token "0" followed by an identifier token beginning with character 'b'/'B'
  598. *pdbl = 0;
  599. return p + 1;
  600. }
  601. return pchT;
  602. default:
  603. // Octal
  604. *pdbl = Js::NumberUtilities::DblFromOctal(p, &pchT);
  605. Assert(pchT > p);
  606. #if !SOURCERELEASE
  607. // If an octal literal is malformed then it is in fact a decimal literal.
  608. #endif // !SOURCERELEASE
  609. if(*pdbl != 0 || pchT > p + 1)
  610. m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok
  611. switch (*pchT)
  612. {
  613. case '8':
  614. case '9':
  615. // case 'e':
  616. // case 'E':
  617. // case '.':
  618. m_OctOrLeadingZeroOnLastTKNumber = false; //08... or 09....
  619. goto LFloat;
  620. }
  621. return pchT;
  622. }
  623. }
  624. else
  625. {
  626. LFloat:
  627. *pdbl = Js::NumberUtilities::StrToDbl(p, &pchT, likelyInt);
  628. Assert(pchT == p || !Js::NumberUtilities::IsNan(*pdbl));
  629. return pchT;
  630. }
  631. }
  632. template <typename EncodingPolicy>
  633. BOOL Scanner<EncodingPolicy>::oFScanNumber(double *pdbl, bool& likelyInt)
  634. {
  635. EncodedCharPtr pchT;
  636. m_OctOrLeadingZeroOnLastTKNumber = false;
  637. likelyInt = true;
  638. if ('0' == *m_currentCharacter)
  639. {
  640. switch (m_currentCharacter[1])
  641. {
  642. case '.':
  643. case 'e':
  644. case 'E':
  645. likelyInt = false;
  646. // Floating point.
  647. goto LFloat;
  648. case 'x':
  649. case 'X':
  650. // Hex.
  651. *pdbl = Js::NumberUtilities::DblFromHex<EncodedChar>(m_currentCharacter + 2, &pchT);
  652. if (pchT == m_currentCharacter + 2)
  653. {
  654. // "Octal zero token "0" followed by an identifier token beginning with character 'x'/'X'
  655. *pdbl = 0;
  656. m_currentCharacter++;
  657. }
  658. else
  659. m_currentCharacter = pchT;
  660. break;
  661. case 'o':
  662. case 'O':
  663. *pdbl = Js::NumberUtilities::DblFromOctal(m_currentCharacter + 2, &pchT);
  664. if (pchT == m_currentCharacter + 2)
  665. {
  666. // "Octal zero token "0" followed by an identifier token beginning with character 'o'/'O'
  667. *pdbl = 0;
  668. m_currentCharacter++;
  669. }
  670. else
  671. m_currentCharacter = pchT;
  672. break;
  673. case 'b':
  674. case 'B':
  675. *pdbl = Js::NumberUtilities::DblFromBinary(m_currentCharacter + 2, &pchT);
  676. if (pchT == m_currentCharacter + 2)
  677. {
  678. // "Octal zero token "0" followed by an identifier token beginning with character 'b'/'B'
  679. *pdbl = 0;
  680. m_currentCharacter++;
  681. }
  682. else
  683. m_currentCharacter = pchT;
  684. break;
  685. default:
  686. // Octal.
  687. *pdbl = Js::NumberUtilities::DblFromOctal(m_currentCharacter, &pchT);
  688. Assert(pchT > m_currentCharacter);
  689. #if !SOURCERELEASE
  690. // If an octal literal is malformed then it is in fact a decimal literal.
  691. #endif // !SOURCERELEASE
  692. if(*pdbl != 0 || pchT > m_currentCharacter + 1)
  693. m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok
  694. switch (*pchT)
  695. {
  696. case '8':
  697. case '9':
  698. // case 'e':
  699. // case 'E':
  700. // case '.':
  701. m_OctOrLeadingZeroOnLastTKNumber = false; //08... or 09....
  702. goto LFloat;
  703. }
  704. m_currentCharacter = pchT;
  705. break;
  706. }
  707. }
  708. else
  709. {
  710. LFloat:
  711. // Let StrToDbl do all the work.
  712. *pdbl = Js::NumberUtilities::StrToDbl(m_currentCharacter, &pchT, likelyInt);
  713. if (pchT == m_currentCharacter)
  714. return FALSE;
  715. m_currentCharacter = pchT;
  716. Assert(!Js::NumberUtilities::IsNan(*pdbl));
  717. }
  718. return TRUE;
  719. }
  720. template <typename EncodingPolicy>
  721. tokens Scanner<EncodingPolicy>::TryRescanRegExp()
  722. {
  723. EncodedCharPtr current = m_currentCharacter;
  724. tokens result = RescanRegExp();
  725. if (result == tkScanError)
  726. m_currentCharacter = current;
  727. return result;
  728. }
  729. template <typename EncodingPolicy>
  730. tokens Scanner<EncodingPolicy>::RescanRegExp()
  731. {
  732. #if DEBUG
  733. switch (m_ptoken->tk)
  734. {
  735. case tkDiv:
  736. Assert(m_currentCharacter == m_pchMinTok + 1);
  737. break;
  738. case tkAsgDiv:
  739. Assert(m_currentCharacter == m_pchMinTok + 2);
  740. break;
  741. default:
  742. AssertMsg(FALSE, "Who is calling RescanRegExp?");
  743. break;
  744. }
  745. #endif //DEBUG
  746. m_currentCharacter = m_pchMinTok;
  747. if (*m_currentCharacter != '/')
  748. Error(ERRnoSlash);
  749. m_currentCharacter++;
  750. tokens tk = tkNone;
  751. {
  752. ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc);
  753. tk = ScanRegExpConstant(&alloc);
  754. }
  755. return tk;
  756. }
  757. template <typename EncodingPolicy>
  758. tokens Scanner<EncodingPolicy>::RescanRegExpNoAST()
  759. {
  760. #if DEBUG
  761. switch (m_ptoken->tk)
  762. {
  763. case tkDiv:
  764. Assert(m_currentCharacter == m_pchMinTok + 1);
  765. break;
  766. case tkAsgDiv:
  767. Assert(m_currentCharacter == m_pchMinTok + 2);
  768. break;
  769. default:
  770. AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?");
  771. break;
  772. }
  773. #endif //DEBUG
  774. m_currentCharacter = m_pchMinTok;
  775. if (*m_currentCharacter != '/')
  776. Error(ERRnoSlash);
  777. m_currentCharacter++;
  778. tokens tk = tkNone;
  779. {
  780. ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc);
  781. {
  782. tk = ScanRegExpConstantNoAST(&alloc);
  783. }
  784. }
  785. return tk;
  786. }
  787. template <typename EncodingPolicy>
  788. tokens Scanner<EncodingPolicy>::RescanRegExpTokenizer()
  789. {
  790. #if DEBUG
  791. switch (m_ptoken->tk)
  792. {
  793. case tkDiv:
  794. Assert(m_currentCharacter == m_pchMinTok + 1);
  795. break;
  796. case tkAsgDiv:
  797. Assert(m_currentCharacter == m_pchMinTok + 2);
  798. break;
  799. default:
  800. AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?");
  801. break;
  802. }
  803. #endif //DEBUG
  804. m_currentCharacter = m_pchMinTok;
  805. if (*m_currentCharacter != '/')
  806. Error(ERRnoSlash);
  807. m_currentCharacter++;
  808. tokens tk = tkNone;
  809. ThreadContext *threadContext = ThreadContext::GetContextForCurrentThread();
  810. threadContext->EnsureRecycler();
  811. Js::TempArenaAllocatorObject *alloc = threadContext->GetTemporaryAllocator(_u("RescanRegExp"));
  812. TryFinally(
  813. [&]() /* try block */
  814. {
  815. tk = this->ScanRegExpConstantNoAST(alloc->GetAllocator());
  816. },
  817. [&](bool /* hasException */) /* finally block */
  818. {
  819. threadContext->ReleaseTemporaryAllocator(alloc);
  820. });
  821. return tk;
  822. }
  823. template <typename EncodingPolicy>
  824. tokens Scanner<EncodingPolicy>::ScanRegExpConstant(ArenaAllocator* alloc)
  825. {
  826. if (m_parser && m_parser->IsBackgroundParser())
  827. {
  828. PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);
  829. }
  830. else
  831. {
  832. PROBE_STACK(m_scriptContext, Js::Constants::MinStackRegex);
  833. }
  834. // SEE ALSO: RegexHelper::PrimCompileDynamic()
  835. #ifdef PROFILE_EXEC
  836. m_scriptContext->ProfileBegin(Js::RegexCompilePhase);
  837. #endif
  838. ArenaAllocator* ctAllocator = alloc;
  839. UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = m_scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0);
  840. UnifiedRegex::StandardChars<char16>* standardChars = m_scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
  841. #if ENABLE_REGEX_CONFIG_OPTIONS
  842. UnifiedRegex::DebugWriter *w = 0;
  843. if (REGEX_CONFIG_FLAG(RegexDebug))
  844. w = m_scriptContext->GetRegexDebugWriter();
  845. if (REGEX_CONFIG_FLAG(RegexProfile))
  846. m_scriptContext->GetRegexStatsDatabase()->BeginProfile();
  847. #endif
  848. UnifiedRegex::Node* root = 0;
  849. charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
  850. UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
  851. UnifiedRegex::Parser<EncodingPolicy, true> parser
  852. ( m_scriptContext
  853. , ctAllocator
  854. , standardEncodedChars
  855. , standardChars
  856. , this->IsFromExternalSource()
  857. #if ENABLE_REGEX_CONFIG_OPTIONS
  858. , w
  859. #endif
  860. );
  861. try
  862. {
  863. root = parser.ParseLiteral(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars, flags);
  864. }
  865. catch (UnifiedRegex::ParseError e)
  866. {
  867. #ifdef PROFILE_EXEC
  868. m_scriptContext->ProfileEnd(Js::RegexCompilePhase);
  869. #endif
  870. if (m_fSyntaxColor)
  871. return ScanError(m_currentCharacter + e.encodedPos, tkRegExp);
  872. m_currentCharacter += e.encodedPos;
  873. Error(e.error);
  874. }
  875. UnifiedRegex::RegexPattern* pattern;
  876. if (m_parser->IsBackgroundParser())
  877. {
  878. // Avoid allocating pattern from recycler on background thread. The main thread will create the pattern
  879. // and hook it to this parse node.
  880. pattern = parser.template CompileProgram<false>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags);
  881. }
  882. else
  883. {
  884. pattern = parser.template CompileProgram<true>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags);
  885. }
  886. this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits
  887. return m_ptoken->SetRegex(pattern, m_parser);
  888. }
  889. template<typename EncodingPolicy>
  890. tokens Scanner<EncodingPolicy>::ScanRegExpConstantNoAST(ArenaAllocator* alloc)
  891. {
  892. if (m_parser && m_parser->IsBackgroundParser())
  893. {
  894. PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);
  895. }
  896. else
  897. {
  898. PROBE_STACK(m_scriptContext, Js::Constants::MinStackRegex);
  899. }
  900. ThreadContext *threadContext = m_fSyntaxColor ? ThreadContext::GetContextForCurrentThread() : m_scriptContext->GetThreadContext();
  901. UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = threadContext->GetStandardChars((EncodedChar*)0);
  902. UnifiedRegex::StandardChars<char16>* standardChars = threadContext->GetStandardChars((char16*)0);
  903. charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
  904. UnifiedRegex::Parser<EncodingPolicy, true> parser
  905. ( m_scriptContext
  906. , alloc
  907. , standardEncodedChars
  908. , standardChars
  909. , this->IsFromExternalSource()
  910. #if ENABLE_REGEX_CONFIG_OPTIONS
  911. , 0
  912. #endif
  913. );
  914. try
  915. {
  916. parser.ParseLiteralNoAST(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars);
  917. }
  918. catch (UnifiedRegex::ParseError e)
  919. {
  920. if (m_fSyntaxColor)
  921. return ScanError(m_currentCharacter + e.encodedPos, tkRegExp);
  922. m_currentCharacter += e.encodedPos;
  923. Error(e.error);
  924. // never reached
  925. }
  926. UnifiedRegex::RegexPattern* pattern = parser.template CompileProgram<false>(nullptr, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, UnifiedRegex::NoRegexFlags);
  927. Assert(pattern == nullptr); // BuildAST == false, CompileProgram should return nullptr
  928. this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits
  929. return (m_ptoken->tk = tkRegExp);
  930. }
  931. template<typename EncodingPolicy>
  932. tokens Scanner<EncodingPolicy>::ScanStringTemplateBegin(EncodedCharPtr *pp)
  933. {
  934. // String template must begin with a string constant followed by '`' or '${'
  935. ScanStringConstant<true, true>('`', pp);
  936. OLECHAR ch;
  937. EncodedCharPtr last = m_pchLast;
  938. ch = this->ReadFirst(*pp, last);
  939. if (ch == '`')
  940. {
  941. // Simple string template - no substitutions
  942. return tkStrTmplBasic;
  943. }
  944. else if (ch == '$')
  945. {
  946. ch = this->ReadFirst(*pp, last);
  947. if (ch == '{')
  948. {
  949. // Next token after expr should be tkStrTmplMid or tkStrTmplEnd.
  950. // In string template scanning mode, we expect the next char to be '}'
  951. // and will treat it as the beginning of tkStrTmplEnd or tkStrTmplMid
  952. m_fStringTemplateDepth++;
  953. // Regular string template begin - next is first substitution
  954. return tkStrTmplBegin;
  955. }
  956. }
  957. // Error - make sure pointer stays at the last character of the error token instead of after it in the error case
  958. (*pp)--;
  959. return ScanError(m_currentCharacter, tkStrTmplBegin);
  960. }
  961. template<typename EncodingPolicy>
  962. tokens Scanner<EncodingPolicy>::ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp)
  963. {
  964. // String template middle and end tokens must begin with a string constant
  965. ScanStringConstant<true, true>('`', pp);
  966. OLECHAR ch;
  967. EncodedCharPtr last = m_pchLast;
  968. ch = this->ReadFirst(*pp, last);
  969. if (ch == '`')
  970. {
  971. // No longer in string template scanning mode
  972. m_fStringTemplateDepth--;
  973. // This is the last part of the template ...`
  974. return tkStrTmplEnd;
  975. }
  976. else if (ch == '$')
  977. {
  978. ch = this->ReadFirst(*pp, last);
  979. if (ch == '{')
  980. {
  981. // This is just another middle part of the template }...${
  982. return tkStrTmplMid;
  983. }
  984. }
  985. // Error - make sure pointer stays at the last character of the error token instead of after it in the error case
  986. (*pp)--;
  987. return ScanError(m_currentCharacter, tkStrTmplEnd);
  988. }
  989. /*****************************************************************************
  990. *
  991. * Parses a string constant. Note that the string value is stored in
  992. * a volatile buffer (or allocated on the heap if too long), and thus
  993. * the string should be saved off before the next token is scanned.
  994. */
  995. template<typename EncodingPolicy>
  996. template<bool stringTemplateMode, bool createRawString>
  997. tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
  998. {
  999. static_assert((stringTemplateMode && createRawString) || (!stringTemplateMode && !createRawString), "stringTemplateMode and createRawString must have the same value");
  1000. OLECHAR ch, c, rawch;
  1001. int wT;
  1002. EncodedCharPtr p = *pp;
  1003. EncodedCharPtr last = m_pchLast;
  1004. // Reset
  1005. m_OctOrLeadingZeroOnLastTKNumber = false;
  1006. m_EscapeOnLastTkStrCon = FALSE;
  1007. m_tempChBuf.Init();
  1008. // Use template parameter to gate raw string creation.
  1009. // If createRawString is false, all these operations should be no-ops
  1010. if (createRawString)
  1011. {
  1012. m_tempChBufSecondary.Init();
  1013. }
  1014. for (;;)
  1015. {
  1016. switch ((rawch = ch = this->ReadFirst(p, last)))
  1017. {
  1018. case kchRET:
  1019. if (stringTemplateMode)
  1020. {
  1021. if (this->PeekFirst(p, last) == kchNWL)
  1022. {
  1023. // Eat the <LF> char, ignore return
  1024. this->ReadFirst(p, last);
  1025. }
  1026. // Both <CR> and <CR><LF> are normalized to <LF> in template cooked and raw values
  1027. ch = rawch = kchNWL;
  1028. }
  1029. LEcmaLineBreak:
  1030. // Fall through
  1031. case kchNWL:
  1032. if (stringTemplateMode)
  1033. {
  1034. // Notify the scanner to update current line, number of lines etc
  1035. NotifyScannedNewLine();
  1036. break;
  1037. }
  1038. m_currentCharacter = p - 1;
  1039. if (m_fSyntaxColor)
  1040. {
  1041. *pp = p - 1;
  1042. return ScanError(p - 1, tkStrCon);
  1043. }
  1044. Error(ERRnoStrEnd);
  1045. case '"':
  1046. case '\'':
  1047. if (ch == delim)
  1048. goto LBreak;
  1049. break;
  1050. case '`':
  1051. // In string template scan mode, don't consume the '`' - we need to differentiate
  1052. // between a closed string template and the expression open sequence - ${
  1053. if (stringTemplateMode)
  1054. {
  1055. p--;
  1056. goto LBreak;
  1057. }
  1058. // If we aren't scanning for a string template, do the default thing
  1059. goto LMainDefault;
  1060. case '$':
  1061. // If we are parsing a string literal part of a string template, ${ indicates we need to switch
  1062. // to parsing an expression.
  1063. if (stringTemplateMode && this->PeekFirst(p, last) == '{')
  1064. {
  1065. // Rewind to the $ and return
  1066. p--;
  1067. goto LBreak;
  1068. }
  1069. // If we aren't scanning for a string template, do the default thing
  1070. goto LMainDefault;
  1071. case kchNUL:
  1072. if (p >= last)
  1073. {
  1074. m_currentCharacter = p - 1;
  1075. if (m_fSyntaxColor)
  1076. {
  1077. *pp = p - 1;
  1078. return ScanError(p - 1, tkStrCon);
  1079. }
  1080. Error(ERRnoStrEnd);
  1081. }
  1082. break;
  1083. default:
  1084. LMainDefault:
  1085. if (this->IsMultiUnitChar(ch))
  1086. {
  1087. if ((ch == kchLS || ch == kchPS))
  1088. {
  1089. goto LEcmaLineBreak;
  1090. }
  1091. rawch = ch = this->template ReadRest<true>(ch, p, last);
  1092. switch (ch)
  1093. {
  1094. case kchLS: // 0x2028, classifies as new line
  1095. case kchPS: // 0x2029, classifies as new line
  1096. goto LEcmaLineBreak;
  1097. }
  1098. }
  1099. break;
  1100. case kchBSL:
  1101. // In raw mode '\\' is not an escape character, just add the char into the raw buffer.
  1102. m_tempChBufSecondary.template AppendCh<createRawString>(ch);
  1103. m_EscapeOnLastTkStrCon=TRUE;
  1104. // In raw mode, we append the raw char itself and not the escaped value so save the char.
  1105. rawch = ch = this->ReadFirst(p, last);
  1106. codepoint_t codePoint = 0;
  1107. uint errorType = (uint)ERRbadHexDigit;
  1108. switch (ch)
  1109. {
  1110. case 'b':
  1111. ch = 0x08;
  1112. break;
  1113. case 't':
  1114. ch = 0x09;
  1115. break;
  1116. case 'v':
  1117. ch = 0x0B; //Only in ES5 mode
  1118. break; //same as default
  1119. case 'n':
  1120. ch = 0x0A;
  1121. break;
  1122. case 'f':
  1123. ch = 0x0C;
  1124. break;
  1125. case 'r':
  1126. ch = 0x0D;
  1127. break;
  1128. case 'x':
  1129. // Insert the 'x' here before jumping to parse the hex digits.
  1130. m_tempChBufSecondary.template AppendCh<createRawString>(ch);
  1131. // 2 hex digits
  1132. ch = 0;
  1133. goto LTwoHex;
  1134. case 'u':
  1135. // Raw string just inserts a 'u' here.
  1136. m_tempChBufSecondary.template AppendCh<createRawString>(ch);
  1137. ch = 0;
  1138. if (Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
  1139. goto LFourHex;
  1140. else if (c != '{' || !this->es6UnicodeMode)
  1141. goto ReturnScanError;
  1142. Assert(c == '{');
  1143. // c should definitely be a '{' which should be appended to the raw string.
  1144. m_tempChBufSecondary.template AppendCh<createRawString>(c);
  1145. //At least one digit is expected
  1146. if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
  1147. {
  1148. goto ReturnScanError;
  1149. }
  1150. m_tempChBufSecondary.template AppendCh<createRawString>(c);
  1151. codePoint = static_cast<codepoint_t>(wT);
  1152. while(Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
  1153. {
  1154. m_tempChBufSecondary.template AppendCh<createRawString>(c);
  1155. codePoint <<= 4;
  1156. codePoint += static_cast<codepoint_t>(wT);
  1157. if (codePoint > 0x10FFFF)
  1158. {
  1159. errorType = (uint)ERRInvalidCodePoint;
  1160. goto ReturnScanError;
  1161. }
  1162. }
  1163. if (c != '}')
  1164. {
  1165. errorType = (uint)ERRMissingCurlyBrace;
  1166. goto ReturnScanError;
  1167. }
  1168. Assert(codePoint <= 0x10FFFF);
  1169. if (codePoint >= 0x10000)
  1170. {
  1171. OLECHAR lower = 0;
  1172. Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &ch);
  1173. m_tempChBuf.AppendCh(lower);
  1174. }
  1175. else
  1176. {
  1177. ch = (char16)codePoint;
  1178. }
  1179. // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
  1180. if (createRawString)
  1181. rawch = c;
  1182. break;
  1183. LFourHex:
  1184. codePoint = 0x0;
  1185. // Append first hex digit character to the raw string.
  1186. m_tempChBufSecondary.template AppendCh<createRawString>(c);
  1187. codePoint += static_cast<codepoint_t>(wT * 0x1000);
  1188. if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
  1189. goto ReturnScanError;
  1190. // Append fourth (or second) hex digit character to the raw string.
  1191. m_tempChBufSecondary.template AppendCh<createRawString>(c);
  1192. codePoint += static_cast<codepoint_t>(wT * 0x0100);
  1193. LTwoHex:
  1194. // This code path doesn't expect curly.
  1195. if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
  1196. goto ReturnScanError;
  1197. // Append first hex digit character to the raw string.
  1198. m_tempChBufSecondary.template AppendCh<createRawString>(c);
  1199. codePoint += static_cast<codepoint_t>(wT * 0x0010);
  1200. if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
  1201. goto ReturnScanError;
  1202. codePoint += static_cast<codepoint_t>(wT);
  1203. // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
  1204. if (createRawString)
  1205. rawch = c;
  1206. if (codePoint < 0x10000)
  1207. {
  1208. ch = static_cast<OLECHAR>(codePoint);
  1209. }
  1210. else
  1211. {
  1212. goto ReturnScanError;
  1213. }
  1214. break;
  1215. case '0':
  1216. case '1':
  1217. case '2':
  1218. case '3':
  1219. // 1 to 3 octal digits
  1220. ch -= '0';
  1221. // Octal escape sequences are not allowed inside string template literals
  1222. if (stringTemplateMode)
  1223. {
  1224. c = this->PeekFirst(p, last);
  1225. if (ch != 0 || (c >= '0' && c <= '7'))
  1226. {
  1227. errorType = (uint)ERRES5NoOctal;
  1228. goto ReturnScanError;
  1229. }
  1230. break;
  1231. }
  1232. wT = (c = this->ReadFirst(p, last)) - '0';
  1233. if ((char16)wT > 7)
  1234. {
  1235. if (ch != 0 || ((char16)wT <= 9))
  1236. {
  1237. m_OctOrLeadingZeroOnLastTKNumber = true;
  1238. }
  1239. p--;
  1240. break;
  1241. }
  1242. m_OctOrLeadingZeroOnLastTKNumber = true;
  1243. ch = static_cast< OLECHAR >(ch * 8 + wT);
  1244. goto LOneOctal;
  1245. case '4':
  1246. case '5':
  1247. case '6':
  1248. case '7':
  1249. // 1 to 2 octal digits
  1250. // Octal escape sequences are not allowed inside string template literals
  1251. if (stringTemplateMode)
  1252. {
  1253. errorType = (uint)ERRES5NoOctal;
  1254. goto ReturnScanError;
  1255. }
  1256. ch -= '0';
  1257. m_OctOrLeadingZeroOnLastTKNumber = true;
  1258. LOneOctal:
  1259. wT = (c = this->ReadFirst(p, last)) - '0';
  1260. if ((char16)wT > 7)
  1261. {
  1262. p--;
  1263. break;
  1264. }
  1265. ch = static_cast< OLECHAR >(ch * 8 + wT);
  1266. break;
  1267. case kchRET: // 0xD
  1268. if (stringTemplateMode)
  1269. {
  1270. // If this is \<CR><LF> we can eat the <LF> right now
  1271. if (this->PeekFirst(p, last) == kchNWL)
  1272. {
  1273. // Eat the <LF> char, ignore return
  1274. this->ReadFirst(p, last);
  1275. }
  1276. // Both \<CR> and \<CR><LF> are normalized to \<LF> in template raw string
  1277. rawch = kchNWL;
  1278. }
  1279. case kchLS: // 0x2028, classifies as new line
  1280. case kchPS: // 0x2029, classifies as new line
  1281. case kchNWL: // 0xA
  1282. LEcmaEscapeLineBreak:
  1283. if (stringTemplateMode)
  1284. {
  1285. // We're going to ignore the line continuation tokens for the cooked strings, but we need to append the token for raw strings
  1286. m_tempChBufSecondary.template AppendCh<createRawString>(rawch);
  1287. // Template literal strings ignore all escaped line continuation tokens
  1288. NotifyScannedNewLine();
  1289. continue;
  1290. }
  1291. m_currentCharacter = p;
  1292. ScanNewLine(ch);
  1293. p = m_currentCharacter;
  1294. if (m_fSyntaxColor && *p == 0)
  1295. {
  1296. // Special case for multi-line strings during colorization.
  1297. m_scanState = delim == '"' ? ScanStateMultiLineDoubleQuoteString : ScanStateMultiLineSingleQuoteString;
  1298. *pp = p;
  1299. return tkStrCon;
  1300. }
  1301. continue;
  1302. case 0:
  1303. if (p >= last)
  1304. {
  1305. errorType = (uint)ERRnoStrEnd;
  1306. ReturnScanError:
  1307. m_currentCharacter = p - 1;
  1308. if (m_fSyntaxColor)
  1309. {
  1310. *pp = p - 1;
  1311. return ScanError(p - 1, tkStrCon);
  1312. }
  1313. Error(errorType);
  1314. }
  1315. else if (stringTemplateMode)
  1316. {
  1317. // Escaped null character is translated into 0x0030 for raw template literals
  1318. rawch = 0x0030;
  1319. }
  1320. break;
  1321. default:
  1322. if (this->IsMultiUnitChar(ch))
  1323. {
  1324. rawch = ch = this->template ReadRest<true>(ch, p, last);
  1325. switch (ch)
  1326. {
  1327. case kchLS:
  1328. case kchPS:
  1329. goto LEcmaEscapeLineBreak;
  1330. }
  1331. }
  1332. break;
  1333. }
  1334. break;
  1335. }
  1336. m_tempChBuf.AppendCh(ch);
  1337. m_tempChBufSecondary.template AppendCh<createRawString>(rawch);
  1338. }
  1339. LBreak:
  1340. bool createPid = true;
  1341. if (m_fSyntaxColor || (m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0)
  1342. {
  1343. createPid = false;
  1344. if ((m_tempChBuf.m_ichCur == 10) && (0 == memcmp(_u("use strict"), m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur * sizeof(OLECHAR))))
  1345. {
  1346. createPid = true;
  1347. }
  1348. }
  1349. if (createPid)
  1350. {
  1351. m_ptoken->SetIdentifier(m_phtbl->PidHashNameLen(m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur));
  1352. }
  1353. else
  1354. {
  1355. m_ptoken->SetIdentifier(NULL);
  1356. }
  1357. m_scanState = ScanStateNormal;
  1358. m_doubleQuoteOnLastTkStrCon = '"' == delim;
  1359. *pp = p;
  1360. return tkStrCon;
  1361. }
  1362. template<typename EncodingPolicy>
  1363. tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
  1364. {
  1365. return ScanStringConstant<false, false>(delim, pp);
  1366. }
  1367. /*****************************************************************************
  1368. *
  1369. * Consume a C-style comment.
  1370. */
  1371. template<typename EncodingPolicy>
  1372. tokens Scanner<EncodingPolicy>::SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef)
  1373. {
  1374. Assert(containTypeDef != nullptr);
  1375. EncodedCharPtr p = *pp;
  1376. *containTypeDef = false;
  1377. EncodedCharPtr last = m_pchLast;
  1378. OLECHAR ch;
  1379. for (;;)
  1380. {
  1381. switch((ch = this->ReadFirst(p, last)))
  1382. {
  1383. case '*':
  1384. if (*p == '/')
  1385. {
  1386. *pp = p + 1;
  1387. if (m_fSyntaxColor)
  1388. {
  1389. m_scanState = ScanStateNormal;
  1390. return tkComment;
  1391. }
  1392. return tkNone;
  1393. }
  1394. break;
  1395. // ES 2015 11.3 Line Terminators
  1396. case kchLS: // 0x2028, classifies as new line
  1397. case kchPS: // 0x2029, classifies as new line
  1398. LEcmaLineBreak:
  1399. goto LLineBreak;
  1400. case kchRET:
  1401. case kchNWL:
  1402. LLineBreak:
  1403. m_fHadEol = TRUE;
  1404. m_currentCharacter = p;
  1405. ScanNewLine(ch);
  1406. p = m_currentCharacter;
  1407. break;
  1408. case kchNUL:
  1409. if (p >= last)
  1410. {
  1411. m_currentCharacter = p - 1;
  1412. *pp = p - 1;
  1413. if (m_fSyntaxColor)
  1414. {
  1415. m_scanState = ScanStateMultiLineComment;
  1416. return tkComment;
  1417. }
  1418. Error(ERRnoCmtEnd);
  1419. }
  1420. break;
  1421. default:
  1422. if (this->IsMultiUnitChar(ch))
  1423. {
  1424. ch = this->template ReadRest<true>(ch, p, last);
  1425. switch (ch)
  1426. {
  1427. case kchLS:
  1428. case kchPS:
  1429. goto LEcmaLineBreak;
  1430. }
  1431. }
  1432. break;
  1433. }
  1434. }
  1435. }
  1436. /*****************************************************************************
  1437. *
  1438. * We've encountered a newline - update various counters and things.
  1439. */
  1440. template<typename EncodingPolicy>
  1441. void Scanner<EncodingPolicy>::ScanNewLine(uint ch)
  1442. {
  1443. if (ch == '\r' && PeekNextChar() == '\n')
  1444. {
  1445. ReadNextChar();
  1446. }
  1447. NotifyScannedNewLine();
  1448. }
  1449. /*****************************************************************************
  1450. *
  1451. * We've encountered a newline - update various counters and things.
  1452. */
  1453. template<typename EncodingPolicy>
  1454. void Scanner<EncodingPolicy>::NotifyScannedNewLine()
  1455. {
  1456. // update in scanner: previous line, current line, number of lines.
  1457. m_line++;
  1458. m_pchPrevLine = m_pchMinLine;
  1459. m_pchMinLine = m_currentCharacter;
  1460. m_cMinLineMultiUnits = this->m_cMultiUnits;
  1461. }
  1462. /*****************************************************************************
  1463. *
  1464. * Delivers a token stream.
  1465. */
  1466. template<typename EncodingPolicy>
  1467. tokens Scanner<EncodingPolicy>::ScanForcingPid()
  1468. {
  1469. if (m_DeferredParseFlags != ScanFlagNone)
  1470. {
  1471. BYTE deferredParseFlagsSave = m_DeferredParseFlags;
  1472. m_DeferredParseFlags = ScanFlagNone;
  1473. tokens result = tkEOF;
  1474. TryFinally(
  1475. [&]() /* try block */
  1476. {
  1477. result = this->Scan();
  1478. },
  1479. [&](bool) /* finally block */
  1480. {
  1481. this->m_DeferredParseFlags = deferredParseFlagsSave;
  1482. });
  1483. return result;
  1484. }
  1485. return Scan();
  1486. }
  1487. template<typename EncodingPolicy>
  1488. tokens Scanner<EncodingPolicy>::Scan()
  1489. {
  1490. return ScanCore(true);
  1491. }
  1492. template<typename EncodingPolicy>
  1493. tokens Scanner<EncodingPolicy>::ScanNoKeywords()
  1494. {
  1495. return ScanCore(false);
  1496. }
  1497. template<typename EncodingPolicy>
  1498. tokens Scanner<EncodingPolicy>::ScanAhead()
  1499. {
  1500. return ScanNoKeywords();
  1501. }
  1502. template<typename EncodingPolicy>
  1503. tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)
  1504. {
  1505. codepoint_t ch;
  1506. OLECHAR firstChar;
  1507. OLECHAR secondChar;
  1508. EncodedCharPtr pchT;
  1509. size_t multiUnits = 0;
  1510. EncodedCharPtr p = m_currentCharacter;
  1511. EncodedCharPtr last = m_pchLast;
  1512. bool seenDelimitedCommentEnd = false;
  1513. // store the last token
  1514. m_tkPrevious = m_ptoken->tk;
  1515. m_iecpLimTokPrevious = IecpLimTok(); // Introduced for use by lambda parsing to find correct span of expression lambdas
  1516. if (p >= last)
  1517. {
  1518. m_pchMinTok = p;
  1519. m_cMinTokMultiUnits = this->m_cMultiUnits;
  1520. goto LEof;
  1521. }
  1522. tokens token;
  1523. m_fHadEol = FALSE;
  1524. CharTypes chType;
  1525. charcount_t commentStartLine;
  1526. if (m_scanState && *p != 0)
  1527. {
  1528. if (m_fSyntaxColor)
  1529. {
  1530. firstChar = 0;
  1531. secondChar = 0;
  1532. m_pchMinTok = p;
  1533. m_cMinTokMultiUnits = this->m_cMultiUnits;
  1534. switch (m_scanState)
  1535. {
  1536. case ScanStateMultiLineComment:
  1537. goto LMultiLineComment;
  1538. case ScanStateMultiLineSingleQuoteString:
  1539. ch = '\'';
  1540. m_scanState = ScanStateNormal;
  1541. goto LScanStringConstant;
  1542. case ScanStateMultiLineDoubleQuoteString:
  1543. ch = '"';
  1544. m_scanState = ScanStateNormal;
  1545. goto LScanStringConstant;
  1546. }
  1547. }
  1548. if (m_scanState == ScanStateStringTemplateMiddleOrEnd)
  1549. {
  1550. AssertMsg(m_fStringTemplateDepth > 0,
  1551. "Shouldn't be trying to parse a string template end or middle token if we aren't scanning a string template");
  1552. m_scanState = ScanStateNormal;
  1553. pchT = p;
  1554. token = ScanStringTemplateMiddleOrEnd(&pchT);
  1555. p = pchT;
  1556. goto LDone;
  1557. }
  1558. }
  1559. for (;;)
  1560. {
  1561. LLoop:
  1562. m_pchMinTok = p;
  1563. m_cMinTokMultiUnits = this->m_cMultiUnits;
  1564. ch = this->ReadFirst(p, last);
  1565. #if DEBUG
  1566. chType = this->charClassifier->GetCharType((OLECHAR)ch);
  1567. #endif
  1568. switch (ch)
  1569. {
  1570. default:
  1571. if (ch == kchLS ||
  1572. ch == kchPS )
  1573. {
  1574. goto LNewLine;
  1575. }
  1576. {
  1577. BOOL isMultiUnit = this->IsMultiUnitChar((OLECHAR)ch);
  1578. if (isMultiUnit)
  1579. {
  1580. ch = this->template ReadRest<true>((OLECHAR)ch, p, last);
  1581. }
  1582. if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
  1583. {
  1584. codepoint_t upper = this->PeekFull(p, last);
  1585. if (Js::NumberUtilities::IsSurrogateUpperPart(upper))
  1586. {
  1587. // Consume the rest of the utf8 bytes for the codepoint
  1588. OLECHAR decodedUpper = this->ReadSurrogatePairUpper(p, last);
  1589. Assert(decodedUpper == (OLECHAR) upper);
  1590. ch = Js::NumberUtilities::SurrogatePairAsCodePoint(ch, upper);
  1591. }
  1592. }
  1593. if (this->charClassifier->IsIdStart(ch))
  1594. {
  1595. // We treat IDContinue as an error.
  1596. token = ScanIdentifierContinue(identifyKwds, false, !!isMultiUnit, m_pchMinTok, p, &p);
  1597. break;
  1598. }
  1599. }
  1600. chType = this->charClassifier->GetCharType(ch);
  1601. switch (chType)
  1602. {
  1603. case _C_WSP: continue;
  1604. case _C_NWL: goto LNewLine;
  1605. // All other types (except errors) are handled by the outer switch.
  1606. }
  1607. Assert(chType == _C_LET || chType == _C_ERR || chType == _C_UNK || chType == _C_BKQ || chType == _C_SHP || chType == _C_AT || chType == _C_DIG);
  1608. if (m_fSyntaxColor)
  1609. {
  1610. // No need to decrement the current position pointer, as the scanner will continue scanning from the next character onwards
  1611. return ScanError(p, tkID);
  1612. }
  1613. m_currentCharacter = p - 1;
  1614. Error(ERRillegalChar);
  1615. continue;
  1616. case '\0':
  1617. // Put back the null in case we get called again.
  1618. p--;
  1619. LEof:
  1620. token = tkEOF;
  1621. if (p + 1 < last)
  1622. {
  1623. if (m_fSyntaxColor)
  1624. {
  1625. return ScanError(p + 1, tkID);
  1626. }
  1627. // A \0 prior to the end of the text is an invalid character.
  1628. Error(ERRillegalChar);
  1629. }
  1630. break;
  1631. case 0x0009:
  1632. case 0x000B:
  1633. case 0x000C:
  1634. case 0x0020:
  1635. Assert(chType == _C_WSP);
  1636. continue;
  1637. case '.':
  1638. if (!Js::NumberUtilities::IsDigit(*p))
  1639. {
  1640. // Not a double
  1641. if (m_scriptContext->GetConfig()->IsES6SpreadEnabled() &&
  1642. this->PeekFirst(p, last) == '.' &&
  1643. this->PeekFirst(p + 1, last) == '.')
  1644. {
  1645. token = tkEllipsis;
  1646. p += 2;
  1647. }
  1648. else
  1649. {
  1650. token = tkDot;
  1651. }
  1652. break;
  1653. }
  1654. // May be a double, fall through
  1655. case '0': case '1': case '2': case '3': case '4':
  1656. case '5': case '6': case '7': case '8': case '9':
  1657. {
  1658. double dbl;
  1659. Assert(chType == _C_DIG || chType == _C_DOT);
  1660. p = m_pchMinTok;
  1661. this->RestoreMultiUnits(m_cMinTokMultiUnits);
  1662. bool likelyInt = true;
  1663. pchT = FScanNumber(p, &dbl, likelyInt);
  1664. if (p == pchT)
  1665. {
  1666. Assert(this->PeekFirst(p, last) != '.');
  1667. if (m_fSyntaxColor)
  1668. {
  1669. return ScanError(m_currentCharacter + 1, tkFltCon);
  1670. }
  1671. Error(ERRbadNumber);
  1672. }
  1673. Assert(!Js::NumberUtilities::IsNan(dbl));
  1674. p = pchT;
  1675. int32 value;
  1676. if (likelyInt && Js::NumberUtilities::FDblIsInt32(dbl, &value))
  1677. {
  1678. m_ptoken->SetLong(value);
  1679. token = tkIntCon;
  1680. }
  1681. else
  1682. {
  1683. token = tkFltCon;
  1684. m_ptoken->SetDouble(dbl, likelyInt);
  1685. }
  1686. break;
  1687. }
  1688. case '(': Assert(chType == _C_LPR); token = tkLParen; break;
  1689. case ')': Assert(chType == _C_RPR); token = tkRParen; break;
  1690. case ',': Assert(chType == _C_CMA); token = tkComma; break;
  1691. case ';': Assert(chType == _C_SMC); token = tkSColon; break;
  1692. case '[': Assert(chType == _C_LBR); token = tkLBrack; break;
  1693. case ']': Assert(chType == _C_RBR); token = tkRBrack; break;
  1694. case '~': Assert(chType == _C_TIL); token = tkTilde; break;
  1695. case '?': Assert(chType == _C_QUE); token = tkQMark; break;
  1696. case '{': Assert(chType == _C_LC); token = tkLCurly; break;
  1697. // ES 2015 11.3 Line Terminators
  1698. case '\r':
  1699. case '\n':
  1700. // kchLS:
  1701. // kchPS:
  1702. LNewLine:
  1703. m_currentCharacter = p;
  1704. ScanNewLine(ch);
  1705. p = m_currentCharacter;
  1706. m_fHadEol = TRUE;
  1707. continue;
  1708. LReserved:
  1709. {
  1710. // We will derive the PID from the token
  1711. Assert(token < tkID);
  1712. m_ptoken->SetIdentifier(NULL);
  1713. goto LDone;
  1714. }
  1715. LEval:
  1716. {
  1717. token = tkID;
  1718. if (!this->m_parser) goto LIdentifier;
  1719. m_ptoken->SetIdentifier(this->m_parser->GetEvalPid());
  1720. goto LDone;
  1721. }
  1722. LArguments:
  1723. {
  1724. token = tkID;
  1725. if (!this->m_parser) goto LIdentifier;
  1726. m_ptoken->SetIdentifier(this->m_parser->GetArgumentsPid());
  1727. goto LDone;
  1728. }
  1729. LTarget:
  1730. {
  1731. token = tkID;
  1732. if (!this->m_parser) goto LIdentifier;
  1733. m_ptoken->SetIdentifier(this->m_parser->GetTargetPid());
  1734. goto LDone;
  1735. }
  1736. #include "kwd-swtch.h"
  1737. case 'A': case 'B': case 'C': case 'D': case 'E':
  1738. case 'F': case 'G': case 'H': case 'I': case 'J':
  1739. case 'K': case 'L': case 'M': case 'N': case 'O':
  1740. case 'P': case 'Q': case 'R': case 'S': case 'T':
  1741. case 'U': case 'V': case 'W': case 'X': case 'Y':
  1742. case 'Z':
  1743. // Lower-case letters handled in kwd-swtch.h above during reserved word recognition.
  1744. case '$': case '_':
  1745. LIdentifier:
  1746. Assert(this->charClassifier->IsIdStart(ch));
  1747. Assert(ch < 0x10000 && !this->IsMultiUnitChar((OLECHAR)ch));
  1748. token = ScanIdentifierContinue(identifyKwds, false, false, m_pchMinTok, p, &p);
  1749. break;
  1750. case '`':
  1751. Assert(chType == _C_BKQ);
  1752. pchT = p;
  1753. token = ScanStringTemplateBegin(&pchT);
  1754. p = pchT;
  1755. break;
  1756. case '}':
  1757. Assert(chType == _C_RC);
  1758. token = tkRCurly;
  1759. break;
  1760. case '\\':
  1761. pchT = p - 1;
  1762. token = ScanIdentifier(identifyKwds, &pchT);
  1763. if (tkScanError == token)
  1764. {
  1765. m_currentCharacter = p;
  1766. if (m_fSyntaxColor)
  1767. return ScanError(p, tkID);
  1768. Error(ERRillegalChar);
  1769. }
  1770. p = pchT;
  1771. break;
  1772. case ':':
  1773. token = tkColon;
  1774. break;
  1775. case '=':
  1776. token = tkAsg;
  1777. switch (this->PeekFirst(p, last))
  1778. {
  1779. case '=':
  1780. p++;
  1781. token = tkEQ;
  1782. if (this->PeekFirst(p, last) == '=')
  1783. {
  1784. p++;
  1785. token = tkEqv;
  1786. }
  1787. break;
  1788. case '>':
  1789. p++;
  1790. token = tkDArrow;
  1791. break;
  1792. }
  1793. break;
  1794. case '!':
  1795. token = tkBang;
  1796. if (this->PeekFirst(p, last) == '=')
  1797. {
  1798. p++;
  1799. token = tkNE;
  1800. if (this->PeekFirst(p, last) == '=')
  1801. {
  1802. p++;
  1803. token = tkNEqv;
  1804. }
  1805. }
  1806. break;
  1807. case '+':
  1808. token = tkAdd;
  1809. switch (this->PeekFirst(p, last))
  1810. {
  1811. case '=':
  1812. p++;
  1813. token = tkAsgAdd;
  1814. break;
  1815. case '+':
  1816. p++;
  1817. token = tkInc;
  1818. break;
  1819. }
  1820. break;
  1821. case '-':
  1822. token = tkSub;
  1823. switch (this->PeekFirst(p, last))
  1824. {
  1825. case '=':
  1826. p++;
  1827. token = tkAsgSub;
  1828. break;
  1829. case '-':
  1830. p++;
  1831. token = tkDec;
  1832. if (!m_fIsModuleCode)
  1833. {
  1834. if ('>' == this->PeekFirst(p, last) && (m_fHadEol || seenDelimitedCommentEnd)) // --> HTMLCloseComment
  1835. {
  1836. goto LSkipLineComment;
  1837. }
  1838. }
  1839. break;
  1840. }
  1841. break;
  1842. case '*':
  1843. token = tkStar;
  1844. switch(this->PeekFirst(p, last))
  1845. {
  1846. case '=' :
  1847. p++;
  1848. token = tkAsgMul;
  1849. break;
  1850. case '*' :
  1851. if (!m_scriptContext->GetConfig()->IsES7ExponentiationOperatorEnabled())
  1852. {
  1853. break;
  1854. }
  1855. p++;
  1856. token = tkExpo;
  1857. if (this->PeekFirst(p, last) == '=')
  1858. {
  1859. p++;
  1860. token = tkAsgExpo;
  1861. }
  1862. }
  1863. break;
  1864. case '/':
  1865. token = tkDiv;
  1866. switch(this->PeekFirst(p, last))
  1867. {
  1868. case '=':
  1869. p++;
  1870. token = tkAsgDiv;
  1871. break;
  1872. case '/':
  1873. if (p >= last)
  1874. {
  1875. AssertMsg(!m_fIsModuleCode, "Do we have other line comment cases scanning pass last?");
  1876. // Effective source length may have excluded HTMLCommentSuffix "//... -->". If we are scanning
  1877. // those, we have passed "last" already. Move back and return EOF.
  1878. p = last;
  1879. goto LEof;
  1880. }
  1881. ch = *++p;
  1882. firstChar = (OLECHAR)ch;
  1883. LSkipLineComment:
  1884. pchT = NULL;
  1885. for (;;)
  1886. {
  1887. switch ((ch = this->ReadFirst(p, last)))
  1888. {
  1889. case kchLS: // 0x2028, classifies as new line
  1890. case kchPS: // 0x2029, classifies as new line
  1891. LEcmaCommentLineBreak:
  1892. // kchPS and kchLS are more than one unit in UTF-8.
  1893. if (pchT)
  1894. {
  1895. // kchPS and kchLS are more than one unit in UTF-8.
  1896. p = pchT;
  1897. }
  1898. else
  1899. {
  1900. // But only a single code unit in UTF16
  1901. p--;
  1902. }
  1903. this->RestoreMultiUnits(multiUnits);
  1904. goto LCommentLineBreak;
  1905. case kchNWL:
  1906. case kchRET:
  1907. p--;
  1908. LCommentLineBreak:
  1909. if (m_fSyntaxColor)
  1910. {
  1911. token = tkComment;
  1912. goto LDone;
  1913. }
  1914. // Subtract the comment length from the total char count for the purpose
  1915. // of deciding whether to defer AST and byte code generation.
  1916. m_parser->ReduceDeferredScriptLength((ULONG)(p - m_pchMinTok));
  1917. break;
  1918. case kchNUL:
  1919. if (p >= last)
  1920. {
  1921. p--;
  1922. goto LCommentLineBreak;
  1923. }
  1924. continue;
  1925. default:
  1926. if (this->IsMultiUnitChar((OLECHAR)ch))
  1927. {
  1928. pchT = p - 1;
  1929. multiUnits = this->m_cMultiUnits;
  1930. switch (ch = this->template ReadRest<true>((OLECHAR)ch, p, last))
  1931. {
  1932. case kchLS:
  1933. case kchPS:
  1934. goto LEcmaCommentLineBreak;
  1935. }
  1936. }
  1937. continue;
  1938. }
  1939. break;
  1940. }
  1941. continue;
  1942. case '*':
  1943. ch = *++p;
  1944. firstChar = (OLECHAR)ch;
  1945. if ((p + 1) < last)
  1946. {
  1947. secondChar = (OLECHAR)(*(p + 1));
  1948. }
  1949. else
  1950. {
  1951. secondChar = '\0';
  1952. }
  1953. LMultiLineComment:
  1954. pchT = p;
  1955. commentStartLine = m_line;
  1956. bool containTypeDef;
  1957. if (tkNone == (token = SkipComment(&pchT, &containTypeDef)))
  1958. {
  1959. // Subtract the comment length from the total char count for the purpose
  1960. // of deciding whether to defer AST and byte code generation.
  1961. m_parser->ReduceDeferredScriptLength((ULONG)(pchT - m_pchMinTok));
  1962. p = pchT;
  1963. seenDelimitedCommentEnd = true;
  1964. goto LLoop;
  1965. }
  1966. p = pchT;
  1967. break;
  1968. }
  1969. break;
  1970. case '%':
  1971. Assert(chType == _C_PCT);
  1972. token = tkPct;
  1973. if (this->PeekFirst(p, last) == '=')
  1974. {
  1975. p++;
  1976. token = tkAsgMod;
  1977. }
  1978. break;
  1979. case '<':
  1980. Assert(chType == _C_LT);
  1981. token = tkLT;
  1982. switch (this->PeekFirst(p, last))
  1983. {
  1984. case '=':
  1985. p++;
  1986. token = tkLE;
  1987. break;
  1988. case '<':
  1989. p++;
  1990. token = tkLsh;
  1991. if (this->PeekFirst(p, last) == '=')
  1992. {
  1993. p++;
  1994. token = tkAsgLsh;
  1995. break;
  1996. }
  1997. break;
  1998. case '!':
  1999. // ES 2015 B.1.3 - HTML comments are only allowed when parsing non-module code.
  2000. if (!m_fIsModuleCode && this->PeekFirst(p + 1, last) == '-' && this->PeekFirst(p + 2, last) == '-')
  2001. {
  2002. // This is a "<!--" comment - treat as //
  2003. if (p >= last)
  2004. {
  2005. // Effective source length may have excluded HTMLCommentSuffix "<!-- ... -->". If we are scanning
  2006. // those, we have passed "last" already. Move back and return EOF.
  2007. p = last;
  2008. goto LEof;
  2009. }
  2010. firstChar = '!';
  2011. goto LSkipLineComment;
  2012. }
  2013. break;
  2014. }
  2015. break;
  2016. case '>':
  2017. Assert(chType == _C_GT);
  2018. token = tkGT;
  2019. switch (this->PeekFirst(p, last))
  2020. {
  2021. case '=':
  2022. p++;
  2023. token = tkGE;
  2024. break;
  2025. case '>':
  2026. p++;
  2027. token = tkRsh;
  2028. switch (this->PeekFirst(p, last))
  2029. {
  2030. case '=':
  2031. p++;
  2032. token = tkAsgRsh;
  2033. break;
  2034. case '>':
  2035. p++;
  2036. token = tkRs2;
  2037. if (*p == '=')
  2038. {
  2039. p++;
  2040. token = tkAsgRs2;
  2041. }
  2042. break;
  2043. }
  2044. break;
  2045. }
  2046. break;
  2047. case '^':
  2048. Assert(chType == _C_XOR);
  2049. token = tkXor;
  2050. if (this->PeekFirst(p, last) == '=')
  2051. {
  2052. p++;
  2053. token = tkAsgXor;
  2054. }
  2055. break;
  2056. case '|':
  2057. Assert(chType == _C_BAR);
  2058. token = tkOr;
  2059. switch (this->PeekFirst(p, last))
  2060. {
  2061. case '=':
  2062. p++;
  2063. token = tkAsgOr;
  2064. break;
  2065. case '|':
  2066. p++;
  2067. token = tkLogOr;
  2068. break;
  2069. }
  2070. break;
  2071. case '&':
  2072. Assert(chType == _C_AMP);
  2073. token = tkAnd;
  2074. switch (this->PeekFirst(p, last))
  2075. {
  2076. case '=':
  2077. p++;
  2078. token = tkAsgAnd;
  2079. break;
  2080. case '&':
  2081. p++;
  2082. token = tkLogAnd;
  2083. break;
  2084. }
  2085. break;
  2086. case '\'':
  2087. case '"':
  2088. Assert(chType == _C_QUO || chType == _C_APO);
  2089. LScanStringConstant:
  2090. pchT = p;
  2091. token = this->ScanStringConstant((OLECHAR)ch, &pchT);
  2092. p = pchT;
  2093. break;
  2094. }
  2095. break;
  2096. }
  2097. LDone:
  2098. m_currentCharacter = p;
  2099. return (m_ptoken->tk = token);
  2100. }
  2101. template <typename EncodingPolicy>
  2102. IdentPtr Scanner<EncodingPolicy>::GetSecondaryBufferAsPid()
  2103. {
  2104. bool createPid = true;
  2105. if (m_fSyntaxColor || (m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0)
  2106. {
  2107. createPid = false;
  2108. }
  2109. if (createPid)
  2110. {
  2111. return m_phtbl->PidHashNameLen(m_tempChBufSecondary.m_prgch, m_tempChBufSecondary.m_ichCur);
  2112. }
  2113. else
  2114. {
  2115. return nullptr;
  2116. }
  2117. }
  2118. template <typename EncodingPolicy>
  2119. LPCOLESTR Scanner<EncodingPolicy>::StringFromLong(int32 lw)
  2120. {
  2121. _ltow_s(lw, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax, 10);
  2122. return m_tempChBuf.m_prgch;
  2123. }
  2124. template <typename EncodingPolicy>
  2125. IdentPtr Scanner<EncodingPolicy>::PidFromLong(int32 lw)
  2126. {
  2127. return m_phtbl->PidHashName(StringFromLong(lw));
  2128. }
  2129. template <typename EncodingPolicy>
  2130. LPCOLESTR Scanner<EncodingPolicy>::StringFromDbl(double dbl)
  2131. {
  2132. if (!Js::NumberUtilities::FDblToStr(dbl, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax))
  2133. {
  2134. Error(ERRnoMemory);
  2135. }
  2136. return m_tempChBuf.m_prgch;
  2137. }
  2138. template <typename EncodingPolicy>
  2139. IdentPtr Scanner<EncodingPolicy>::PidFromDbl(double dbl)
  2140. {
  2141. return m_phtbl->PidHashName(StringFromDbl(dbl));
  2142. }
  2143. template <typename EncodingPolicy>
  2144. void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint)
  2145. {
  2146. Capture(restorePoint, 0, 0);
  2147. }
  2148. template <typename EncodingPolicy>
  2149. void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr)
  2150. {
  2151. restorePoint->m_ichMinTok = this->IchMinTok();
  2152. restorePoint->m_ichMinLine = this->IchMinLine();
  2153. restorePoint->m_cMinTokMultiUnits = this->m_cMinTokMultiUnits;
  2154. restorePoint->m_cMinLineMultiUnits = this->m_cMinLineMultiUnits;
  2155. restorePoint->m_line = this->m_line;
  2156. restorePoint->m_fHadEol = this->m_fHadEol;
  2157. restorePoint->functionIdIncrement = functionIdIncrement;
  2158. restorePoint->lengthDecr = lengthDecr;
  2159. #ifdef DEBUG
  2160. restorePoint->m_cMultiUnits = this->m_cMultiUnits;
  2161. #endif
  2162. }
  2163. template <typename EncodingPolicy>
  2164. void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint)
  2165. {
  2166. SeekAndScan<false>(restorePoint);
  2167. }
  2168. template <typename EncodingPolicy>
  2169. void Scanner<EncodingPolicy>::SeekToForcingPid(const RestorePoint& restorePoint)
  2170. {
  2171. SeekAndScan<true>(restorePoint);
  2172. }
  2173. template <typename EncodingPolicy>
  2174. template <bool forcePid>
  2175. void Scanner<EncodingPolicy>::SeekAndScan(const RestorePoint& restorePoint)
  2176. {
  2177. this->m_currentCharacter = this->m_pchBase + restorePoint.m_ichMinTok + restorePoint.m_cMinTokMultiUnits;
  2178. this->m_pchMinLine = this->m_pchBase + restorePoint.m_ichMinLine + restorePoint.m_cMinLineMultiUnits;
  2179. this->m_cMinLineMultiUnits = restorePoint.m_cMinLineMultiUnits;
  2180. this->RestoreMultiUnits(restorePoint.m_cMinTokMultiUnits);
  2181. if (forcePid)
  2182. {
  2183. this->ScanForcingPid();
  2184. }
  2185. else
  2186. {
  2187. this->Scan();
  2188. }
  2189. this->m_line = restorePoint.m_line;
  2190. this->m_fHadEol = restorePoint.m_fHadEol;
  2191. this->m_parser->ReduceDeferredScriptLength(restorePoint.lengthDecr);
  2192. Assert(this->m_cMultiUnits == restorePoint.m_cMultiUnits);
  2193. }
  2194. template <typename EncodingPolicy>
  2195. void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId)
  2196. {
  2197. SeekTo(restorePoint);
  2198. *nextFunctionId += restorePoint.functionIdIncrement;
  2199. }
  2200. // Called by CompileScriptException::ProcessError to retrieve a BSTR for the line on which an error occurred.
  2201. template<typename EncodingPolicy>
  2202. HRESULT Scanner<EncodingPolicy>::SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine)
  2203. {
  2204. if( !pbstrLine )
  2205. {
  2206. return E_POINTER;
  2207. }
  2208. // If we overflow the string, we have a serious problem...
  2209. if (ichMinLine < 0 || static_cast<size_t>(ichMinLine) > AdjustedLength() )
  2210. {
  2211. return E_UNEXPECTED;
  2212. }
  2213. typename EncodingPolicy::EncodedCharPtr pStart = static_cast<size_t>(ichMinLine) == IchMinLine() ? m_pchMinLine : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, ichMinLine);
  2214. // Determine the length by scanning for the next newline
  2215. charcount_t cch = LineLength(pStart, m_pchLast);
  2216. Assert(cch <= LONG_MAX);
  2217. typename EncodingPolicy::EncodedCharPtr pEnd = static_cast<size_t>(ichMinLine) == IchMinLine() ? m_pchMinLine + cch : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, cch);
  2218. *pbstrLine = SysAllocStringLen(NULL, cch);
  2219. if (!*pbstrLine)
  2220. {
  2221. return E_OUTOFMEMORY;
  2222. }
  2223. this->ConvertToUnicode(*pbstrLine, cch, pStart, pEnd);
  2224. return S_OK;
  2225. }
  // Explicit instantiations of Scanner for the three encoding policies named
  // below, so that the member template definitions in this file are emitted
  // for each of them.
  2226. template class Scanner<NullTerminatedUnicodeEncodingPolicy>;
  2227. template class Scanner<NullTerminatedUTF8EncodingPolicy>;
  2228. template class Scanner<NotNullTerminatedUTF8EncodingPolicy>;