Scan.h 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. #ifdef ENABLE_GLOBALIZATION
  7. namespace Js
  8. {
  9. class DelayLoadWindowsGlobalization;
  10. }
  11. #include "Windows.Globalization.h"
  12. #endif
  13. int CountNewlines(LPCOLESTR psz);
  14. class Parser;
  15. struct ParseContext;
  16. struct Token
  17. {
  18. private:
  19. union
  20. {
  21. struct
  22. {
  23. IdentPtr pid;
  24. const char * pchMin;
  25. int32 length;
  26. };
  27. int32 lw;
  28. struct
  29. {
  30. double dbl;
  31. // maybeInt will be true if the number did not contain 'e', 'E' , or '.'
  32. // notably important in asm.js where the '.' has semantic importance
  33. bool maybeInt;
  34. };
  35. UnifiedRegex::RegexPattern* pattern;
  36. struct
  37. {
  38. charcount_t ichMin;
  39. charcount_t ichLim;
  40. };
  41. } u;
  42. IdentPtr CreateIdentifier(HashTbl * hashTbl);
  43. public:
  44. Token() : tk(tkLim) {}
  45. tokens tk;
  46. BOOL IsIdentifier() const
  47. {
  48. return tk == tkID;
  49. }
  50. IdentPtr GetStr() const
  51. {
  52. Assert(tk == tkStrCon || tk == tkStrTmplBasic || tk == tkStrTmplBegin || tk == tkStrTmplMid || tk == tkStrTmplEnd);
  53. return u.pid;
  54. }
  55. IdentPtr GetIdentifier(HashTbl * hashTbl)
  56. {
  57. Assert(IsIdentifier() || IsReservedWord());
  58. if (u.pid)
  59. {
  60. return u.pid;
  61. }
  62. return CreateIdentifier(hashTbl);
  63. }
  64. int32 GetLong() const
  65. {
  66. Assert(tk == tkIntCon);
  67. return u.lw;
  68. }
  69. IdentPtr GetBigInt() const
  70. {
  71. Assert(tk == tkBigIntCon);
  72. return u.pid;
  73. }
  74. double GetDouble() const
  75. {
  76. Assert(tk == tkFltCon);
  77. return u.dbl;
  78. }
  79. bool GetDoubleMayBeInt() const
  80. {
  81. Assert(tk == tkFltCon);
  82. return u.maybeInt;
  83. }
  84. UnifiedRegex::RegexPattern * GetRegex()
  85. {
  86. Assert(tk == tkRegExp);
  87. return u.pattern;
  88. }
  89. // NOTE: THESE ROUTINES DEPEND ON THE ORDER THAT OPERATORS
  90. // ARE DECLARED IN kwd-xxx.h FILES.
  91. BOOL IsReservedWord() const
  92. {
  93. // Keywords and future reserved words (does not include operators)
  94. return tk < tkID;
  95. }
  96. BOOL IsKeyword() const;
  97. BOOL IsFutureReservedWord(const BOOL isStrictMode) const
  98. {
  99. // Reserved words that are not keywords
  100. return tk >= tkENUM && tk <= (isStrictMode ? tkSTATIC : tkENUM);
  101. }
  102. BOOL IsOperator() const
  103. {
  104. return tk >= tkComma && tk < tkLParen;
  105. }
  106. // UTF16 Scanner are only for syntax coloring. Only support
  107. // defer pid creation for UTF8
  108. void SetIdentifier(const char * pchMin, int32 len)
  109. {
  110. this->u.pid = nullptr;
  111. this->u.pchMin = pchMin;
  112. this->u.length = len;
  113. }
  114. void SetIdentifier(IdentPtr pid)
  115. {
  116. this->u.pid = pid;
  117. this->u.pchMin = nullptr;
  118. }
  119. void SetLong(int32 value)
  120. {
  121. this->u.lw = value;
  122. }
  123. void SetDouble(double dbl, bool maybeInt)
  124. {
  125. this->u.dbl = dbl;
  126. this->u.maybeInt = maybeInt;
  127. }
  128. void SetBigInt(IdentPtr pid)
  129. {
  130. this->u.pid = pid;
  131. this->u.pchMin = nullptr;
  132. }
  133. tokens SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser);
  134. };
  135. typedef BYTE UTF8Char;
  136. typedef UTF8Char* UTF8CharPtr;
  137. class NullTerminatedUnicodeEncodingPolicy
  138. {
  139. public:
  140. typedef OLECHAR EncodedChar;
  141. typedef const OLECHAR *EncodedCharPtr;
  142. protected:
  143. static const bool MultiUnitEncoding = false;
  144. static const size_t m_cMultiUnits = 0;
  145. static BOOL IsMultiUnitChar(OLECHAR ch) { return FALSE; }
  146. // See comment below regarding unused 'last' parameter
  147. static OLECHAR ReadFirst(EncodedCharPtr &p, EncodedCharPtr last) { return *p++; }
  148. template <bool bScan>
  149. static OLECHAR ReadRest(OLECHAR ch, EncodedCharPtr &p, EncodedCharPtr last) { return ch; }
  150. template <bool bScan>
  151. static OLECHAR ReadFull(EncodedCharPtr &p, EncodedCharPtr last) { return *p++; }
  152. static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last) { return *p; }
  153. static OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last) { return *p; }
  154. static OLECHAR ReadSurrogatePairUpper(const EncodedCharPtr&, const EncodedCharPtr& last)
  155. {
  156. AssertMsg(false, "method should not be called while scanning UTF16 string");
  157. return 0xfffe;
  158. }
  159. static void RestoreMultiUnits(size_t multiUnits) { }
  160. static size_t CharacterOffsetToUnitOffset(EncodedCharPtr start, EncodedCharPtr current, EncodedCharPtr last, charcount_t offset) { return offset; }
  161. static void ConvertToUnicode(__out_ecount_full(cch) LPOLESTR pch, charcount_t cch, EncodedCharPtr start, EncodedCharPtr end)
  162. {
  163. Unused(end);
  164. js_memcpy_s(pch, cch * sizeof(OLECHAR), start, cch * sizeof(OLECHAR));
  165. }
  166. public:
  167. void Clear() {}
  168. void SetIsUtf8(bool isUtf8) { }
  169. bool IsUtf8() const { return false; }
  170. };
  171. template <bool nullTerminated>
  172. class UTF8EncodingPolicyBase
  173. {
  174. public:
  175. typedef utf8char_t EncodedChar;
  176. typedef LPCUTF8 EncodedCharPtr;
  177. protected:
  178. static const bool MultiUnitEncoding = true;
  179. size_t m_cMultiUnits;
  180. utf8::DecodeOptions m_decodeOptions;
  181. UTF8EncodingPolicyBase() { Clear(); }
  182. static BOOL IsMultiUnitChar(OLECHAR ch) { return ch > 0x7f; }
  183. // Note when nullTerminated is false we still need to increment the character pointer because the scanner "puts back" this virtual null character by decrementing the pointer
  184. static OLECHAR ReadFirst(EncodedCharPtr &p, EncodedCharPtr last) { return (nullTerminated || p < last) ? static_cast<OLECHAR>(*p++) : (p++, 0); }
  185. // "bScan" indicates if this ReadFull is part of scanning. Pass true during scanning and ReadFull will update
  186. // related Scanner state. The caller is supposed to sync result "p" to Scanner's current position. Pass false
  187. // otherwise and this doesn't affect Scanner state.
  188. template <bool bScan>
  189. OLECHAR ReadFull(EncodedCharPtr &p, EncodedCharPtr last)
  190. {
  191. EncodedChar ch = (nullTerminated || p < last) ? *p++ : (p++, 0);
  192. return !IsMultiUnitChar(ch) ? static_cast<OLECHAR>(ch) : ReadRest<bScan>(ch, p, last);
  193. }
  194. OLECHAR ReadSurrogatePairUpper(EncodedCharPtr &p, EncodedCharPtr last)
  195. {
  196. EncodedChar ch = (nullTerminated || p < last) ? *p++ : (p++, 0);
  197. Assert(IsMultiUnitChar(ch));
  198. this->m_decodeOptions |= utf8::DecodeOptions::doSecondSurrogatePair;
  199. return ReadRest<true>(ch, p, last);
  200. }
  201. static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last) { return (nullTerminated || p < last) ? static_cast<OLECHAR>(*p) : 0; }
  202. OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last)
  203. {
  204. OLECHAR result = PeekFirst(p, last);
  205. if (IsMultiUnitChar(result))
  206. {
  207. result = ReadFull<false>(p, last);
  208. }
  209. return result;
  210. }
  211. // "bScan" indicates if this ReadRest is part of scanning. Pass true during scanning and ReadRest will update
  212. // related Scanner state. The caller is supposed to sync result "p" to Scanner's current position. Pass false
  213. // otherwise and this doesn't affect Scanner state.
  214. template <bool bScan>
  215. OLECHAR ReadRest(OLECHAR ch, EncodedCharPtr &p, EncodedCharPtr last)
  216. {
  217. EncodedCharPtr s;
  218. if (bScan)
  219. {
  220. s = p;
  221. }
  222. OLECHAR result = utf8::DecodeTail(ch, p, last, m_decodeOptions);
  223. if (bScan)
  224. {
  225. // If we are scanning, update m_cMultiUnits counter.
  226. m_cMultiUnits += p - s;
  227. }
  228. return result;
  229. }
  230. void RestoreMultiUnits(size_t multiUnits) { m_cMultiUnits = multiUnits; }
  231. size_t CharacterOffsetToUnitOffset(EncodedCharPtr start, EncodedCharPtr current, EncodedCharPtr last, charcount_t offset)
  232. {
  233. // Note: current may be before or after last. If last is the null terminator, current should be within [start, last].
  234. // But if we excluded HTMLCommentSuffix for the source, last is before "// -->\0". Scanner may stop at null
  235. // terminator past last, then current is after last.
  236. Assert(current >= start);
  237. size_t currentUnitOffset = current - start;
  238. Assert(currentUnitOffset > m_cMultiUnits);
  239. Assert(currentUnitOffset - m_cMultiUnits < LONG_MAX);
  240. charcount_t currentCharacterOffset = charcount_t(currentUnitOffset - m_cMultiUnits);
  241. // If the offset is the current character offset then just return the current unit offset.
  242. if (currentCharacterOffset == offset) return currentUnitOffset;
  243. // If we have not encountered any multi-unit characters and we are moving backward the
  244. // character index and unit index are 1:1 so just return offset
  245. if (m_cMultiUnits == 0 && offset <= currentCharacterOffset) return offset;
  246. // Use local decode options
  247. utf8::DecodeOptions decodeOptions = IsUtf8() ? utf8::doDefault : utf8::doAllowThreeByteSurrogates;
  248. if (offset > currentCharacterOffset)
  249. {
  250. // If we are looking for an offset past current, current must be within [start, last]. We don't expect seeking
  251. // scanner position past last.
  252. Assert(current <= last);
  253. // If offset > currentOffset we already know the current character offset. The unit offset is the
  254. // unit index of offset - currentOffset characters from current.
  255. charcount_t charsLeft = offset - currentCharacterOffset;
  256. return currentUnitOffset + utf8::CharacterIndexToByteIndex(current, last - current, charsLeft, decodeOptions);
  257. }
  258. // If all else fails calculate the index from the start of the buffer.
  259. return utf8::CharacterIndexToByteIndex(start, currentUnitOffset, offset, decodeOptions);
  260. }
  261. void ConvertToUnicode(__out_ecount_full(cch) LPOLESTR pch, charcount_t cch, EncodedCharPtr start, EncodedCharPtr end)
  262. {
  263. m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doSecondSurrogatePair);
  264. utf8::DecodeUnitsInto(pch, start, end, m_decodeOptions);
  265. }
  266. public:
  267. void Clear()
  268. {
  269. m_cMultiUnits = 0;
  270. m_decodeOptions = utf8::doAllowThreeByteSurrogates;
  271. }
  272. // If we get UTF8 source buffer, turn off doAllowThreeByteSurrogates but allow invalid WCHARs without replacing them with replacement 'g_chUnknown'.
  273. void SetIsUtf8(bool isUtf8)
  274. {
  275. if (isUtf8)
  276. {
  277. m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doAllowThreeByteSurrogates | utf8::doAllowInvalidWCHARs);
  278. }
  279. else
  280. {
  281. m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doAllowInvalidWCHARs | utf8::doAllowThreeByteSurrogates);
  282. }
  283. }
  284. bool IsUtf8() const { return (m_decodeOptions & utf8::doAllowThreeByteSurrogates) == 0; }
  285. };
  286. typedef UTF8EncodingPolicyBase<false> NotNullTerminatedUTF8EncodingPolicy;
  287. interface IScanner
  288. {
  289. virtual void GetErrorLineInfo(__out int32& ichMin, __out int32& ichLim, __out int32& line, __out int32& ichMinLine) = 0;
  290. virtual HRESULT SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine) = 0;
  291. };
  // Flags that can be provided to the Scan functions.
  // These can be bitwise OR'ed.
  enum ScanFlag
  {
      ScanFlagNone = 0,

      // Original comment: "Force strings to always have pid".
      // NOTE(review): that wording conflicts with the name "Suppress" and with
      // Scanner::SetDeferredParse(), which sets this bit when deferring;
      // presumably the flag suppresses pid creation for strings. Confirm in Scan.cpp.
      ScanFlagSuppressStrPid = 1,
  };
  299. typedef HRESULT (*CommentCallback)(void *data, OLECHAR firstChar, OLECHAR secondChar, bool containTypeDef, charcount_t min, charcount_t lim, bool adjacent, bool multiline, charcount_t startLine, charcount_t endLine);
  300. // Restore point defined using a relative offset rather than a pointer.
  301. struct RestorePoint
  302. {
  303. Field(charcount_t) m_ichMinTok;
  304. Field(charcount_t) m_ichMinLine;
  305. Field(size_t) m_cMinTokMultiUnits;
  306. Field(size_t) m_cMinLineMultiUnits;
  307. Field(charcount_t) m_line;
  308. Field(uint) functionIdIncrement;
  309. Field(size_t) lengthDecr;
  310. Field(BOOL) m_fHadEol;
  311. #ifdef DEBUG
  312. Field(size_t) m_cMultiUnits;
  313. #endif
  314. RestorePoint()
  315. : m_ichMinTok((charcount_t)-1),
  316. m_ichMinLine((charcount_t)-1),
  317. m_cMinTokMultiUnits((size_t)-1),
  318. m_cMinLineMultiUnits((size_t)-1),
  319. m_line((charcount_t)-1),
  320. functionIdIncrement(0),
  321. lengthDecr(0),
  322. m_fHadEol(FALSE)
  323. #ifdef DEBUG
  324. , m_cMultiUnits((size_t)-1)
  325. #endif
  326. {
  327. };
  328. };
  329. template <typename EncodingPolicy>
  330. class Scanner : public IScanner, public EncodingPolicy
  331. {
  332. friend Parser;
  333. typedef typename EncodingPolicy::EncodedChar EncodedChar;
  334. typedef typename EncodingPolicy::EncodedCharPtr EncodedCharPtr;
  335. public:
  336. Scanner(Parser* parser, Token *ptoken, Js::ScriptContext *scriptContext);
  337. ~Scanner(void);
  338. tokens Scan();
  339. tokens ScanNoKeywords();
  340. tokens ScanForcingPid();
  341. void SetText(EncodedCharPtr psz, size_t offset, size_t length, charcount_t characterOffset, bool isUtf8, ULONG grfscr, ULONG lineNumber = 0);
  342. #if ENABLE_BACKGROUND_PARSING
  343. void PrepareForBackgroundParse(Js::ScriptContext *scriptContext);
  344. #endif
  345. enum ScanState
  346. {
  347. ScanStateNormal = 0,
  348. ScanStateStringTemplateMiddleOrEnd = 1,
  349. };
  350. ScanState GetScanState() { return m_scanState; }
  351. void SetScanState(ScanState state) { m_scanState = state; }
  352. bool SetYieldIsKeywordRegion(bool fYieldIsKeywordRegion)
  353. {
  354. bool fPrevYieldIsKeywordRegion = m_fYieldIsKeywordRegion;
  355. m_fYieldIsKeywordRegion = fYieldIsKeywordRegion;
  356. return fPrevYieldIsKeywordRegion;
  357. }
  358. bool YieldIsKeywordRegion()
  359. {
  360. return m_fYieldIsKeywordRegion;
  361. }
  362. bool YieldIsKeyword()
  363. {
  364. return YieldIsKeywordRegion() || this->IsStrictMode();
  365. }
  366. bool SetAwaitIsKeywordRegion(bool fAwaitIsKeywordRegion)
  367. {
  368. bool fPrevAwaitIsKeywordRegion = m_fAwaitIsKeywordRegion;
  369. m_fAwaitIsKeywordRegion = fAwaitIsKeywordRegion;
  370. return fPrevAwaitIsKeywordRegion;
  371. }
  372. bool AwaitIsKeywordRegion()
  373. {
  374. return m_fAwaitIsKeywordRegion;
  375. }
  376. bool AwaitIsKeyword()
  377. {
  378. return AwaitIsKeywordRegion() || this->m_fIsModuleCode;
  379. }
  380. tokens TryRescanRegExp();
  381. tokens RescanRegExp();
  382. tokens RescanRegExpNoAST();
  383. tokens RescanRegExpTokenizer();
  384. BOOL FHadNewLine(void)
  385. {
  386. return m_fHadEol;
  387. }
  388. IdentPtr PidFromLong(int32 lw);
  389. IdentPtr PidFromDbl(double dbl);
  390. LPCOLESTR StringFromLong(int32 lw);
  391. LPCOLESTR StringFromDbl(double dbl);
  392. IdentPtr GetSecondaryBufferAsPid();
  393. BYTE SetDeferredParse(BOOL defer)
  394. {
  395. BYTE fOld = m_DeferredParseFlags;
  396. if (defer)
  397. {
  398. m_DeferredParseFlags |= ScanFlagSuppressStrPid;
  399. }
  400. else
  401. {
  402. m_DeferredParseFlags = ScanFlagNone;
  403. }
  404. return fOld;
  405. }
  406. void SetDeferredParseFlags(BYTE flags)
  407. {
  408. m_DeferredParseFlags = flags;
  409. }
  410. // the functions IsDoubleQuoteOnLastTkStrCon() and IsHexOrOctOnLastTKNumber() works only with a scanner without lookahead
  411. // Both functions are used to get more info on the last token for specific diffs necessary for JSON parsing.
  412. //Single quotes are not legal in JSON strings. Make distinction between single quote string constant and single quote string
  413. BOOL IsDoubleQuoteOnLastTkStrCon()
  414. {
  415. return m_doubleQuoteOnLastTkStrCon;
  416. }
  417. // True if all chars of last string constant are ascii
  418. BOOL IsEscapeOnLastTkStrCon()
  419. {
  420. return m_EscapeOnLastTkStrCon;
  421. }
  422. bool LastIdentifierHasEscape()
  423. {
  424. return m_lastIdentifierHasEscape;
  425. }
  426. bool IsOctOrLeadingZeroOnLastTKNumber()
  427. {
  428. return m_OctOrLeadingZeroOnLastTKNumber;
  429. }
  430. // Returns the character offset of the first token. The character offset is the offset the first character of the token would
  431. // have if the entire file was converted to Unicode (UTF16-LE).
  432. charcount_t IchMinTok(void) const
  433. {
  434. Assert(m_pchMinTok - m_pchBase >= 0);
  435. Assert(m_pchMinTok - m_pchBase <= LONG_MAX);
  436. Assert(static_cast<charcount_t>(m_pchMinTok - m_pchBase) >= m_cMinTokMultiUnits);
  437. return static_cast<charcount_t>(m_pchMinTok - m_pchBase - m_cMinTokMultiUnits);
  438. }
  439. // Returns the character offset of the character immediately following the token. The character offset is the offset the first
  440. // character of the token would have if the entire file was converted to Unicode (UTF16-LE).
  441. charcount_t IchLimTok(void) const
  442. {
  443. Assert(m_currentCharacter - m_pchBase >= 0);
  444. Assert(m_currentCharacter - m_pchBase <= LONG_MAX);
  445. Assert(static_cast<charcount_t>(m_currentCharacter - m_pchBase) >= this->m_cMultiUnits);
  446. return static_cast<charcount_t>(m_currentCharacter - m_pchBase - this->m_cMultiUnits);
  447. }
  448. void SetErrorPosition(charcount_t ichMinError, charcount_t ichLimError)
  449. {
  450. Assert(ichLimError > 0 || ichMinError == 0);
  451. m_ichMinError = ichMinError;
  452. m_ichLimError = ichLimError;
  453. }
  454. charcount_t IchMinError(void) const
  455. {
  456. return m_ichLimError ? m_ichMinError : IchMinTok();
  457. }
  458. charcount_t IchLimError(void) const
  459. {
  460. return m_ichLimError ? m_ichLimError : IchLimTok();
  461. }
  462. // Returns the encoded unit offset of first character of the token. For example, in a UTF-8 encoding this is the offset into
  463. // the UTF-8 buffer. In Unicode this is the same as IchMinTok().
  464. size_t IecpMinTok(void) const
  465. {
  466. return static_cast< size_t >(m_pchMinTok - m_pchBase);
  467. }
  468. // Returns the encoded unit offset of the character immediately following the token. For example, in a UTF-8 encoding this is
  469. // the offset into the UTF-8 buffer. In Unicode this is the same as IchLimTok().
  470. size_t IecpLimTok(void) const
  471. {
  472. return static_cast< size_t >(m_currentCharacter - m_pchBase);
  473. }
  474. size_t IecpLimTokPrevious() const
  475. {
  476. AssertMsg(m_iecpLimTokPrevious != (size_t)-1, "IecpLimTokPrevious() cannot be called before scanning a token");
  477. return m_iecpLimTokPrevious;
  478. }
  479. charcount_t IchLimTokPrevious() const
  480. {
  481. AssertMsg(m_ichLimTokPrevious != (charcount_t)-1, "IchLimTokPrevious() cannot be called before scanning a token");
  482. return m_ichLimTokPrevious;
  483. }
  484. IdentPtr PidAt(size_t iecpMin, size_t iecpLim);
  485. // Returns the character offset within the stream of the first character on the current line.
  486. charcount_t IchMinLine(void) const
  487. {
  488. Assert(m_pchMinLine - m_pchBase >= 0);
  489. Assert(m_pchMinLine - m_pchBase <= LONG_MAX);
  490. Assert(static_cast<charcount_t>(m_pchMinLine - m_pchBase) >= m_cMinLineMultiUnits);
  491. return static_cast<charcount_t>(m_pchMinLine - m_pchBase - m_cMinLineMultiUnits);
  492. }
  493. // Returns the current line number
  494. charcount_t LineCur(void) const { return m_line; }
  495. void SetCurrentCharacter(charcount_t offset, ULONG lineNumber = 0)
  496. {
  497. DebugOnly(m_iecpLimTokPrevious = (size_t)-1);
  498. DebugOnly(m_ichLimTokPrevious = (charcount_t)-1);
  499. size_t length = m_pchLast - m_pchBase;
  500. if (offset > length) offset = static_cast< charcount_t >(length);
  501. size_t ibOffset = this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, offset);
  502. m_currentCharacter = m_pchBase + ibOffset;
  503. Assert(ibOffset >= offset);
  504. this->RestoreMultiUnits(ibOffset - offset);
  505. m_line = lineNumber;
  506. }
  507. // IScanner methods
  508. virtual void GetErrorLineInfo(__out int32& ichMin, __out int32& ichLim, __out int32& line, __out int32& ichMinLine)
  509. {
  510. ichMin = this->IchMinError();
  511. ichLim = this->IchLimError();
  512. line = this->LineCur();
  513. ichMinLine = this->IchMinLine();
  514. if (m_ichLimError && m_ichMinError < (charcount_t)ichMinLine)
  515. {
  516. line = m_startLine;
  517. ichMinLine = UpdateLine(line, m_pchStartLine, m_pchLast, 0, ichMin);
  518. }
  519. }
  520. virtual HRESULT SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine);
  521. class TemporaryBuffer
  522. {
  523. friend Scanner<EncodingPolicy>;
  524. private:
  525. // Keep a reference to the scanner.
  526. // We will use it to signal an error if we fail to allocate the buffer.
  527. Scanner<EncodingPolicy>* m_pscanner;
  528. uint32 m_cchMax;
  529. uint32 m_ichCur;
  530. __field_ecount(m_cchMax) OLECHAR *m_prgch;
  531. byte m_rgbInit[256];
  532. public:
  533. TemporaryBuffer()
  534. {
  535. m_pscanner = nullptr;
  536. m_prgch = (OLECHAR*)m_rgbInit;
  537. m_cchMax = _countof(m_rgbInit) / sizeof(OLECHAR);
  538. m_ichCur = 0;
  539. }
  540. ~TemporaryBuffer()
  541. {
  542. if (m_prgch != (OLECHAR*)m_rgbInit)
  543. {
  544. free(m_prgch);
  545. }
  546. }
  547. void Reset()
  548. {
  549. m_ichCur = 0;
  550. }
  551. void Clear()
  552. {
  553. if (m_prgch != (OLECHAR*)m_rgbInit)
  554. {
  555. free(m_prgch);
  556. m_prgch = (OLECHAR*)m_rgbInit;
  557. m_cchMax = _countof(m_rgbInit) / sizeof(OLECHAR);
  558. }
  559. Reset();
  560. }
  561. void AppendCh(uint ch)
  562. {
  563. return AppendCh<true>(ch);
  564. }
  565. template<bool performAppend> void AppendCh(uint ch)
  566. {
  567. if (performAppend)
  568. {
  569. if (m_ichCur >= m_cchMax)
  570. {
  571. Grow();
  572. }
  573. Assert(m_ichCur < m_cchMax);
  574. __analysis_assume(m_ichCur < m_cchMax);
  575. m_prgch[m_ichCur++] = static_cast<OLECHAR>(ch);
  576. }
  577. }
  578. private:
  579. void Grow()
  580. {
  581. Assert(m_pscanner != nullptr);
  582. byte *prgbNew;
  583. byte *prgbOld = (byte *)m_prgch;
  584. ULONG cbNew;
  585. if (FAILED(ULongMult(m_cchMax, sizeof(OLECHAR) * 2, &cbNew)))
  586. {
  587. m_pscanner->Error(ERRnoMemory);
  588. }
  589. if (prgbOld == m_rgbInit)
  590. {
  591. if (nullptr == (prgbNew = static_cast<byte*>(malloc(cbNew))))
  592. m_pscanner->Error(ERRnoMemory);
  593. js_memcpy_s(prgbNew, cbNew, prgbOld, m_ichCur * sizeof(OLECHAR));
  594. }
  595. else if (nullptr == (prgbNew = static_cast<byte*>(realloc(prgbOld, cbNew))))
  596. {
  597. m_pscanner->Error(ERRnoMemory);
  598. }
  599. m_prgch = (OLECHAR*)prgbNew;
  600. m_cchMax = cbNew / sizeof(OLECHAR);
  601. }
  602. };
  603. tokens GetPrevious() { return m_tkPrevious; }
  604. void Capture(_Out_ RestorePoint* restorePoint);
  605. void SeekTo(const RestorePoint& restorePoint);
  606. void SeekToForcingPid(const RestorePoint& restorePoint);
  607. void Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr);
  608. void SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId);
  609. void Clear();
  610. HashTbl * GetHashTbl() { return &m_htbl; }
  611. private:
  612. Parser *m_parser;
  613. HashTbl m_htbl;
  614. Token *m_ptoken;
  615. EncodedCharPtr m_pchBase; // beginning of source
  616. EncodedCharPtr m_pchLast; // The end of source
  617. EncodedCharPtr m_pchMinLine; // beginning of current line
  618. EncodedCharPtr m_pchMinTok; // beginning of current token
  619. EncodedCharPtr m_currentCharacter; // current character
  620. EncodedCharPtr m_pchPrevLine; // beginning of previous line
  621. size_t m_cMinTokMultiUnits; // number of multi-unit characters previous to m_pchMinTok
  622. size_t m_cMinLineMultiUnits; // number of multi-unit characters previous to m_pchMinLine
  623. uint16 m_fStringTemplateDepth; // we should treat } as string template middle starting character (depth instead of flag)
  624. BOOL m_fHadEol;
  625. BOOL m_fIsModuleCode : 1;
  626. BOOL m_doubleQuoteOnLastTkStrCon :1;
  627. bool m_OctOrLeadingZeroOnLastTKNumber :1;
  628. bool m_EscapeOnLastTkStrCon:1;
  629. bool m_lastIdentifierHasEscape:1;
  630. BOOL m_fNextStringTemplateIsTagged:1; // the next string template scanned has a tag (must create raw strings)
  631. BYTE m_DeferredParseFlags:2; // suppressStrPid and suppressIdPid
  632. bool es6UnicodeMode; // True if ES6Unicode Extensions are enabled.
  633. bool m_fYieldIsKeywordRegion; // Whether to treat 'yield' as an identifier or keyword
  634. bool m_fAwaitIsKeywordRegion; // Whether to treat 'await' as an identifier or keyword
  635. // Temporary buffer.
  636. TemporaryBuffer m_tempChBuf;
  637. TemporaryBuffer m_tempChBufSecondary;
  638. charcount_t m_line;
  639. ScanState m_scanState;
  640. charcount_t m_ichMinError;
  641. charcount_t m_ichLimError;
  642. charcount_t m_startLine;
  643. EncodedCharPtr m_pchStartLine;
  644. Js::ScriptContext* m_scriptContext;
  645. const Js::CharClassifier *charClassifier;
  646. tokens m_tkPrevious;
  647. size_t m_iecpLimTokPrevious;
  648. charcount_t m_ichLimTokPrevious;
  649. void ClearStates();
  650. template <bool forcePid>
  651. void SeekAndScan(const RestorePoint& restorePoint);
  652. tokens ScanCore(bool identifyKwds);
  653. tokens ScanAhead();
  654. tokens ScanError(EncodedCharPtr pchCur, tokens errorToken)
  655. {
  656. m_currentCharacter = pchCur;
  657. return m_ptoken->tk = tkScanError;
  658. }
  659. __declspec(noreturn) void Error(HRESULT hr)
  660. {
  661. m_pchMinTok = m_currentCharacter;
  662. m_cMinTokMultiUnits = this->m_cMultiUnits;
  663. throw ParseExceptionObject(hr);
  664. }
  665. EncodedCharPtr PchBase(void) const
  666. {
  667. return m_pchBase;
  668. }
  669. EncodedCharPtr PchMinTok(void)
  670. {
  671. return m_pchMinTok;
  672. }
  673. template<bool stringTemplateMode, bool createRawString> tokens ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp);
  674. tokens ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp);
  675. tokens ScanStringTemplateBegin(EncodedCharPtr *pp);
  676. tokens ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp);
  677. void ScanNewLine(uint ch);
  678. void NotifyScannedNewLine();
  679. charcount_t LineLength(EncodedCharPtr first, EncodedCharPtr last, size_t* cb);
  680. tokens ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp);
  681. BOOL FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last);
  682. tokens ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar, EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp);
  683. tokens SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef);
  684. tokens ScanRegExpConstant(ArenaAllocator* alloc);
  685. tokens ScanRegExpConstantNoAST(ArenaAllocator* alloc);
  686. EncodedCharPtr FScanNumber(EncodedCharPtr p, double *pdbl, LikelyNumberType& likelyInt, size_t savedMultiUnits);
  687. IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar);
  688. IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last);
  689. uint32 UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last);
  690. void SaveSrcPos(void)
  691. {
  692. m_pchMinTok = m_currentCharacter;
  693. }
  694. OLECHAR PeekNextChar(void)
  695. {
  696. return this->PeekFull(m_currentCharacter, m_pchLast);
  697. }
  698. OLECHAR ReadNextChar(void)
  699. {
  700. return this->template ReadFull<true>(m_currentCharacter, m_pchLast);
  701. }
  702. EncodedCharPtr AdjustedLast() const
  703. {
  704. return m_pchLast;
  705. }
  706. size_t AdjustedLength() const
  707. {
  708. return AdjustedLast() - m_pchBase;
  709. }
  710. bool IsStrictMode() const
  711. {
  712. return this->m_parser != NULL && this->m_parser->IsStrictMode();
  713. }
  714. // This function expects the first character to be a 'u'
  715. // It will attempt to return a codepoint represented by a single escape point (either of the form \uXXXX or \u{any number of hex characters, s.t. value < 0x110000}
  716. bool TryReadEscape(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar = nullptr);
  717. template <bool bScan>
  718. bool TryReadCodePointRest(codepoint_t lower, EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar);
  719. template <bool bScan>
  720. inline bool TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar);
  721. inline BOOL IsIdContinueNext(EncodedCharPtr startingLocation, EncodedCharPtr endOfSource)
  722. {
  723. codepoint_t nextCodepoint;
  724. bool ignore;
  725. if (TryReadCodePoint<false>(startingLocation, endOfSource, &nextCodepoint, &ignore, &ignore))
  726. {
  727. return charClassifier->IsIdContinue(nextCodepoint);
  728. }
  729. return false;
  730. }
  731. charcount_t UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd);
  732. };