// Scan.h
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  // Forward declaration only: Js::DelayLoadWindowsGlobalization is not defined in this header.
  namespace Js
  {
      class DelayLoadWindowsGlobalization;
  }
  10. #include "Windows.Globalization.h"
  // Declared here, defined elsewhere.
  // NOTE(review): presumably counts the line breaks in the first cch characters of psz, with the
  // default cch == -1 meaning "psz is null-terminated" - confirm against the definition.
  int CountNewlines(LPCOLESTR psz, int cch = -1);
  // Forward declarations; neither type is defined in this header.
  class Parser;
  struct ParseContext;
  14. struct Token
  15. {
  16. private:
  17. union
  18. {
  19. struct
  20. {
  21. IdentPtr pid;
  22. const char * pchMin;
  23. long length;
  24. };
  25. long lw;
  26. struct
  27. {
  28. double dbl;
  29. // maybeInt will be true if the number did not contain 'e', 'E' , or '.'
  30. // notably important in asm.js where the '.' has semantic importance
  31. bool maybeInt;
  32. };
  33. UnifiedRegex::RegexPattern* pattern;
  34. struct
  35. {
  36. charcount_t ichMin;
  37. charcount_t ichLim;
  38. };
  39. } u;
  40. IdentPtr CreateIdentifier(HashTbl * hashTbl);
  41. public:
  42. Token() : tk(tkLim) {}
  43. tokens tk;
  44. BOOL IsIdentifier() const
  45. {
  46. return tk == tkID;
  47. }
  48. IdentPtr GetStr() const
  49. {
  50. Assert(tk == tkStrCon || tk == tkStrTmplBasic || tk == tkStrTmplBegin || tk == tkStrTmplMid || tk == tkStrTmplEnd);
  51. return u.pid;
  52. }
  53. IdentPtr GetIdentifier(HashTbl * hashTbl)
  54. {
  55. Assert(IsIdentifier() || IsReservedWord());
  56. if (u.pid)
  57. {
  58. return u.pid;
  59. }
  60. return CreateIdentifier(hashTbl);
  61. }
  62. long GetLong() const
  63. {
  64. Assert(tk == tkIntCon);
  65. return u.lw;
  66. }
  67. double GetDouble() const
  68. {
  69. Assert(tk == tkFltCon);
  70. return u.dbl;
  71. }
  72. bool GetDoubleMayBeInt() const
  73. {
  74. Assert(tk == tkFltCon);
  75. return u.maybeInt;
  76. }
  77. UnifiedRegex::RegexPattern * GetRegex()
  78. {
  79. Assert(tk == tkRegExp);
  80. return u.pattern;
  81. }
  82. // NOTE: THESE ROUTINES DEPEND ON THE ORDER THAT OPERATORS
  83. // ARE DECLARED IN kwd-xxx.h FILES.
  84. BOOL IsReservedWord() const
  85. {
  86. // Keywords and future reserved words (does not include operators)
  87. return tk < tkID;
  88. }
  89. BOOL IsKeyword() const;
  90. BOOL IsFutureReservedWord(const BOOL isStrictMode) const
  91. {
  92. // Reserved words that are not keywords
  93. return tk >= tkENUM && tk <= (isStrictMode ? tkSTATIC : tkENUM);
  94. }
  95. BOOL IsOperator() const
  96. {
  97. return tk >= tkComma && tk < tkLParen;
  98. }
  99. // UTF16 Scanner are only for syntax coloring. Only support
  100. // defer pid creation for UTF8
  101. void SetIdentifier(const char * pchMin, long len)
  102. {
  103. this->u.pid = nullptr;
  104. this->u.pchMin = pchMin;
  105. this->u.length = len;
  106. }
  107. void SetIdentifier(IdentPtr pid)
  108. {
  109. this->u.pid = pid;
  110. this->u.pchMin = nullptr;
  111. }
  112. void SetLong(long value)
  113. {
  114. this->u.lw = value;
  115. }
  116. void SetDouble(double dbl, bool maybeInt)
  117. {
  118. this->u.dbl = dbl;
  119. this->u.maybeInt = maybeInt;
  120. }
  121. tokens SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser);
  122. };
  // Byte and mutable byte-pointer aliases.
  // NOTE(review): judging by the names these are meant for UTF-8 encoded text - confirm at the use sites.
  typedef BYTE UTF8Char;
  typedef UTF8Char* UTF8CharPtr;
  125. class NullTerminatedUnicodeEncodingPolicy
  126. {
  127. public:
  128. typedef OLECHAR EncodedChar;
  129. typedef const OLECHAR *EncodedCharPtr;
  130. protected:
  131. static const bool MultiUnitEncoding = false;
  132. static const size_t m_cMultiUnits = 0;
  133. static BOOL IsMultiUnitChar(OLECHAR ch) { return FALSE; }
  134. // See comment below regarding unused 'last' parameter
  135. static OLECHAR ReadFirst(EncodedCharPtr &p, EncodedCharPtr last) { return *p++; }
  136. template <bool bScan>
  137. static OLECHAR ReadRest(OLECHAR ch, EncodedCharPtr &p, EncodedCharPtr last) { return ch; }
  138. template <bool bScan>
  139. static OLECHAR ReadFull(EncodedCharPtr &p, EncodedCharPtr last) { return *p++; }
  140. static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last) { return *p; }
  141. static OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last) { return *p; }
  142. static void RestoreMultiUnits(size_t multiUnits) { }
  143. static size_t CharacterOffsetToUnitOffset(EncodedCharPtr start, EncodedCharPtr current, EncodedCharPtr last, charcount_t offset) { return offset; }
  144. static void ConvertToUnicode(__out_ecount_full(cch) LPOLESTR pch, charcount_t cch, EncodedCharPtr pu)
  145. {
  146. js_memcpy_s(pch, cch * sizeof(OLECHAR), pu, cch * sizeof(OLECHAR));
  147. }
  148. public:
  149. void FromExternalSource() { }
  150. bool IsFromExternalSource() { return false; }
  151. };
  152. template <bool nullTerminated>
  153. class UTF8EncodingPolicyBase
  154. {
  155. public:
  156. typedef utf8char_t EncodedChar;
  157. typedef LPCUTF8 EncodedCharPtr;
  158. protected:
  159. static const bool MultiUnitEncoding = true;
  160. size_t m_cMultiUnits;
  161. utf8::DecodeOptions m_decodeOptions;
  162. UTF8EncodingPolicyBase(): m_cMultiUnits(0), m_decodeOptions(utf8::doAllowThreeByteSurrogates) { }
  163. static BOOL IsMultiUnitChar(OLECHAR ch) { return ch > 0x7f; }
  164. // Note when nullTerminated is false we still need to increment the character pointer because the scanner "puts back" this virtual null character by decrementing the pointer
  165. static OLECHAR ReadFirst(EncodedCharPtr &p, EncodedCharPtr last) { return (nullTerminated || p < last) ? static_cast< OLECHAR >(*p++) : (p++, 0); }
  166. // "bScan" indicates if this ReadFull is part of scanning. Pass true during scanning and ReadFull will update
  167. // related Scanner state. The caller is supposed to sync result "p" to Scanner's current position. Pass false
  168. // otherwise and this doesn't affect Scanner state.
  169. template <bool bScan>
  170. OLECHAR ReadFull(EncodedCharPtr &p, EncodedCharPtr last)
  171. {
  172. EncodedChar ch = (nullTerminated || p < last) ? *p++ : (p++, 0);
  173. return !IsMultiUnitChar(ch) ? static_cast< OLECHAR >(ch) : ReadRest<bScan>(ch, p, last);
  174. }
  175. static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last) { return (nullTerminated || p < last) ? static_cast< OLECHAR >(*p) : 0; }
  176. OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last)
  177. {
  178. OLECHAR result = PeekFirst(p, last);
  179. if (IsMultiUnitChar(result))
  180. {
  181. result = ReadFull<false>(p, last);
  182. }
  183. return result;
  184. }
  185. // "bScan" indicates if this ReadRest is part of scanning. Pass true during scanning and ReadRest will update
  186. // related Scanner state. The caller is supposed to sync result "p" to Scanner's current position. Pass false
  187. // otherwise and this doesn't affect Scanner state.
  188. template <bool bScan>
  189. OLECHAR ReadRest(OLECHAR ch, EncodedCharPtr &p, EncodedCharPtr last)
  190. {
  191. EncodedCharPtr s;
  192. if (bScan)
  193. {
  194. s = p;
  195. }
  196. OLECHAR result = utf8::DecodeTail(ch, p, last, m_decodeOptions);
  197. if (bScan)
  198. {
  199. // If we are scanning, update m_cMultiUnits counter.
  200. m_cMultiUnits += p - s;
  201. }
  202. return result;
  203. }
  204. void RestoreMultiUnits(size_t multiUnits) { m_cMultiUnits = multiUnits; }
  205. size_t CharacterOffsetToUnitOffset(EncodedCharPtr start, EncodedCharPtr current, EncodedCharPtr last, charcount_t offset)
  206. {
  207. // Note: current may be before or after last. If last is the null terminator, current should be within [start, last].
  208. // But if we excluded HTMLCommentSuffix for the source, last is before "// -->\0". Scanner may stop at null
  209. // terminator past last, then current is after last.
  210. Assert(current >= start);
  211. size_t currentUnitOffset = current - start;
  212. Assert(currentUnitOffset > m_cMultiUnits);
  213. Assert(currentUnitOffset - m_cMultiUnits < LONG_MAX);
  214. charcount_t currentCharacterOffset = charcount_t(currentUnitOffset - m_cMultiUnits);
  215. // If the offset is the current character offset then just return the current unit offset.
  216. if (currentCharacterOffset == offset) return currentUnitOffset;
  217. // If we have not encountered any multi-unit characters and we are moving backward the
  218. // character index and unit index are 1:1 so just return offset
  219. if (m_cMultiUnits == 0 && offset <= currentCharacterOffset) return offset;
  220. // Use local decode options
  221. utf8::DecodeOptions decodeOptions = IsFromExternalSource() ? utf8::doDefault : utf8::doAllowThreeByteSurrogates;
  222. if (offset > currentCharacterOffset)
  223. {
  224. // If we are looking for an offset past current, current must be within [start, last]. We don't expect seeking
  225. // scanner position past last.
  226. Assert(current <= last);
  227. // If offset > currentOffset we already know the current character offset. The unit offset is the
  228. // unit index of offset - currentOffset characters from current.
  229. charcount_t charsLeft = offset - currentCharacterOffset;
  230. return currentUnitOffset + utf8::CharacterIndexToByteIndex(current, last - current, charsLeft, decodeOptions);
  231. }
  232. // If all else fails calculate the index from the start of the buffer.
  233. return utf8::CharacterIndexToByteIndex(start, currentUnitOffset, offset, decodeOptions);
  234. }
  235. void ConvertToUnicode(__out_ecount_full(cch) LPOLESTR pch, charcount_t cch, EncodedCharPtr pu)
  236. {
  237. m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doSecondSurrogatePair);
  238. utf8::DecodeInto(pch, pu, cch, m_decodeOptions);
  239. }
  240. public:
  241. // If we get UTF8 source buffer, turn off doAllowThreeByteSurrogates but allow invalid WCHARs without replacing them with replacement 'g_chUnknown'.
  242. void FromExternalSource() { m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doAllowThreeByteSurrogates | utf8::doAllowInvalidWCHARs); }
  243. bool IsFromExternalSource() { return (m_decodeOptions & utf8::doAllowThreeByteSurrogates) == 0; }
  244. };
  245. typedef UTF8EncodingPolicyBase<true> NullTerminatedUTF8EncodingPolicy;
  246. typedef UTF8EncodingPolicyBase<false> NotNullTerminatedUTF8EncodingPolicy;
  // Abstract, encoding-independent view of a scanner that exposes error location queries.
  // Scanner<EncodingPolicy> derives from it and implements both methods.
  interface IScanner
  {
      // Reports the error's character range [ichMin, ichLim), its line number, and the character
      // offset at which that line starts.
      virtual void GetErrorLineInfo(__out long& ichMin, __out long& ichLim, __out long& line, __out long& ichMinLine) = 0;
      // Produces a BSTR in *pbstrLine for the line that starts at character offset ichMinLine.
      // NOTE(review): presumably the caller owns and frees the BSTR - confirm against the implementation.
      virtual HRESULT SysAllocErrorLine(long ichMinLine, __out BSTR* pbstrLine) = 0;
  };
  // Flags that can be provided to the Scan functions.
  // These can be bitwise OR'ed.
  // NOTE(review): the enumerator names say "Suppress" while the per-value comments say "Force ... to
  // always have pid"; one of the two is misleading - confirm against the scanner implementation.
  enum ScanFlag
  {
      ScanFlagNone = 0,
      ScanFlagSuppressStrPid = 1, // Force strings to always have pid
      ScanFlagSuppressIdPid = 2 // Force identifiers to always have pid (currently unused)
  };
  // Function-pointer type for a per-comment callback. `data` is an opaque pointer handed back to the
  // callback; the remaining arguments describe the comment.
  // NOTE(review): judging by the parameter names these are its first two characters, whether it
  // contains a type definition, its character range [min, lim), adjacency and multi-line flags, and
  // its start/end lines - confirm exact semantics at the call site.
  typedef HRESULT (*CommentCallback)(void *data, OLECHAR firstChar, OLECHAR secondChar, bool containTypeDef, charcount_t min, charcount_t lim, bool adjacent, bool multiline, charcount_t startLine, charcount_t endLine);
  261. // Restore point defined using a relative offset rather than a pointer.
  262. struct RestorePoint
  263. {
  264. charcount_t m_ichMinTok;
  265. charcount_t m_ichMinLine;
  266. size_t m_cMinTokMultiUnits;
  267. size_t m_cMinLineMultiUnits;
  268. charcount_t m_line;
  269. uint functionIdIncrement;
  270. size_t lengthDecr;
  271. BOOL m_fHadEol;
  272. #ifdef DEBUG
  273. size_t m_cMultiUnits;
  274. #endif
  275. RestorePoint()
  276. : m_ichMinTok((charcount_t)-1),
  277. m_ichMinLine((charcount_t)-1),
  278. m_cMinTokMultiUnits((size_t)-1),
  279. m_cMinLineMultiUnits((size_t)-1),
  280. m_line((charcount_t)-1),
  281. functionIdIncrement(0),
  282. lengthDecr(0),
  283. m_fHadEol(FALSE)
  284. #ifdef DEBUG
  285. , m_cMultiUnits((size_t)-1)
  286. #endif
  287. {
  288. };
  289. };
  290. template <typename EncodingPolicy>
  291. class Scanner : public IScanner, public EncodingPolicy
  292. {
  293. friend Parser;
  294. typedef typename EncodingPolicy::EncodedChar EncodedChar;
  295. typedef typename EncodingPolicy::EncodedCharPtr EncodedCharPtr;
  296. public:
  297. static Scanner * Create(Parser* parser, HashTbl *phtbl, Token *ptoken, ErrHandler *perr, Js::ScriptContext *scriptContext)
  298. {
  299. return HeapNewNoThrow(Scanner, parser, phtbl, ptoken, perr, scriptContext);
  300. }
  301. void Release(void)
  302. {
  303. delete this;
  304. }
  305. tokens Scan();
  306. tokens ScanNoKeywords();
  307. tokens ScanForcingPid();
  308. void SetText(EncodedCharPtr psz, size_t offset, size_t length, charcount_t characterOffset, ULONG grfscr, ULONG lineNumber = 0);
  309. void PrepareForBackgroundParse(Js::ScriptContext *scriptContext);
  310. enum ScanState
  311. {
  312. ScanStateNormal = 0,
  313. ScanStateMultiLineComment = 1,
  314. ScanStateMultiLineSingleQuoteString = 2,
  315. ScanStateMultiLineDoubleQuoteString = 3,
  316. ScanStateStringTemplateMiddleOrEnd = 4,
  317. };
  318. ScanState GetScanState() { return m_scanState; }
  319. void SetScanState(ScanState state) { m_scanState = state; }
  320. bool SetYieldIsKeyword(bool fYieldIsKeyword)
  321. {
  322. bool fPrevYieldIsKeyword = m_fYieldIsKeyword;
  323. m_fYieldIsKeyword = fYieldIsKeyword;
  324. return fPrevYieldIsKeyword;
  325. }
  326. bool YieldIsKeyword()
  327. {
  328. return m_fYieldIsKeyword;
  329. }
  330. bool SetAwaitIsKeyword(bool fAwaitIsKeyword)
  331. {
  332. bool fPrevAwaitIsKeyword = m_fAwaitIsKeyword;
  333. m_fAwaitIsKeyword = fAwaitIsKeyword;
  334. return fPrevAwaitIsKeyword;
  335. }
  336. bool AwaitIsKeyword()
  337. {
  338. return m_fAwaitIsKeyword;
  339. }
  340. tokens TryRescanRegExp();
  341. tokens RescanRegExp();
  342. tokens RescanRegExpNoAST();
  343. tokens RescanRegExpTokenizer();
  344. BOOL FHadNewLine(void)
  345. {
  346. return m_fHadEol;
  347. }
  348. IdentPtr PidFromLong(long lw);
  349. IdentPtr PidFromDbl(double dbl);
  350. LPCOLESTR StringFromLong(long lw);
  351. LPCOLESTR StringFromDbl(double dbl);
  352. IdentPtr GetSecondaryBufferAsPid();
  353. BYTE SetDeferredParse(BOOL defer)
  354. {
  355. BYTE fOld = m_DeferredParseFlags;
  356. if (defer)
  357. {
  358. m_DeferredParseFlags |= ScanFlagSuppressStrPid;
  359. }
  360. else
  361. {
  362. m_DeferredParseFlags = ScanFlagNone;
  363. }
  364. return fOld;
  365. }
  366. void SetDeferredParseFlags(BYTE flags)
  367. {
  368. m_DeferredParseFlags = flags;
  369. }
  370. // the functions IsDoubleQuoteOnLastTkStrCon() and IsHexOrOctOnLastTKNumber() works only with a scanner without lookahead
  371. // Both functions are used to get more info on the last token for specific diffs necessary for JSON parsing.
  372. //Single quotes are not legal in JSON strings. Make distinction between single quote string constant and single quote string
  373. BOOL IsDoubleQuoteOnLastTkStrCon()
  374. {
  375. return m_doubleQuoteOnLastTkStrCon;
  376. }
  377. // True if all chars of last string constant are ascii
  378. BOOL IsEscapeOnLastTkStrCon()
  379. {
  380. return m_EscapeOnLastTkStrCon;
  381. }
  382. bool IsOctOrLeadingZeroOnLastTKNumber()
  383. {
  384. return m_OctOrLeadingZeroOnLastTKNumber;
  385. }
  386. // Returns the character offset of the first token. The character offset is the offset the first character of the token would
  387. // have if the entire file was converted to Unicode (UTF16-LE).
  388. charcount_t IchMinTok(void) const
  389. {
  390. Assert(m_pchMinTok - m_pchBase >= 0);
  391. Assert(m_pchMinTok - m_pchBase <= LONG_MAX);
  392. return static_cast< charcount_t >(m_pchMinTok - m_pchBase - m_cMinTokMultiUnits);
  393. }
  394. // Returns the character offset of the character immediately following the token. The character offset is the offset the first
  395. // character of the token would have if the entire file was converted to Unicode (UTF16-LE).
  396. charcount_t IchLimTok(void) const
  397. {
  398. Assert(m_currentCharacter - m_pchBase >= 0);
  399. Assert(m_currentCharacter - m_pchBase <= LONG_MAX);
  400. return static_cast< charcount_t >(m_currentCharacter - m_pchBase - m_cMultiUnits);
  401. }
  402. void SetErrorPosition(charcount_t ichMinError, charcount_t ichLimError)
  403. {
  404. Assert(ichLimError > 0 || ichMinError == 0);
  405. m_ichMinError = ichMinError;
  406. m_ichLimError = ichLimError;
  407. }
  408. charcount_t IchMinError(void) const
  409. {
  410. return m_ichLimError ? m_ichMinError : IchMinTok();
  411. }
  412. charcount_t IchLimError(void) const
  413. {
  414. return m_ichLimError ? m_ichLimError : IchLimTok();
  415. }
  416. // Returns the encoded unit offset of first character of the token. For example, in a UTF-8 encoding this is the offset into
  417. // the UTF-8 buffer. In Unicode this is the same as IchMinTok().
  418. size_t IecpMinTok(void) const
  419. {
  420. return static_cast< size_t >(m_pchMinTok - m_pchBase);
  421. }
  422. // Returns the encoded unit offset of the character immediately following the token. For example, in a UTF-8 encoding this is
  423. // the offset into the UTF-8 buffer. In Unicode this is the same as IchLimTok().
  424. size_t IecpLimTok(void) const
  425. {
  426. return static_cast< size_t >(m_currentCharacter - m_pchBase);
  427. }
  428. size_t IecpLimTokPrevious() const
  429. {
  430. AssertMsg(m_iecpLimTokPrevious != (size_t)-1, "IecpLimTokPrevious() cannot be called before scanning a token");
  431. return m_iecpLimTokPrevious;
  432. }
  433. IdentPtr PidAt(size_t iecpMin, size_t iecpLim);
  434. // Returns the character offset within the stream of the first character on the current line.
  435. charcount_t IchMinLine(void) const
  436. {
  437. Assert(m_pchMinLine - m_pchBase >= 0);
  438. Assert(m_pchMinLine - m_pchBase <= LONG_MAX);
  439. return static_cast<charcount_t>(m_pchMinLine - m_pchBase - m_cMinLineMultiUnits);
  440. }
  441. // Returns the current line number
  442. charcount_t LineCur(void) { return m_line; }
  443. tokens ErrorToken() { return m_errorToken; }
  444. void SetCurrentCharacter(charcount_t offset, ULONG lineNumber = 0)
  445. {
  446. DebugOnly(m_iecpLimTokPrevious = (size_t)-1);
  447. size_t length = m_pchLast - m_pchBase;
  448. if (offset > length) offset = static_cast< charcount_t >(length);
  449. size_t ibOffset = CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, offset);
  450. m_currentCharacter = m_pchBase + ibOffset;
  451. Assert(ibOffset >= offset);
  452. RestoreMultiUnits(ibOffset - offset);
  453. m_line = lineNumber;
  454. }
  455. // IScanner methods
  456. virtual void GetErrorLineInfo(__out long& ichMin, __out long& ichLim, __out long& line, __out long& ichMinLine)
  457. {
  458. ichMin = this->IchMinError();
  459. ichLim = this->IchLimError();
  460. line = this->LineCur();
  461. ichMinLine = this->IchMinLine();
  462. if (m_ichLimError && m_ichMinError < (charcount_t)ichMinLine)
  463. {
  464. line = m_startLine;
  465. ichMinLine = UpdateLine(line, m_pchStartLine, m_pchLast, 0, ichMin);
  466. }
  467. }
  468. virtual HRESULT SysAllocErrorLine(long ichMinLine, __out BSTR* pbstrLine);
  469. charcount_t UpdateLine(long &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd);
  470. class TemporaryBuffer
  471. {
  472. friend Scanner<EncodingPolicy>;
  473. private:
  474. // Keep a reference to the scanner.
  475. // We will use it to signal an error if we fail to allocate the buffer.
  476. Scanner<EncodingPolicy>* m_pscanner;
  477. ulong m_cchMax;
  478. ulong m_ichCur;
  479. __field_ecount(m_cchMax) OLECHAR *m_prgch;
  480. byte m_rgbInit[256];
  481. public:
  482. TemporaryBuffer()
  483. {
  484. m_pscanner = nullptr;
  485. m_prgch = (OLECHAR*)m_rgbInit;
  486. m_cchMax = _countof(m_rgbInit) / sizeof(OLECHAR);
  487. m_ichCur = 0;
  488. }
  489. ~TemporaryBuffer()
  490. {
  491. if (m_prgch != (OLECHAR*)m_rgbInit)
  492. {
  493. free(m_prgch);
  494. }
  495. }
  496. void Init()
  497. {
  498. m_ichCur = 0;
  499. }
  500. void AppendCh(uint ch)
  501. {
  502. return AppendCh<true>(ch);
  503. }
  504. template<bool performAppend> void AppendCh(uint ch)
  505. {
  506. if (performAppend)
  507. {
  508. if (m_ichCur >= m_cchMax)
  509. {
  510. Grow();
  511. }
  512. Assert(m_ichCur < m_cchMax);
  513. __analysis_assume(m_ichCur < m_cchMax);
  514. m_prgch[m_ichCur++] = static_cast<OLECHAR>(ch);
  515. }
  516. }
  517. void Grow()
  518. {
  519. Assert(m_pscanner != nullptr);
  520. byte *prgbNew;
  521. byte *prgbOld = (byte *)m_prgch;
  522. unsigned long cbNew;
  523. if (FAILED(ULongMult(m_cchMax, sizeof(OLECHAR) * 2, &cbNew)))
  524. {
  525. m_pscanner->Error(ERRnoMemory);
  526. }
  527. if (prgbOld == m_rgbInit)
  528. {
  529. if (nullptr == (prgbNew = static_cast<byte*>(malloc(cbNew))))
  530. m_pscanner->Error(ERRnoMemory);
  531. js_memcpy_s(prgbNew, cbNew, prgbOld, m_ichCur * sizeof(OLECHAR));
  532. }
  533. else if (nullptr == (prgbNew = static_cast<byte*>(realloc(prgbOld, cbNew))))
  534. {
  535. m_pscanner->Error(ERRnoMemory);
  536. }
  537. m_prgch = (OLECHAR*)prgbNew;
  538. m_cchMax = cbNew / sizeof(OLECHAR);
  539. }
  540. };
  541. void Capture(_Out_ RestorePoint* restorePoint);
  542. void SeekTo(const RestorePoint& restorePoint);
  543. void SeekToForcingPid(const RestorePoint& restorePoint);
  544. void Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr);
  545. void SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId);
  546. void SetNextStringTemplateIsTagged(BOOL value)
  547. {
  548. this->m_fNextStringTemplateIsTagged = value;
  549. }
  550. private:
  551. Parser *m_parser;
  552. HashTbl *m_phtbl;
  553. Token *m_ptoken;
  554. EncodedCharPtr m_pchBase; // beginning of source
  555. EncodedCharPtr m_pchLast; // The end of source
  556. EncodedCharPtr m_pchMinLine; // beginning of current line
  557. EncodedCharPtr m_pchMinTok; // beginning of current token
  558. EncodedCharPtr m_currentCharacter; // current character
  559. EncodedCharPtr m_pchPrevLine; // beginning of previous line
  560. size_t m_cMinTokMultiUnits; // number of multi-unit characters previous to m_pchMinTok
  561. size_t m_cMinLineMultiUnits; // number of multi-unit characters previous to m_pchMinLine
  562. ErrHandler *m_perr; // error handler to use
  563. uint16 m_fStringTemplateDepth; // we should treat } as string template middle starting character (depth instead of flag)
  564. BOOL m_fHadEol;
  565. BOOL m_fIsModuleCode : 1;
  566. BOOL m_doubleQuoteOnLastTkStrCon :1;
  567. bool m_OctOrLeadingZeroOnLastTKNumber :1;
  568. BOOL m_fSyntaxColor : 1; // whether we're just syntax coloring
  569. BOOL m_EscapeOnLastTkStrCon:1;
  570. BOOL m_fNextStringTemplateIsTagged:1; // the next string template scanned has a tag (must create raw strings)
  571. BYTE m_DeferredParseFlags:2; // suppressStrPid and suppressIdPid
  572. charcount_t m_ichCheck; // character at which completion is to be computed.
  573. bool es6UnicodeMode; // True if ES6Unicode Extensions are enabled.
  574. bool m_fYieldIsKeyword; // Whether to treat 'yield' as an identifier or keyword
  575. bool m_fAwaitIsKeyword; // Whether to treat 'await' as an identifier or keyword
  576. // Temporary buffer.
  577. TemporaryBuffer m_tempChBuf;
  578. TemporaryBuffer m_tempChBufSecondary;
  579. charcount_t m_line;
  580. ScanState m_scanState;
  581. tokens m_errorToken;
  582. charcount_t m_ichMinError;
  583. charcount_t m_ichLimError;
  584. charcount_t m_startLine;
  585. EncodedCharPtr m_pchStartLine;
  586. Js::ScriptContext* m_scriptContext;
  587. const Js::CharClassifier *charClassifier;
  588. tokens m_tkPrevious;
  589. size_t m_iecpLimTokPrevious;
  590. Scanner(Parser* parser, HashTbl *phtbl, Token *ptoken, ErrHandler *perr, Js::ScriptContext *scriptContext);
  591. ~Scanner(void);
  592. template <bool forcePid>
  593. void SeekAndScan(const RestorePoint& restorePoint);
  594. tokens ScanCore(bool identifyKwds);
  595. tokens ScanAhead();
  596. tokens ScanError(EncodedCharPtr pchCur, tokens errorToken)
  597. {
  598. m_currentCharacter = pchCur;
  599. m_errorToken = errorToken;
  600. return m_ptoken->tk = tkScanError;
  601. }
  602. __declspec(noreturn) void Error(HRESULT hr)
  603. {
  604. Assert(FAILED(hr));
  605. m_pchMinTok = m_currentCharacter;
  606. m_cMinTokMultiUnits = m_cMultiUnits;
  607. AssertMem(m_perr);
  608. m_perr->Throw(hr);
  609. }
  610. const EncodedCharPtr PchBase(void)
  611. {
  612. return m_pchBase;
  613. }
  614. const EncodedCharPtr PchMinTok(void)
  615. {
  616. return m_pchMinTok;
  617. }
  618. template<bool stringTemplateMode, bool createRawString> tokens ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp);
  619. tokens ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp);
  620. tokens ScanStringTemplateBegin(EncodedCharPtr *pp);
  621. tokens ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp);
  622. void ScanNewLine(uint ch);
  623. void NotifyScannedNewLine();
  624. charcount_t LineLength(EncodedCharPtr first, EncodedCharPtr last);
  625. tokens ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp);
  626. BOOL FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last);
  627. tokens ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar, EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp);
  628. tokens SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef);
  629. tokens ScanRegExpConstant(ArenaAllocator* alloc);
  630. tokens ScanRegExpConstantNoAST(ArenaAllocator* alloc);
  631. BOOL oFScanNumber(double *pdbl, bool& likelyInt);
  632. EncodedCharPtr FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt);
  633. IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar);
  634. IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last);
  635. ulong UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last);
  636. void SaveSrcPos(void)
  637. {
  638. m_pchMinTok = m_currentCharacter;
  639. }
  640. OLECHAR PeekNextChar(void)
  641. {
  642. return PeekFull(m_currentCharacter, m_pchLast);
  643. }
  644. OLECHAR ReadNextChar(void)
  645. {
  646. return ReadFull<true>(m_currentCharacter, m_pchLast);
  647. }
  648. EncodedCharPtr AdjustedLast() const
  649. {
  650. return m_pchLast;
  651. }
  652. size_t AdjustedLength() const
  653. {
  654. return AdjustedLast() - m_pchBase;
  655. }
  656. bool IsStrictMode() const
  657. {
  658. return this->m_parser != NULL && this->m_parser->IsStrictMode();
  659. }
  660. // This function expects the first character to be a 'u'
  661. // It will attempt to return a codepoint represented by a single escape point (either of the form \uXXXX or \u{any number of hex characters, s.t. value < 0x110000}
  662. bool TryReadEscape(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar = nullptr);
  663. template <bool bScan>
  664. bool TryReadCodePointRest(codepoint_t lower, EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar);
  665. template <bool bScan>
  666. __inline bool TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar);
  667. __inline BOOL IsIdContinueNext(EncodedCharPtr startingLocation, EncodedCharPtr endOfSource)
  668. {
  669. codepoint_t nextCodepoint;
  670. bool ignore;
  671. if (TryReadCodePoint<false>(startingLocation, endOfSource, &nextCodepoint, &ignore, &ignore))
  672. {
  673. return charClassifier->IsIdContinue(nextCodepoint);
  674. }
  675. return false;
  676. }
  677. };
  678. typedef Scanner<NullTerminatedUTF8EncodingPolicy> UTF8Scanner;