Hash.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. // StaticSym contains a string literal at the end (flexible array) and is
  7. // meant to be initialized statically. However, flexible array initialization
  8. // is not allowed in standard C++. We declare each StaticSym with length
  9. // instead and cast to common StaticSymLen<0>* (StaticSym*) to access.
  10. template <uint32 N>
  11. struct StaticSymLen
  12. {
  13. uint32 luHash;
  14. uint32 cch;
  15. OLECHAR sz[N];
  16. };
  17. typedef StaticSymLen<0> StaticSym;
  18. /***************************************************************************
  19. Hashing functions. Definitions in core\hashfunc.cpp.
  20. ***************************************************************************/
  21. ULONG CaseSensitiveComputeHash(LPCOLESTR prgch, LPCOLESTR end);
  22. ULONG CaseSensitiveComputeHash(LPCUTF8 prgch, LPCUTF8 end);
  23. ULONG CaseInsensitiveComputeHash(LPCOLESTR posz);
  24. enum
  25. {
  26. fidNil = 0x0000,
  27. fidKwdRsvd = 0x0001, // the keyword is a reserved word
  28. fidKwdFutRsvd = 0x0002, // a future reserved word, but only in strict mode
  29. // Flags to identify tracked aliases of "eval"
  30. fidEval = 0x0008,
  31. // Flags to identify tracked aliases of "let"
  32. fidLetOrConst = 0x0010, // ID has previously been used in a block-scoped declaration
  33. // This flag is used by the Parser CountDcls and FillDcls methods.
  34. // CountDcls sets the bit as it walks through the var decls so that
  35. // it can skip duplicates. FillDcls clears the bit as it walks through
  36. // again to skip duplicates.
  37. fidGlobalDcl = 0x2000,
  38. fidUsed = 0x4000, // name referenced by source code
  39. fidModuleExport = 0x8000 // name is module export
  40. };
  41. struct BlockIdsStack
  42. {
  43. int id;
  44. BlockIdsStack *prev;
  45. };
  46. class Span
  47. {
  48. charcount_t m_ichMin;
  49. charcount_t m_ichLim;
  50. public:
  51. Span(): m_ichMin((charcount_t)-1), m_ichLim((charcount_t)-1) { }
  52. Span(charcount_t ichMin, charcount_t ichLim): m_ichMin(ichMin), m_ichLim(ichLim) { }
  53. charcount_t GetIchMin() { return m_ichMin; }
  54. charcount_t GetIchLim() { Assert(m_ichMin != (charcount_t)-1); return m_ichLim; }
  55. void Set(charcount_t ichMin, charcount_t ichLim)
  56. {
  57. m_ichMin = ichMin;
  58. m_ichLim = ichLim;
  59. }
  60. operator bool() { return m_ichMin != -1; }
  61. };
  62. struct PidRefStack
  63. {
  64. PidRefStack() : isAsg(false), isDynamic(false), id(0), funcId(0), sym(nullptr), prev(nullptr), isEscape(false), isModuleExport(false), isFuncAssignment(false), isUsedInLdElem(false) {}
  65. PidRefStack(int id, Js::LocalFunctionId funcId) : isAsg(false), isDynamic(false), id(id), funcId(funcId), sym(nullptr), prev(nullptr), isEscape(false), isModuleExport(false), isFuncAssignment(false), isUsedInLdElem(false) {}
  66. int GetScopeId() const { return id; }
  67. Js::LocalFunctionId GetFuncScopeId() const { return funcId; }
  68. Symbol *GetSym() const { return sym; }
  69. void SetSym(Symbol *sym) { this->sym = sym; }
  70. bool IsAssignment() const { return isAsg; }
  71. bool IsFuncAssignment() const { return isFuncAssignment; }
  72. bool IsEscape() const { return isEscape; }
  73. void SetIsEscape(bool is) { isEscape = is; }
  74. bool IsDynamicBinding() const { return isDynamic; }
  75. void SetDynamicBinding() { isDynamic = true; }
  76. bool IsUsedInLdElem() const { return isUsedInLdElem; }
  77. void SetIsUsedInLdElem(bool is) { isUsedInLdElem = is; }
  78. Symbol **GetSymRef()
  79. {
  80. return &sym;
  81. }
  82. bool isAsg;
  83. bool isDynamic;
  84. bool isModuleExport;
  85. bool isEscape;
  86. bool isFuncAssignment;
  87. bool isUsedInLdElem;
  88. int id;
  89. Js::LocalFunctionId funcId;
  90. Symbol *sym;
  91. PidRefStack *prev;
  92. };
  93. enum AssignmentState : byte {
  94. NotAssigned,
  95. AssignedOnce,
  96. AssignedMultipleTimes
  97. };
  98. struct Ident
  99. {
  100. friend class HashTbl;
  101. private:
  102. Ident * m_pidNext; // next identifier in this hash bucket
  103. PidRefStack *m_pidRefStack;
  104. ushort m_tk; // token# if identifier is a keyword
  105. ushort m_grfid; // see fidXXX above
  106. uint32 m_luHash; // hash value
  107. uint32 m_cch; // length of the identifier spelling
  108. Js::PropertyId m_propertyId;
  109. AssignmentState assignmentState;
  110. bool isUsedInLdElem;
  111. OLECHAR m_sz[]; // the spelling follows (null terminated)
  112. void SetTk(tokens tk, ushort grfid);
  113. public:
  114. LPCOLESTR Psz(void)
  115. { return m_sz; }
  116. uint32 Cch(void)
  117. { return m_cch; }
  118. tokens Tk(bool isStrictMode);
  119. uint32 Hash(void)
  120. { return m_luHash; }
  121. PidRefStack *GetTopRef() const
  122. {
  123. return m_pidRefStack;
  124. }
  125. PidRefStack *GetTopRef(uint maxBlockId) const
  126. {
  127. PidRefStack *ref;
  128. for (ref = m_pidRefStack; ref && (uint)ref->id > maxBlockId; ref = ref->prev)
  129. {
  130. ; // nothing
  131. }
  132. return ref;
  133. }
  134. void SetTopRef(PidRefStack *ref)
  135. {
  136. m_pidRefStack = ref;
  137. }
  138. void PromoteAssignmentState()
  139. {
  140. if (assignmentState == NotAssigned)
  141. {
  142. assignmentState = AssignedOnce;
  143. }
  144. else if (assignmentState == AssignedOnce)
  145. {
  146. assignmentState = AssignedMultipleTimes;
  147. }
  148. }
  149. bool IsUsedInLdElem() const
  150. {
  151. return this->isUsedInLdElem;
  152. }
  153. void SetIsUsedInLdElem(bool is)
  154. {
  155. this->isUsedInLdElem = is;
  156. }
  157. static void TrySetIsUsedInLdElem(ParseNode * pnode)
  158. {
  159. if (pnode && pnode->nop == knopStr)
  160. {
  161. pnode->sxPid.pid->SetIsUsedInLdElem(true);
  162. }
  163. }
  164. bool IsSingleAssignment()
  165. {
  166. return assignmentState == AssignedOnce;
  167. }
  168. PidRefStack *GetPidRefForScopeId(int scopeId)
  169. {
  170. PidRefStack *ref;
  171. for (ref = m_pidRefStack; ref; ref = ref->prev)
  172. {
  173. int refId = ref->GetScopeId();
  174. if (refId == scopeId)
  175. {
  176. return ref;
  177. }
  178. if (refId < scopeId)
  179. {
  180. break;
  181. }
  182. }
  183. return nullptr;
  184. }
  185. void PushPidRef(int blockId, Js::LocalFunctionId funcId, PidRefStack *newRef)
  186. {
  187. AssertMsg(blockId >= 0, "Block Id's should be greater than 0");
  188. newRef->id = blockId;
  189. newRef->funcId = funcId;
  190. newRef->prev = m_pidRefStack;
  191. m_pidRefStack = newRef;
  192. }
  193. PidRefStack * RemovePrevPidRef(PidRefStack *ref)
  194. {
  195. PidRefStack *prevRef;
  196. if (ref == nullptr)
  197. {
  198. prevRef = m_pidRefStack;
  199. Assert(prevRef);
  200. m_pidRefStack = prevRef->prev;
  201. }
  202. else
  203. {
  204. prevRef = ref->prev;
  205. Assert(prevRef);
  206. ref->prev = prevRef->prev;
  207. }
  208. return prevRef;
  209. }
  210. PidRefStack * FindOrAddPidRef(ArenaAllocator *alloc, int scopeId, Js::LocalFunctionId funcId)
  211. {
  212. // If the stack is empty, or we are pushing to the innermost scope already,
  213. // we can go ahead and push a new PidRef on the stack.
  214. if (m_pidRefStack == nullptr)
  215. {
  216. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId, funcId);
  217. if (newRef == nullptr)
  218. {
  219. return nullptr;
  220. }
  221. newRef->prev = m_pidRefStack;
  222. m_pidRefStack = newRef;
  223. return newRef;
  224. }
  225. // Search for the corresponding PidRef, or the position to insert the new PidRef.
  226. PidRefStack *ref = m_pidRefStack;
  227. PidRefStack *prevRef = nullptr;
  228. while (1)
  229. {
  230. // We may already have a ref for this scopeId.
  231. if (ref->id == scopeId)
  232. {
  233. return ref;
  234. }
  235. if (ref->prev == nullptr || ref->id < scopeId)
  236. {
  237. // No existing PidRef for this scopeId, so create and insert one at this position.
  238. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId, funcId);
  239. if (newRef == nullptr)
  240. {
  241. return nullptr;
  242. }
  243. if (ref->id < scopeId)
  244. {
  245. if (prevRef != nullptr)
  246. {
  247. // Param scope has a reference to the same pid as the one we are inserting into the body.
  248. // There is a another reference (prevRef), probably from an inner block in the body.
  249. // So we should insert the new reference between them.
  250. newRef->prev = prevRef->prev;
  251. prevRef->prev = newRef;
  252. }
  253. else
  254. {
  255. // When we have code like below, prevRef will be null,
  256. // function (a = x) { var x = 1; }
  257. newRef->prev = m_pidRefStack;
  258. m_pidRefStack = newRef;
  259. }
  260. }
  261. else
  262. {
  263. newRef->prev = ref->prev;
  264. ref->prev = newRef;
  265. }
  266. return newRef;
  267. }
  268. Assert(ref->prev->id <= ref->id);
  269. prevRef = ref;
  270. ref = ref->prev;
  271. }
  272. }
  273. Js::PropertyId GetPropertyId() const { return m_propertyId; }
  274. void SetPropertyId(Js::PropertyId id) { m_propertyId = id; }
  275. void SetIsEval() { m_grfid |= fidEval; }
  276. BOOL GetIsEval() const { return m_grfid & fidEval; }
  277. void SetIsLetOrConst() { m_grfid |= fidLetOrConst; }
  278. BOOL GetIsLetOrConst() const { return m_grfid & fidLetOrConst; }
  279. void SetIsModuleExport() { m_grfid |= fidModuleExport; }
  280. BOOL GetIsModuleExport() const { return m_grfid & fidModuleExport; }
  281. };
  282. /*****************************************************************************/
  283. class HashTbl
  284. {
  285. public:
  286. static HashTbl * Create(uint cidHash);
  287. void Release(void)
  288. {
  289. delete this; // invokes overrided operator delete
  290. }
  291. BOOL TokIsBinop(tokens tk, int *popl, OpCode *pnop)
  292. {
  293. const KWD *pkwd = KwdOfTok(tk);
  294. if (nullptr == pkwd)
  295. return FALSE;
  296. *popl = pkwd->prec2;
  297. *pnop = pkwd->nop2;
  298. return TRUE;
  299. }
  300. BOOL TokIsUnop(tokens tk, int *popl, OpCode *pnop)
  301. {
  302. const KWD *pkwd = KwdOfTok(tk);
  303. if (nullptr == pkwd)
  304. return FALSE;
  305. *popl = pkwd->prec1;
  306. *pnop = pkwd->nop1;
  307. return TRUE;
  308. }
  309. IdentPtr PidFromTk(tokens tk);
  310. IdentPtr PidHashName(LPCOLESTR psz)
  311. {
  312. size_t csz = wcslen(psz);
  313. Assert(csz <= ULONG_MAX);
  314. return PidHashNameLen(psz, static_cast<uint32>(csz));
  315. }
  316. template <typename CharType>
  317. IdentPtr PidHashNameLen(CharType const * psz, CharType const * end, uint32 cch);
  318. template <typename CharType>
  319. IdentPtr PidHashNameLen(CharType const * psz, uint32 cch);
  320. template <typename CharType>
  321. IdentPtr PidHashNameLenWithHash(_In_reads_(cch) CharType const * psz, CharType const * end, int32 cch, uint32 luHash);
  322. template <typename CharType>
  323. inline IdentPtr FindExistingPid(
  324. CharType const * prgch,
  325. CharType const * end,
  326. int32 cch,
  327. uint32 luHash,
  328. IdentPtr **pppInsert,
  329. int32 *pBucketCount
  330. #if PROFILE_DICTIONARY
  331. , int& depth
  332. #endif
  333. );
  334. tokens TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode);
  335. NoReleaseAllocator* GetAllocator() {return &m_noReleaseAllocator;}
  336. bool Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch);
  337. template<typename Fn>
  338. void VisitPids(Fn fn)
  339. {
  340. for (uint i = 0; i <= m_luMask; i++)
  341. {
  342. for (IdentPtr pid = m_prgpidName[i]; pid; pid = pid->m_pidNext)
  343. {
  344. fn(pid);
  345. }
  346. }
  347. }
  348. private:
  349. NoReleaseAllocator m_noReleaseAllocator; // to allocate identifiers
  350. Ident ** m_prgpidName; // hash table for names
  351. uint32 m_luMask; // hash mask
  352. uint32 m_luCount; // count of the number of entires in the hash table
  353. IdentPtr m_rpid[tkLimKwd];
  354. HashTbl()
  355. {
  356. m_prgpidName = nullptr;
  357. memset(&m_rpid, 0, sizeof(m_rpid));
  358. }
  359. ~HashTbl(void) {}
  360. void operator delete(void* p, size_t size)
  361. {
  362. HeapFree(p, size);
  363. }
  364. // Called to grow the number of buckets in the table to reduce the table density.
  365. void Grow();
  366. // Automatically grow the table if a bucket's length grows beyond BucketLengthLimit and the table is densely populated.
  367. static const uint BucketLengthLimit = 5;
  368. // When growing the bucket size we'll grow by GrowFactor. GrowFactor MUST be a power of 2.
  369. static const uint GrowFactor = 4;
  370. #if DEBUG
  371. uint CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask);
  372. #endif
  373. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) LPCOLESTR psz2, LPCOLESTR psz2end)
  374. {
  375. return memcmp(psz1, psz2, (psz2end - psz2) * sizeof(OLECHAR)) == 0;
  376. }
  377. static bool CharsAreEqual(__in_z LPCOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end)
  378. {
  379. return utf8::CharsAreEqual(psz1, psz2, psz2end, utf8::doAllowThreeByteSurrogates);
  380. }
  381. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) char const * psz2, char const * psz2end)
  382. {
  383. while (psz2 < psz2end)
  384. {
  385. if (*psz1++ != *psz2++)
  386. {
  387. return false;
  388. }
  389. }
  390. return true;
  391. }
  392. static void CopyString(__in_ecount((psz2end - psz2) + 1) LPOLESTR psz1, __in_ecount(psz2end - psz2) LPCOLESTR psz2, LPCOLESTR psz2end)
  393. {
  394. size_t cch = psz2end - psz2;
  395. js_memcpy_s(psz1, cch * sizeof(OLECHAR), psz2, cch * sizeof(OLECHAR));
  396. psz1[cch] = 0;
  397. }
  398. static void CopyString(LPOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end)
  399. {
  400. utf8::DecodeUnitsIntoAndNullTerminate(psz1, psz2, psz2end);
  401. }
  402. static void CopyString(LPOLESTR psz1, char const * psz2, char const * psz2end)
  403. {
  404. while (psz2 < psz2end)
  405. {
  406. *(psz1++) = *psz2++;
  407. }
  408. *psz1 = 0;
  409. }
  410. // note: on failure this may throw or return FALSE, depending on
  411. // where the failure occurred.
  412. BOOL Init(uint cidHash);
  413. /*************************************************************************/
  414. /* The following members are related to the keyword descriptor tables */
  415. /*************************************************************************/
  416. struct KWD
  417. {
  418. OpCode nop2;
  419. byte prec2;
  420. OpCode nop1;
  421. byte prec1;
  422. };
  423. struct ReservedWordInfo
  424. {
  425. StaticSym const * sym;
  426. ushort grfid;
  427. };
  428. static const ReservedWordInfo s_reservedWordInfo[tkID];
  429. static const KWD g_mptkkwd[tkLimKwd];
  430. static const KWD * KwdOfTok(tokens tk)
  431. { return (unsigned int)tk < tkLimKwd ? g_mptkkwd + tk : nullptr; }
  432. static __declspec(noreturn) void OutOfMemory();
  433. #if PROFILE_DICTIONARY
  434. DictionaryStats *stats;
  435. #endif
  436. };