Hash.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. // StaticSym contains a string literal at the end (flexible array) and is
  7. // meant to be initialized statically. However, flexible array initialization
  8. // is not allowed in standard C++. We declare each StaticSym with length
  9. // instead and cast to common StaticSymLen<0>* (StaticSym*) to access.
  10. template <uint32 N>
  11. struct StaticSymLen
  12. {
  13. uint32 luHash;
  14. uint32 cch;
  15. OLECHAR sz[N];
  16. };
  17. typedef StaticSymLen<0> StaticSym;
  18. /***************************************************************************
  19. Hashing functions. Definitions in core\hashfunc.cpp.
  20. ***************************************************************************/
  21. ULONG CaseSensitiveComputeHash(LPCOLESTR prgch, LPCOLESTR end);
  22. ULONG CaseSensitiveComputeHash(LPCUTF8 prgch, LPCUTF8 end);
  23. ULONG CaseInsensitiveComputeHash(LPCOLESTR posz);
  24. enum
  25. {
  26. fidNil = 0x0000,
  27. fidKwdRsvd = 0x0001, // the keyword is a reserved word
  28. fidKwdFutRsvd = 0x0002, // a future reserved word, but only in strict mode
  29. fidModuleExport = 0x8000 // name is module export
  30. };
  31. struct BlockIdsStack
  32. {
  33. int id;
  34. BlockIdsStack *prev;
  35. };
  36. class Span
  37. {
  38. charcount_t m_ichMin;
  39. charcount_t m_ichLim;
  40. public:
  41. Span(): m_ichMin((charcount_t)-1), m_ichLim((charcount_t)-1) { }
  42. Span(charcount_t ichMin, charcount_t ichLim): m_ichMin(ichMin), m_ichLim(ichLim) { }
  43. charcount_t GetIchMin() { return m_ichMin; }
  44. charcount_t GetIchLim() { Assert(m_ichMin != (charcount_t)-1); return m_ichLim; }
  45. void Set(charcount_t ichMin, charcount_t ichLim)
  46. {
  47. m_ichMin = ichMin;
  48. m_ichLim = ichLim;
  49. }
  50. operator bool() { return m_ichMin != -1; }
  51. };
  52. struct PidRefStack
  53. {
  54. PidRefStack() : isAsg(false), isDynamic(false), id(0), funcId(0), sym(nullptr), prev(nullptr), isEscape(false), isModuleExport(false), isFuncAssignment(false), isUsedInLdElem(false) {}
  55. PidRefStack(int id, Js::LocalFunctionId funcId) : isAsg(false), isDynamic(false), id(id), funcId(funcId), sym(nullptr), prev(nullptr), isEscape(false), isModuleExport(false), isFuncAssignment(false), isUsedInLdElem(false) {}
  56. int GetScopeId() const { return id; }
  57. Js::LocalFunctionId GetFuncScopeId() const { return funcId; }
  58. Symbol *GetSym() const { return sym; }
  59. void SetSym(Symbol *sym) { this->sym = sym; }
  60. bool IsAssignment() const { return isAsg; }
  61. bool IsFuncAssignment() const { return isFuncAssignment; }
  62. bool IsEscape() const { return isEscape; }
  63. void SetIsEscape(bool is) { isEscape = is; }
  64. bool IsDynamicBinding() const { return isDynamic; }
  65. void SetDynamicBinding() { isDynamic = true; }
  66. bool IsUsedInLdElem() const { return isUsedInLdElem; }
  67. void SetIsUsedInLdElem(bool is) { isUsedInLdElem = is; }
  68. Symbol **GetSymRef()
  69. {
  70. return &sym;
  71. }
  72. bool isAsg;
  73. bool isDynamic;
  74. bool isModuleExport;
  75. bool isEscape;
  76. bool isFuncAssignment;
  77. bool isUsedInLdElem;
  78. int id;
  79. Js::LocalFunctionId funcId;
  80. Symbol *sym;
  81. PidRefStack *prev;
  82. };
  83. enum AssignmentState : byte {
  84. NotAssigned,
  85. AssignedOnce,
  86. AssignedMultipleTimes
  87. };
  88. struct Ident
  89. {
  90. friend class HashTbl;
  91. private:
  92. Ident * m_pidNext; // next identifier in this hash bucket
  93. PidRefStack *m_pidRefStack;
  94. ushort m_tk; // token# if identifier is a keyword
  95. ushort m_grfid; // see fidXXX above
  96. uint32 m_luHash; // hash value
  97. uint32 m_cch; // length of the identifier spelling
  98. Js::PropertyId m_propertyId;
  99. AssignmentState assignmentState;
  100. bool isUsedInLdElem;
  101. OLECHAR m_sz[]; // the spelling follows (null terminated)
  102. void SetTk(tokens tk, ushort grfid);
  103. public:
  104. LPCOLESTR Psz(void)
  105. { return m_sz; }
  106. uint32 Cch(void)
  107. { return m_cch; }
  108. tokens Tk(bool isStrictMode);
  109. uint32 Hash(void)
  110. { return m_luHash; }
  111. PidRefStack *GetTopRef() const
  112. {
  113. return m_pidRefStack;
  114. }
  115. PidRefStack *GetTopRef(uint maxBlockId) const
  116. {
  117. PidRefStack *ref;
  118. for (ref = m_pidRefStack; ref && (uint)ref->id > maxBlockId; ref = ref->prev)
  119. {
  120. ; // nothing
  121. }
  122. return ref;
  123. }
  124. void SetTopRef(PidRefStack *ref)
  125. {
  126. m_pidRefStack = ref;
  127. }
  128. void PromoteAssignmentState()
  129. {
  130. if (assignmentState == NotAssigned)
  131. {
  132. assignmentState = AssignedOnce;
  133. }
  134. else if (assignmentState == AssignedOnce)
  135. {
  136. assignmentState = AssignedMultipleTimes;
  137. }
  138. }
  139. bool IsUsedInLdElem() const
  140. {
  141. return this->isUsedInLdElem;
  142. }
  143. void SetIsUsedInLdElem(bool is)
  144. {
  145. this->isUsedInLdElem = is;
  146. }
  147. static void TrySetIsUsedInLdElem(ParseNode * pnode)
  148. {
  149. if (pnode && pnode->nop == knopStr)
  150. {
  151. pnode->sxPid.pid->SetIsUsedInLdElem(true);
  152. }
  153. }
  154. bool IsSingleAssignment()
  155. {
  156. return assignmentState == AssignedOnce;
  157. }
  158. PidRefStack *GetPidRefForScopeId(int scopeId)
  159. {
  160. PidRefStack *ref;
  161. for (ref = m_pidRefStack; ref; ref = ref->prev)
  162. {
  163. int refId = ref->GetScopeId();
  164. if (refId == scopeId)
  165. {
  166. return ref;
  167. }
  168. if (refId < scopeId)
  169. {
  170. break;
  171. }
  172. }
  173. return nullptr;
  174. }
  175. void PushPidRef(int blockId, Js::LocalFunctionId funcId, PidRefStack *newRef)
  176. {
  177. AssertMsg(blockId >= 0, "Block Id's should be greater than 0");
  178. newRef->id = blockId;
  179. newRef->funcId = funcId;
  180. newRef->prev = m_pidRefStack;
  181. m_pidRefStack = newRef;
  182. }
  183. PidRefStack * RemovePrevPidRef(PidRefStack *ref)
  184. {
  185. PidRefStack *prevRef;
  186. if (ref == nullptr)
  187. {
  188. prevRef = m_pidRefStack;
  189. Assert(prevRef);
  190. m_pidRefStack = prevRef->prev;
  191. }
  192. else
  193. {
  194. prevRef = ref->prev;
  195. Assert(prevRef);
  196. ref->prev = prevRef->prev;
  197. }
  198. return prevRef;
  199. }
  200. PidRefStack * FindOrAddPidRef(ArenaAllocator *alloc, int scopeId, Js::LocalFunctionId funcId)
  201. {
  202. // If the stack is empty, or we are pushing to the innermost scope already,
  203. // we can go ahead and push a new PidRef on the stack.
  204. if (m_pidRefStack == nullptr)
  205. {
  206. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId, funcId);
  207. if (newRef == nullptr)
  208. {
  209. return nullptr;
  210. }
  211. newRef->prev = m_pidRefStack;
  212. m_pidRefStack = newRef;
  213. return newRef;
  214. }
  215. // Search for the corresponding PidRef, or the position to insert the new PidRef.
  216. PidRefStack *ref = m_pidRefStack;
  217. PidRefStack *prevRef = nullptr;
  218. while (1)
  219. {
  220. // We may already have a ref for this scopeId.
  221. if (ref->id == scopeId)
  222. {
  223. return ref;
  224. }
  225. if (ref->prev == nullptr || ref->id < scopeId)
  226. {
  227. // No existing PidRef for this scopeId, so create and insert one at this position.
  228. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId, funcId);
  229. if (newRef == nullptr)
  230. {
  231. return nullptr;
  232. }
  233. if (ref->id < scopeId)
  234. {
  235. if (prevRef != nullptr)
  236. {
  237. // Param scope has a reference to the same pid as the one we are inserting into the body.
  238. // There is a another reference (prevRef), probably from an inner block in the body.
  239. // So we should insert the new reference between them.
  240. newRef->prev = prevRef->prev;
  241. prevRef->prev = newRef;
  242. }
  243. else
  244. {
  245. // When we have code like below, prevRef will be null,
  246. // function (a = x) { var x = 1; }
  247. newRef->prev = m_pidRefStack;
  248. m_pidRefStack = newRef;
  249. }
  250. }
  251. else
  252. {
  253. newRef->prev = ref->prev;
  254. ref->prev = newRef;
  255. }
  256. return newRef;
  257. }
  258. prevRef = ref;
  259. ref = ref->prev;
  260. }
  261. }
  262. Js::PropertyId GetPropertyId() const { return m_propertyId; }
  263. void SetPropertyId(Js::PropertyId id) { m_propertyId = id; }
  264. void SetIsModuleExport() { m_grfid |= fidModuleExport; }
  265. BOOL GetIsModuleExport() const { return m_grfid & fidModuleExport; }
  266. static tokens TkFromNameLen(uint32 luHash, _In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode, ushort * pgrfid, ushort * ptk);
  267. #if DBG
  268. static tokens TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode);
  269. #endif
  270. };
  271. /*****************************************************************************/
  272. class HashTbl
  273. {
  274. public:
  275. static HashTbl * Create(uint cidHash);
  276. void Release(void)
  277. {
  278. delete this; // invokes overridden operator delete
  279. }
  280. BOOL TokIsBinop(tokens tk, int *popl, OpCode *pnop)
  281. {
  282. const KWD *pkwd = KwdOfTok(tk);
  283. if (nullptr == pkwd)
  284. return FALSE;
  285. *popl = pkwd->prec2;
  286. *pnop = pkwd->nop2;
  287. return TRUE;
  288. }
  289. BOOL TokIsUnop(tokens tk, int *popl, OpCode *pnop)
  290. {
  291. const KWD *pkwd = KwdOfTok(tk);
  292. if (nullptr == pkwd)
  293. return FALSE;
  294. *popl = pkwd->prec1;
  295. *pnop = pkwd->nop1;
  296. return TRUE;
  297. }
  298. IdentPtr PidFromTk(tokens tk);
  299. IdentPtr PidHashName(LPCOLESTR psz)
  300. {
  301. size_t csz = wcslen(psz);
  302. Assert(csz <= ULONG_MAX);
  303. return PidHashNameLen(psz, static_cast<uint32>(csz));
  304. }
  305. template <typename CharType>
  306. IdentPtr PidHashNameLen(CharType const * psz, CharType const * end, uint32 cch);
  307. template <typename CharType>
  308. IdentPtr PidHashNameLen(CharType const * psz, uint32 cch);
  309. template <typename CharType>
  310. IdentPtr PidHashNameLenWithHash(_In_reads_(cch) CharType const * psz, CharType const * end, int32 cch, uint32 luHash);
  311. template <typename CharType>
  312. inline IdentPtr FindExistingPid(
  313. CharType const * prgch,
  314. CharType const * end,
  315. int32 cch,
  316. uint32 luHash,
  317. IdentPtr **pppInsert,
  318. int32 *pBucketCount
  319. #if PROFILE_DICTIONARY
  320. , int& depth
  321. #endif
  322. );
  323. NoReleaseAllocator* GetAllocator() {return &m_noReleaseAllocator;}
  324. bool Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch);
  325. template<typename Fn>
  326. void VisitPids(Fn fn)
  327. {
  328. for (uint i = 0; i <= m_luMask; i++)
  329. {
  330. for (IdentPtr pid = m_prgpidName[i]; pid; pid = pid->m_pidNext)
  331. {
  332. fn(pid);
  333. }
  334. }
  335. }
  336. private:
  337. NoReleaseAllocator m_noReleaseAllocator; // to allocate identifiers
  338. Ident ** m_prgpidName; // hash table for names
  339. uint32 m_luMask; // hash mask
  340. uint32 m_luCount; // count of the number of entires in the hash table
  341. IdentPtr m_rpid[tkLimKwd];
  342. HashTbl()
  343. {
  344. m_prgpidName = nullptr;
  345. memset(&m_rpid, 0, sizeof(m_rpid));
  346. }
  347. ~HashTbl(void) {}
  348. void operator delete(void* p, size_t size)
  349. {
  350. HeapFree(p, size);
  351. }
  352. // Called to grow the number of buckets in the table to reduce the table density.
  353. void Grow();
  354. // Automatically grow the table if a bucket's length grows beyond BucketLengthLimit and the table is densely populated.
  355. static const uint BucketLengthLimit = 5;
  356. // When growing the bucket size we'll grow by GrowFactor. GrowFactor MUST be a power of 2.
  357. static const uint GrowFactor = 4;
  358. #if DEBUG
  359. uint CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask);
  360. #endif
  361. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) LPCOLESTR psz2, LPCOLESTR psz2end)
  362. {
  363. return memcmp(psz1, psz2, (psz2end - psz2) * sizeof(OLECHAR)) == 0;
  364. }
  365. static bool CharsAreEqual(__in_z LPCOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end)
  366. {
  367. return utf8::CharsAreEqual(psz1, psz2, psz2end, utf8::doAllowThreeByteSurrogates);
  368. }
  369. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) char const * psz2, char const * psz2end)
  370. {
  371. while (psz2 < psz2end)
  372. {
  373. if (*psz1++ != *psz2++)
  374. {
  375. return false;
  376. }
  377. }
  378. return true;
  379. }
  380. static void CopyString(__in_ecount((psz2end - psz2) + 1) LPOLESTR psz1, __in_ecount(psz2end - psz2) LPCOLESTR psz2, LPCOLESTR psz2end)
  381. {
  382. size_t cch = psz2end - psz2;
  383. js_memcpy_s(psz1, cch * sizeof(OLECHAR), psz2, cch * sizeof(OLECHAR));
  384. psz1[cch] = 0;
  385. }
  386. static void CopyString(LPOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end)
  387. {
  388. utf8::DecodeUnitsIntoAndNullTerminate(psz1, psz2, psz2end);
  389. }
  390. static void CopyString(LPOLESTR psz1, char const * psz2, char const * psz2end)
  391. {
  392. while (psz2 < psz2end)
  393. {
  394. *(psz1++) = *psz2++;
  395. }
  396. *psz1 = 0;
  397. }
  398. // note: on failure this may throw or return FALSE, depending on
  399. // where the failure occurred.
  400. BOOL Init(uint cidHash);
  401. /*************************************************************************/
  402. /* The following members are related to the keyword descriptor tables */
  403. /*************************************************************************/
  404. struct KWD
  405. {
  406. OpCode nop2;
  407. byte prec2;
  408. OpCode nop1;
  409. byte prec1;
  410. };
  411. struct ReservedWordInfo
  412. {
  413. StaticSym const * sym;
  414. ushort grfid;
  415. };
  416. static const ReservedWordInfo s_reservedWordInfo[tkID];
  417. static const KWD g_mptkkwd[tkLimKwd];
  418. static const KWD * KwdOfTok(tokens tk)
  419. { return (unsigned int)tk < tkLimKwd ? g_mptkkwd + tk : nullptr; }
  420. static __declspec(noreturn) void OutOfMemory();
  421. #if PROFILE_DICTIONARY
  422. DictionaryStats *stats;
  423. #endif
  424. };