Hash.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. // StaticSym contains a string literal at the end (flexible array) and is
  7. // meant to be initialized statically. However, flexible array initialization
  8. // is not allowed in standard C++. We declare each StaticSym with length
  9. // instead and cast to common StaticSymLen<0>* (StaticSym*) to access.
  10. template <uint32 N>
  11. struct StaticSymLen
  12. {
  13. uint32 luHash;
  14. uint32 cch;
  15. OLECHAR sz[N];
  16. };
  17. typedef StaticSymLen<0> StaticSym;
  18. /***************************************************************************
  19. Hashing functions. Definitions in core\hashfunc.cpp.
  20. ***************************************************************************/
  21. ULONG CaseSensitiveComputeHash(LPCOLESTR prgch, LPCOLESTR end);
  22. ULONG CaseSensitiveComputeHash(LPCUTF8 prgch, LPCUTF8 end);
  23. ULONG CaseInsensitiveComputeHash(LPCOLESTR posz);
  24. enum
  25. {
  26. fidNil = 0x0000,
  27. fidKwdRsvd = 0x0001, // the keyword is a reserved word
  28. fidKwdFutRsvd = 0x0002, // a future reserved word, but only in strict mode
  29. fidModuleExport = 0x8000 // name is module export
  30. };
  31. struct BlockIdsStack
  32. {
  33. int id;
  34. BlockIdsStack *prev;
  35. };
  36. class Span
  37. {
  38. charcount_t m_ichMin;
  39. charcount_t m_ichLim;
  40. public:
  41. Span(): m_ichMin((charcount_t)-1), m_ichLim((charcount_t)-1) { }
  42. Span(charcount_t ichMin, charcount_t ichLim): m_ichMin(ichMin), m_ichLim(ichLim) { }
  43. charcount_t GetIchMin() { return m_ichMin; }
  44. charcount_t GetIchLim() { Assert(m_ichMin != (charcount_t)-1); return m_ichLim; }
  45. void Set(charcount_t ichMin, charcount_t ichLim)
  46. {
  47. m_ichMin = ichMin;
  48. m_ichLim = ichLim;
  49. }
  50. operator bool() { return m_ichMin != -1; }
  51. };
  52. struct PidRefStack
  53. {
  54. PidRefStack() : isAsg(false), isDynamic(false), id(0), funcId(0), sym(nullptr), prev(nullptr), isEscape(false), isModuleExport(false), isFuncAssignment(false), isUsedInLdElem(false) {}
  55. PidRefStack(int id, Js::LocalFunctionId funcId) : isAsg(false), isDynamic(false), id(id), funcId(funcId), sym(nullptr), prev(nullptr), isEscape(false), isModuleExport(false), isFuncAssignment(false), isUsedInLdElem(false) {}
  56. int GetScopeId() const { return id; }
  57. Js::LocalFunctionId GetFuncScopeId() const { return funcId; }
  58. Symbol *GetSym() const { return sym; }
  59. void SetSym(Symbol *sym) { this->sym = sym; }
  60. bool IsAssignment() const { return isAsg; }
  61. bool IsFuncAssignment() const { return isFuncAssignment; }
  62. bool IsEscape() const { return isEscape; }
  63. void SetIsEscape(bool is) { isEscape = is; }
  64. bool IsDynamicBinding() const { return isDynamic; }
  65. void SetDynamicBinding() { isDynamic = true; }
  66. bool IsUsedInLdElem() const { return isUsedInLdElem; }
  67. void SetIsUsedInLdElem(bool is) { isUsedInLdElem = is; }
  68. Symbol **GetSymRef()
  69. {
  70. return &sym;
  71. }
  72. bool isAsg;
  73. bool isDynamic;
  74. bool isModuleExport;
  75. bool isEscape;
  76. bool isFuncAssignment;
  77. bool isUsedInLdElem;
  78. int id;
  79. Js::LocalFunctionId funcId;
  80. Symbol *sym;
  81. PidRefStack *prev;
  82. };
  83. enum AssignmentState : byte {
  84. NotAssigned,
  85. AssignedOnce,
  86. AssignedMultipleTimes
  87. };
  88. struct Ident
  89. {
  90. friend class HashTbl;
  91. private:
  92. Ident * m_pidNext; // next identifier in this hash bucket
  93. PidRefStack *m_pidRefStack;
  94. ushort m_tk; // token# if identifier is a keyword
  95. ushort m_grfid; // see fidXXX above
  96. uint32 m_luHash; // hash value
  97. uint32 m_cch; // length of the identifier spelling
  98. Js::PropertyId m_propertyId;
  99. AssignmentState assignmentState;
  100. bool isUsedInLdElem;
  101. OLECHAR m_sz[]; // the spelling follows (null terminated)
  102. void SetTk(tokens tk, ushort grfid);
  103. public:
  104. LPCOLESTR Psz(void)
  105. { return m_sz; }
  106. uint32 Cch(void)
  107. { return m_cch; }
  108. tokens Tk(bool isStrictMode);
  109. uint32 Hash(void)
  110. { return m_luHash; }
  111. PidRefStack *GetTopRef() const
  112. {
  113. return m_pidRefStack;
  114. }
  115. PidRefStack *GetTopRef(uint maxBlockId) const
  116. {
  117. PidRefStack *ref;
  118. for (ref = m_pidRefStack; ref && (uint)ref->id > maxBlockId; ref = ref->prev)
  119. {
  120. ; // nothing
  121. }
  122. return ref;
  123. }
  124. void SetTopRef(PidRefStack *ref)
  125. {
  126. m_pidRefStack = ref;
  127. }
  128. void PromoteAssignmentState()
  129. {
  130. if (assignmentState == NotAssigned)
  131. {
  132. assignmentState = AssignedOnce;
  133. }
  134. else if (assignmentState == AssignedOnce)
  135. {
  136. assignmentState = AssignedMultipleTimes;
  137. }
  138. }
  139. bool IsUsedInLdElem() const
  140. {
  141. return this->isUsedInLdElem;
  142. }
  143. void SetIsUsedInLdElem(bool is)
  144. {
  145. this->isUsedInLdElem = is;
  146. }
  147. static void TrySetIsUsedInLdElem(ParseNode * pnode);
  148. bool IsSingleAssignment()
  149. {
  150. return assignmentState == AssignedOnce;
  151. }
  152. PidRefStack *GetPidRefForScopeId(int scopeId)
  153. {
  154. PidRefStack *ref;
  155. for (ref = m_pidRefStack; ref; ref = ref->prev)
  156. {
  157. int refId = ref->GetScopeId();
  158. if (refId == scopeId)
  159. {
  160. return ref;
  161. }
  162. if (refId < scopeId)
  163. {
  164. break;
  165. }
  166. }
  167. return nullptr;
  168. }
  169. void PushPidRef(int blockId, Js::LocalFunctionId funcId, PidRefStack *newRef)
  170. {
  171. AssertMsg(blockId >= 0, "Block Id's should be greater than 0");
  172. newRef->id = blockId;
  173. newRef->funcId = funcId;
  174. newRef->prev = m_pidRefStack;
  175. m_pidRefStack = newRef;
  176. }
  177. PidRefStack * RemovePrevPidRef(PidRefStack *ref)
  178. {
  179. PidRefStack *prevRef;
  180. if (ref == nullptr)
  181. {
  182. prevRef = m_pidRefStack;
  183. Assert(prevRef);
  184. m_pidRefStack = prevRef->prev;
  185. }
  186. else
  187. {
  188. prevRef = ref->prev;
  189. Assert(prevRef);
  190. ref->prev = prevRef->prev;
  191. }
  192. return prevRef;
  193. }
  194. PidRefStack * FindOrAddPidRef(ArenaAllocator *alloc, int scopeId, Js::LocalFunctionId funcId)
  195. {
  196. // If the stack is empty, or we are pushing to the innermost scope already,
  197. // we can go ahead and push a new PidRef on the stack.
  198. if (m_pidRefStack == nullptr)
  199. {
  200. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId, funcId);
  201. if (newRef == nullptr)
  202. {
  203. return nullptr;
  204. }
  205. newRef->prev = m_pidRefStack;
  206. m_pidRefStack = newRef;
  207. return newRef;
  208. }
  209. // Search for the corresponding PidRef, or the position to insert the new PidRef.
  210. PidRefStack *ref = m_pidRefStack;
  211. PidRefStack *prevRef = nullptr;
  212. while (1)
  213. {
  214. // We may already have a ref for this scopeId.
  215. if (ref->id == scopeId)
  216. {
  217. return ref;
  218. }
  219. if (ref->prev == nullptr || ref->id < scopeId)
  220. {
  221. // No existing PidRef for this scopeId, so create and insert one at this position.
  222. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId, funcId);
  223. if (newRef == nullptr)
  224. {
  225. return nullptr;
  226. }
  227. if (ref->id < scopeId)
  228. {
  229. if (prevRef != nullptr)
  230. {
  231. // Param scope has a reference to the same pid as the one we are inserting into the body.
  232. // There is a another reference (prevRef), probably from an inner block in the body.
  233. // So we should insert the new reference between them.
  234. newRef->prev = prevRef->prev;
  235. prevRef->prev = newRef;
  236. }
  237. else
  238. {
  239. // When we have code like below, prevRef will be null,
  240. // function (a = x) { var x = 1; }
  241. newRef->prev = m_pidRefStack;
  242. m_pidRefStack = newRef;
  243. }
  244. }
  245. else
  246. {
  247. newRef->prev = ref->prev;
  248. ref->prev = newRef;
  249. }
  250. return newRef;
  251. }
  252. prevRef = ref;
  253. ref = ref->prev;
  254. }
  255. }
  256. Js::PropertyId GetPropertyId() const { return m_propertyId; }
  257. void SetPropertyId(Js::PropertyId id) { m_propertyId = id; }
  258. void SetIsModuleExport() { m_grfid |= fidModuleExport; }
  259. BOOL GetIsModuleExport() const { return m_grfid & fidModuleExport; }
  260. static tokens TkFromNameLen(uint32 luHash, _In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode, ushort * pgrfid, ushort * ptk);
  261. #if DBG
  262. static tokens TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode);
  263. #endif
  264. };
  265. /*****************************************************************************/
  266. class HashTbl
  267. {
  268. public:
  269. HashTbl(uint cidHash = DEFAULT_HASH_TABLE_SIZE)
  270. {
  271. AssertCanHandleOutOfMemory();
  272. m_prgpidName = nullptr;
  273. memset(&m_rpid, 0, sizeof(m_rpid));
  274. if (!Init(cidHash))
  275. {
  276. Js::Throw::OutOfMemory();
  277. }
  278. }
  279. ~HashTbl(void) {}
  280. BOOL TokIsBinop(tokens tk, int *popl, OpCode *pnop)
  281. {
  282. const KWD *pkwd = KwdOfTok(tk);
  283. if (nullptr == pkwd)
  284. return FALSE;
  285. *popl = pkwd->prec2;
  286. *pnop = pkwd->nop2;
  287. return TRUE;
  288. }
  289. BOOL TokIsUnop(tokens tk, int *popl, OpCode *pnop)
  290. {
  291. const KWD *pkwd = KwdOfTok(tk);
  292. if (nullptr == pkwd)
  293. return FALSE;
  294. *popl = pkwd->prec1;
  295. *pnop = pkwd->nop1;
  296. return TRUE;
  297. }
  298. IdentPtr PidFromTk(tokens tk);
  299. IdentPtr PidHashName(LPCOLESTR psz)
  300. {
  301. size_t csz = wcslen(psz);
  302. Assert(csz <= ULONG_MAX);
  303. return PidHashNameLen(psz, static_cast<uint32>(csz));
  304. }
  305. template <typename CharType>
  306. IdentPtr PidHashNameLen(CharType const * psz, CharType const * end, uint32 cch);
  307. template <typename CharType>
  308. IdentPtr PidHashNameLen(CharType const * psz, uint32 cch);
  309. template <typename CharType>
  310. IdentPtr PidHashNameLenWithHash(_In_reads_(cch) CharType const * psz, CharType const * end, int32 cch, uint32 luHash);
  311. template <typename CharType>
  312. inline IdentPtr FindExistingPid(
  313. CharType const * prgch,
  314. CharType const * end,
  315. int32 cch,
  316. uint32 luHash,
  317. IdentPtr **pppInsert,
  318. int32 *pBucketCount
  319. #if PROFILE_DICTIONARY
  320. , int& depth
  321. #endif
  322. );
  323. NoReleaseAllocator* GetAllocator() {return &m_noReleaseAllocator;}
  324. bool Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch);
  325. template<typename Fn>
  326. void VisitPids(Fn fn)
  327. {
  328. for (uint i = 0; i <= m_luMask; i++)
  329. {
  330. for (IdentPtr pid = m_prgpidName[i]; pid; pid = pid->m_pidNext)
  331. {
  332. fn(pid);
  333. }
  334. }
  335. }
  336. private:
  337. static const uint DEFAULT_HASH_TABLE_SIZE = 256;
  338. NoReleaseAllocator m_noReleaseAllocator; // to allocate identifiers
  339. Ident ** m_prgpidName; // hash table for names
  340. uint32 m_luMask; // hash mask
  341. uint32 m_luCount; // count of the number of entires in the hash table
  342. IdentPtr m_rpid[tkLimKwd];
  343. // Called to grow the number of buckets in the table to reduce the table density.
  344. void Grow();
  345. // Automatically grow the table if a bucket's length grows beyond BucketLengthLimit and the table is densely populated.
  346. static const uint BucketLengthLimit = 5;
  347. // When growing the bucket size we'll grow by GrowFactor. GrowFactor MUST be a power of 2.
  348. static const uint GrowFactor = 4;
  349. #if DEBUG
  350. uint CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask);
  351. #endif
  352. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) LPCOLESTR psz2, LPCOLESTR psz2end)
  353. {
  354. return memcmp(psz1, psz2, (psz2end - psz2) * sizeof(OLECHAR)) == 0;
  355. }
  356. static bool CharsAreEqual(__in_z LPCOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end)
  357. {
  358. return utf8::CharsAreEqual(psz1, psz2, psz2end, utf8::doAllowThreeByteSurrogates);
  359. }
  360. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) char const * psz2, char const * psz2end)
  361. {
  362. while (psz2 < psz2end)
  363. {
  364. if (*psz1++ != *psz2++)
  365. {
  366. return false;
  367. }
  368. }
  369. return true;
  370. }
  371. static void CopyString(__in_ecount((psz2end - psz2) + 1) LPOLESTR psz1, __in_ecount(psz2end - psz2) LPCOLESTR psz2, LPCOLESTR psz2end)
  372. {
  373. size_t cch = psz2end - psz2;
  374. js_memcpy_s(psz1, cch * sizeof(OLECHAR), psz2, cch * sizeof(OLECHAR));
  375. psz1[cch] = 0;
  376. }
  377. static void CopyString(LPOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end)
  378. {
  379. utf8::DecodeUnitsIntoAndNullTerminate(psz1, psz2, psz2end);
  380. }
  381. static void CopyString(LPOLESTR psz1, char const * psz2, char const * psz2end)
  382. {
  383. while (psz2 < psz2end)
  384. {
  385. *(psz1++) = *psz2++;
  386. }
  387. *psz1 = 0;
  388. }
  389. // note: on failure this may throw or return FALSE, depending on
  390. // where the failure occurred.
  391. BOOL Init(uint cidHash);
  392. /*************************************************************************/
  393. /* The following members are related to the keyword descriptor tables */
  394. /*************************************************************************/
  395. struct KWD
  396. {
  397. OpCode nop2;
  398. byte prec2;
  399. OpCode nop1;
  400. byte prec1;
  401. };
  402. struct ReservedWordInfo
  403. {
  404. StaticSym const * sym;
  405. ushort grfid;
  406. };
  407. static const ReservedWordInfo s_reservedWordInfo[tkID];
  408. static const KWD g_mptkkwd[tkLimKwd];
  409. static const KWD * KwdOfTok(tokens tk)
  410. { return (unsigned int)tk < tkLimKwd ? g_mptkkwd + tk : nullptr; }
  411. static __declspec(noreturn) void OutOfMemory();
  412. #if PROFILE_DICTIONARY
  413. DictionaryStats *stats;
  414. #endif
  415. };