Hash.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. // StaticSym contains a string literal at the end (flexible array) and is
  7. // meant to be initialized statically. However, flexible array initialization
  8. // is not allowed in standard C++. We declare each StaticSym with length
  9. // instead and cast to common StaticSymLen<0>* (StaticSym*) to access.
  10. template <uint32 N>
  11. struct StaticSymLen
  12. {
  13. uint32 luHash;
  14. uint32 cch;
  15. OLECHAR sz[N];
  16. };
  17. typedef StaticSymLen<0> StaticSym;
  18. /***************************************************************************
  19. Hashing functions. Definitions in core\hashfunc.cpp.
  20. ***************************************************************************/
  21. ULONG CaseSensitiveComputeHash(LPCOLESTR prgch, LPCOLESTR end);
  22. ULONG CaseSensitiveComputeHash(LPCUTF8 prgch, LPCUTF8 end);
  23. ULONG CaseInsensitiveComputeHash(LPCOLESTR posz);
  24. enum
  25. {
  26. fidNil = 0x0000,
  27. fidKwdRsvd = 0x0001, // the keyword is a reserved word
  28. fidKwdFutRsvd = 0x0002, // a future reserved word, but only in strict mode
  29. // Flags to identify tracked aliases of "eval"
  30. fidEval = 0x0008,
  31. // Flags to identify tracked aliases of "let"
  32. fidLetOrConst = 0x0010, // ID has previously been used in a block-scoped declaration
  33. // This flag is used by the Parser CountDcls and FillDcls methods.
  34. // CountDcls sets the bit as it walks through the var decls so that
  35. // it can skip duplicates. FillDcls clears the bit as it walks through
  36. // again to skip duplicates.
  37. fidGlobalDcl = 0x2000,
  38. fidUsed = 0x4000, // name referenced by source code
  39. fidModuleExport = 0x8000 // name is module export
  40. };
  41. struct BlockIdsStack
  42. {
  43. int id;
  44. BlockIdsStack *prev;
  45. };
  46. class Span
  47. {
  48. charcount_t m_ichMin;
  49. charcount_t m_ichLim;
  50. public:
  51. Span(): m_ichMin((charcount_t)-1), m_ichLim((charcount_t)-1) { }
  52. Span(charcount_t ichMin, charcount_t ichLim): m_ichMin(ichMin), m_ichLim(ichLim) { }
  53. charcount_t GetIchMin() { return m_ichMin; }
  54. charcount_t GetIchLim() { Assert(m_ichMin != (charcount_t)-1); return m_ichLim; }
  55. void Set(charcount_t ichMin, charcount_t ichLim)
  56. {
  57. m_ichMin = ichMin;
  58. m_ichLim = ichLim;
  59. }
  60. operator bool() { return m_ichMin != -1; }
  61. };
  62. struct PidRefStack
  63. {
  64. PidRefStack() : isAsg(false), isDynamic(false), id(0), funcId(0), sym(nullptr), prev(nullptr), isEscape(false), isModuleExport(false), isFuncAssignment(false) {}
  65. PidRefStack(int id, Js::LocalFunctionId funcId) : isAsg(false), isDynamic(false), id(id), funcId(funcId), sym(nullptr), prev(nullptr), isEscape(false), isModuleExport(false), isFuncAssignment(false) {}
  66. int GetScopeId() const { return id; }
  67. Js::LocalFunctionId GetFuncScopeId() const { return funcId; }
  68. Symbol *GetSym() const { return sym; }
  69. void SetSym(Symbol *sym) { this->sym = sym; }
  70. bool IsAssignment() const { return isAsg; }
  71. bool IsFuncAssignment() const { return isFuncAssignment; }
  72. bool IsEscape() const { return isEscape; }
  73. void SetIsEscape(bool is) { isEscape = is; }
  74. bool IsDynamicBinding() const { return isDynamic; }
  75. void SetDynamicBinding() { isDynamic = true; }
  76. Symbol **GetSymRef()
  77. {
  78. return &sym;
  79. }
  80. bool isAsg;
  81. bool isDynamic;
  82. bool isModuleExport;
  83. bool isEscape;
  84. bool isFuncAssignment;
  85. int id;
  86. Js::LocalFunctionId funcId;
  87. Symbol *sym;
  88. PidRefStack *prev;
  89. };
  90. enum AssignmentState : byte {
  91. NotAssigned,
  92. AssignedOnce,
  93. AssignedMultipleTimes
  94. };
  95. struct Ident
  96. {
  97. friend class HashTbl;
  98. private:
  99. Ident * m_pidNext; // next identifier in this hash bucket
  100. PidRefStack *m_pidRefStack;
  101. ushort m_tk; // token# if identifier is a keyword
  102. ushort m_grfid; // see fidXXX above
  103. uint32 m_luHash; // hash value
  104. uint32 m_cch; // length of the identifier spelling
  105. Js::PropertyId m_propertyId;
  106. AssignmentState assignmentState;
  107. OLECHAR m_sz[]; // the spelling follows (null terminated)
  108. void SetTk(tokens tk, ushort grfid);
  109. public:
  110. LPCOLESTR Psz(void)
  111. { return m_sz; }
  112. uint32 Cch(void)
  113. { return m_cch; }
  114. tokens Tk(bool isStrictMode);
  115. uint32 Hash(void)
  116. { return m_luHash; }
  117. PidRefStack *GetTopRef() const
  118. {
  119. return m_pidRefStack;
  120. }
  121. void SetTopRef(PidRefStack *ref)
  122. {
  123. m_pidRefStack = ref;
  124. }
  125. void PromoteAssignmentState()
  126. {
  127. if (assignmentState == NotAssigned)
  128. {
  129. assignmentState = AssignedOnce;
  130. }
  131. else if (assignmentState == AssignedOnce)
  132. {
  133. assignmentState = AssignedMultipleTimes;
  134. }
  135. }
  136. bool IsSingleAssignment()
  137. {
  138. return assignmentState == AssignedOnce;
  139. }
  140. PidRefStack *GetPidRefForScopeId(int scopeId)
  141. {
  142. PidRefStack *ref;
  143. for (ref = m_pidRefStack; ref; ref = ref->prev)
  144. {
  145. int refId = ref->GetScopeId();
  146. if (refId == scopeId)
  147. {
  148. return ref;
  149. }
  150. if (refId < scopeId)
  151. {
  152. break;
  153. }
  154. }
  155. return nullptr;
  156. }
  157. void PushPidRef(int blockId, Js::LocalFunctionId funcId, PidRefStack *newRef)
  158. {
  159. AssertMsg(blockId >= 0, "Block Id's should be greater than 0");
  160. newRef->id = blockId;
  161. newRef->funcId = funcId;
  162. newRef->prev = m_pidRefStack;
  163. m_pidRefStack = newRef;
  164. }
  165. PidRefStack * RemovePrevPidRef(PidRefStack *ref)
  166. {
  167. PidRefStack *prevRef;
  168. if (ref == nullptr)
  169. {
  170. prevRef = m_pidRefStack;
  171. Assert(prevRef);
  172. m_pidRefStack = prevRef->prev;
  173. }
  174. else
  175. {
  176. prevRef = ref->prev;
  177. Assert(prevRef);
  178. ref->prev = prevRef->prev;
  179. }
  180. return prevRef;
  181. }
  182. PidRefStack * TopDecl(int maxBlockId) const
  183. {
  184. for (PidRefStack *pidRef = m_pidRefStack; pidRef; pidRef = pidRef->prev)
  185. {
  186. if (pidRef->id > maxBlockId)
  187. {
  188. continue;
  189. }
  190. if (pidRef->sym != nullptr)
  191. {
  192. return pidRef;
  193. }
  194. }
  195. return nullptr;
  196. }
  197. PidRefStack * FindOrAddPidRef(ArenaAllocator *alloc, int scopeId, Js::LocalFunctionId funcId)
  198. {
  199. // If the stack is empty, or we are pushing to the innermost scope already,
  200. // we can go ahead and push a new PidRef on the stack.
  201. if (m_pidRefStack == nullptr)
  202. {
  203. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId, funcId);
  204. if (newRef == nullptr)
  205. {
  206. return nullptr;
  207. }
  208. newRef->prev = m_pidRefStack;
  209. m_pidRefStack = newRef;
  210. return newRef;
  211. }
  212. // Search for the corresponding PidRef, or the position to insert the new PidRef.
  213. PidRefStack *ref = m_pidRefStack;
  214. PidRefStack *prevRef = nullptr;
  215. while (1)
  216. {
  217. // We may already have a ref for this scopeId.
  218. if (ref->id == scopeId)
  219. {
  220. return ref;
  221. }
  222. if (ref->prev == nullptr || ref->id < scopeId)
  223. {
  224. // No existing PidRef for this scopeId, so create and insert one at this position.
  225. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId, funcId);
  226. if (newRef == nullptr)
  227. {
  228. return nullptr;
  229. }
  230. if (ref->id < scopeId)
  231. {
  232. if (prevRef != nullptr)
  233. {
  234. // Param scope has a reference to the same pid as the one we are inserting into the body.
  235. // There is a another reference (prevRef), probably from an inner block in the body.
  236. // So we should insert the new reference between them.
  237. newRef->prev = prevRef->prev;
  238. prevRef->prev = newRef;
  239. }
  240. else
  241. {
  242. // When we have code like below, prevRef will be null,
  243. // function (a = x) { var x = 1; }
  244. newRef->prev = m_pidRefStack;
  245. m_pidRefStack = newRef;
  246. }
  247. }
  248. else
  249. {
  250. newRef->prev = ref->prev;
  251. ref->prev = newRef;
  252. }
  253. return newRef;
  254. }
  255. Assert(ref->prev->id <= ref->id);
  256. prevRef = ref;
  257. ref = ref->prev;
  258. }
  259. }
  260. Js::PropertyId GetPropertyId() const { return m_propertyId; }
  261. void SetPropertyId(Js::PropertyId id) { m_propertyId = id; }
  262. void SetIsEval() { m_grfid |= fidEval; }
  263. BOOL GetIsEval() const { return m_grfid & fidEval; }
  264. void SetIsLetOrConst() { m_grfid |= fidLetOrConst; }
  265. BOOL GetIsLetOrConst() const { return m_grfid & fidLetOrConst; }
  266. void SetIsModuleExport() { m_grfid |= fidModuleExport; }
  267. BOOL GetIsModuleExport() const { return m_grfid & fidModuleExport; }
  268. };
  269. /*****************************************************************************/
  270. class HashTbl
  271. {
  272. public:
  273. static HashTbl * Create(uint cidHash, ErrHandler * perr);
  274. void Release(void)
  275. {
  276. delete this; // invokes overrided operator delete
  277. }
  278. BOOL TokIsBinop(tokens tk, int *popl, OpCode *pnop)
  279. {
  280. const KWD *pkwd = KwdOfTok(tk);
  281. if (nullptr == pkwd)
  282. return FALSE;
  283. *popl = pkwd->prec2;
  284. *pnop = pkwd->nop2;
  285. return TRUE;
  286. }
  287. BOOL TokIsUnop(tokens tk, int *popl, OpCode *pnop)
  288. {
  289. const KWD *pkwd = KwdOfTok(tk);
  290. if (nullptr == pkwd)
  291. return FALSE;
  292. *popl = pkwd->prec1;
  293. *pnop = pkwd->nop1;
  294. return TRUE;
  295. }
  296. IdentPtr PidFromTk(tokens tk);
  297. IdentPtr PidHashName(LPCOLESTR psz)
  298. {
  299. size_t csz = wcslen(psz);
  300. Assert(csz <= ULONG_MAX);
  301. return PidHashNameLen(psz, static_cast<uint32>(csz));
  302. }
  303. template <typename CharType>
  304. IdentPtr PidHashNameLen(CharType const * psz, CharType const * end, uint32 cch);
  305. template <typename CharType>
  306. IdentPtr PidHashNameLen(CharType const * psz, uint32 cch);
  307. template <typename CharType>
  308. IdentPtr PidHashNameLenWithHash(_In_reads_(cch) CharType const * psz, CharType const * end, int32 cch, uint32 luHash);
  309. template <typename CharType>
  310. inline IdentPtr FindExistingPid(
  311. CharType const * prgch,
  312. CharType const * end,
  313. int32 cch,
  314. uint32 luHash,
  315. IdentPtr **pppInsert,
  316. int32 *pBucketCount
  317. #if PROFILE_DICTIONARY
  318. , int& depth
  319. #endif
  320. );
  321. tokens TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode);
  322. tokens TkFromNameLenColor(_In_reads_(cch) LPCOLESTR prgch, uint32 cch);
  323. NoReleaseAllocator* GetAllocator() {return &m_noReleaseAllocator;}
  324. bool Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch);
  325. private:
  326. NoReleaseAllocator m_noReleaseAllocator; // to allocate identifiers
  327. Ident ** m_prgpidName; // hash table for names
  328. uint32 m_luMask; // hash mask
  329. uint32 m_luCount; // count of the number of entires in the hash table
  330. ErrHandler * m_perr; // error handler to use
  331. IdentPtr m_rpid[tkLimKwd];
  332. HashTbl(ErrHandler * perr)
  333. {
  334. m_prgpidName = nullptr;
  335. m_perr = perr;
  336. memset(&m_rpid, 0, sizeof(m_rpid));
  337. }
  338. ~HashTbl(void) {}
  339. void operator delete(void* p, size_t size)
  340. {
  341. HeapFree(p, size);
  342. }
  343. // Called to grow the number of buckets in the table to reduce the table density.
  344. void Grow();
  345. // Automatically grow the table if a bucket's length grows beyond BucketLengthLimit and the table is densely populated.
  346. static const uint BucketLengthLimit = 5;
  347. // When growing the bucket size we'll grow by GrowFactor. GrowFactor MUST be a power of 2.
  348. static const uint GrowFactor = 4;
  349. #if DEBUG
  350. uint CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask);
  351. #endif
  352. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) LPCOLESTR psz2, LPCOLESTR psz2end)
  353. {
  354. return memcmp(psz1, psz2, (psz2end - psz2) * sizeof(OLECHAR)) == 0;
  355. }
  356. static bool CharsAreEqual(__in_z LPCOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end)
  357. {
  358. return utf8::CharsAreEqual(psz1, psz2, psz2end, utf8::doAllowThreeByteSurrogates);
  359. }
  360. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) char const * psz2, char const * psz2end)
  361. {
  362. while (psz2 < psz2end)
  363. {
  364. if (*psz1++ != *psz2++)
  365. {
  366. return false;
  367. }
  368. }
  369. return true;
  370. }
  371. static void CopyString(__in_ecount((psz2end - psz2) + 1) LPOLESTR psz1, __in_ecount(psz2end - psz2) LPCOLESTR psz2, LPCOLESTR psz2end)
  372. {
  373. size_t cch = psz2end - psz2;
  374. js_memcpy_s(psz1, cch * sizeof(OLECHAR), psz2, cch * sizeof(OLECHAR));
  375. psz1[cch] = 0;
  376. }
  377. static void CopyString(LPOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end)
  378. {
  379. utf8::DecodeUnitsIntoAndNullTerminate(psz1, psz2, psz2end);
  380. }
  381. static void CopyString(LPOLESTR psz1, char const * psz2, char const * psz2end)
  382. {
  383. while (psz2 < psz2end)
  384. {
  385. *(psz1++) = *psz2++;
  386. }
  387. *psz1 = 0;
  388. }
  389. // note: on failure this may throw or return FALSE, depending on
  390. // where the failure occurred.
  391. BOOL Init(uint cidHash);
  392. /*************************************************************************/
  393. /* The following members are related to the keyword descriptor tables */
  394. /*************************************************************************/
  395. struct KWD
  396. {
  397. OpCode nop2;
  398. byte prec2;
  399. OpCode nop1;
  400. byte prec1;
  401. };
  402. struct ReservedWordInfo
  403. {
  404. StaticSym const * sym;
  405. ushort grfid;
  406. };
  407. static const ReservedWordInfo s_reservedWordInfo[tkID];
  408. static const KWD g_mptkkwd[tkLimKwd];
  409. static const KWD * KwdOfTok(tokens tk)
  410. { return (unsigned int)tk < tkLimKwd ? g_mptkkwd + tk : nullptr; }
  411. #if PROFILE_DICTIONARY
  412. DictionaryStats *stats;
  413. #endif
  414. };