hash.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. struct StaticSym;
  7. /***************************************************************************
  8. Hashing functions. Definitions in core\hashfunc.cpp.
  9. ***************************************************************************/
  10. ULONG CaseSensitiveComputeHashCch(LPCOLESTR prgch, long cch);
  11. ULONG CaseSensitiveComputeHashCch(LPCUTF8 prgch, long cch);
  12. ULONG CaseInsensitiveComputeHash(LPCOLESTR posz);
  13. enum
  14. {
  15. fidNil = 0x0000,
  16. fidKwdRsvd = 0x0001, // the keyword is a reserved word
  17. fidKwdFutRsvd = 0x0002, // a future reserved word, but only in strict mode
  18. // Flags to identify tracked aliases of "eval"
  19. fidEval = 0x0008,
  20. // Flags to identify tracked aliases of "let"
  21. fidLetOrConst = 0x0010, // ID has previously been used in a block-scoped declaration
  22. // This flag is used by the Parser CountDcls and FillDcls methods.
  23. // CountDcls sets the bit as it walks through the var decls so that
  24. // it can skip duplicates. FillDcls clears the bit as it walks through
  25. // again to skip duplicates.
  26. fidGlobalDcl = 0x2000,
  27. fidUsed = 0x4000 // name referenced by source code
  28. };
  29. struct BlockIdsStack
  30. {
  31. int id;
  32. BlockIdsStack *prev;
  33. };
  34. class Span
  35. {
  36. charcount_t m_ichMin;
  37. charcount_t m_ichLim;
  38. public:
  39. Span(): m_ichMin((charcount_t)-1), m_ichLim((charcount_t)-1) { }
  40. Span(charcount_t ichMin, charcount_t ichLim): m_ichMin(ichMin), m_ichLim(ichLim) { }
  41. charcount_t GetIchMin() { return m_ichMin; }
  42. charcount_t GetIchLim() { Assert(m_ichMin != (charcount_t)-1); return m_ichLim; }
  43. void Set(charcount_t ichMin, charcount_t ichLim)
  44. {
  45. m_ichMin = ichMin;
  46. m_ichLim = ichLim;
  47. }
  48. operator bool() { return m_ichMin != -1; }
  49. };
  50. struct PidRefStack
  51. {
  52. PidRefStack() : isAsg(false), isDynamic(false), id(0), span(), sym(nullptr), prev(nullptr) {}
  53. PidRefStack(int id) : isAsg(false), isDynamic(false), id(id), span(), sym(nullptr), prev(nullptr) {}
  54. charcount_t GetIchMin() { return span.GetIchMin(); }
  55. charcount_t GetIchLim() { return span.GetIchLim(); }
  56. int GetScopeId() const { return id; }
  57. Symbol *GetSym() const { return sym; }
  58. void SetSym(Symbol *sym) { this->sym = sym; }
  59. bool IsAssignment() const { return isAsg; }
  60. bool IsDynamicBinding() const { return isDynamic; }
  61. void SetDynamicBinding() { isDynamic = true; }
  62. void TrackAssignment(charcount_t ichMin, charcount_t ichLim);
  63. Symbol **GetSymRef()
  64. {
  65. return &sym;
  66. }
  67. bool isAsg;
  68. bool isDynamic;
  69. int id;
  70. Span span;
  71. Symbol *sym;
  72. PidRefStack *prev;
  73. };
  74. enum AssignmentState : byte {
  75. NotAssigned,
  76. AssignedOnce,
  77. AssignedMultipleTimes
  78. };
  79. struct Ident
  80. {
  81. friend class HashTbl;
  82. private:
  83. Ident * m_pidNext; // next identifier in this hash bucket
  84. PidRefStack *m_pidRefStack;
  85. ushort m_tk; // token# if identifier is a keyword
  86. ushort m_grfid; // see fidXXX above
  87. ulong m_luHash; // hash value
  88. ulong m_cch; // length of the identifier spelling
  89. Js::PropertyId m_propertyId;
  90. AssignmentState assignmentState;
  91. OLECHAR m_sz[]; // the spelling follows (null terminated)
  92. void SetTk(tokens tk, ushort grfid);
  93. public:
  94. LPCOLESTR Psz(void)
  95. { return m_sz; }
  96. ulong Cch(void)
  97. { return m_cch; }
  98. tokens Tk(bool isStrictMode);
  99. ulong Hash(void)
  100. { return m_luHash; }
  101. PidRefStack *GetTopRef() const
  102. {
  103. return m_pidRefStack;
  104. }
  105. void SetTopRef(PidRefStack *ref)
  106. {
  107. m_pidRefStack = ref;
  108. }
  109. void PromoteAssignmentState()
  110. {
  111. if (assignmentState == NotAssigned)
  112. {
  113. assignmentState = AssignedOnce;
  114. }
  115. else if (assignmentState == AssignedOnce)
  116. {
  117. assignmentState = AssignedMultipleTimes;
  118. }
  119. }
  120. bool IsSingleAssignment()
  121. {
  122. return assignmentState == AssignedOnce;
  123. }
  124. PidRefStack *GetPidRefForScopeId(int scopeId)
  125. {
  126. PidRefStack *ref;
  127. for (ref = m_pidRefStack; ref; ref = ref->prev)
  128. {
  129. int refId = ref->GetScopeId();
  130. if (refId == scopeId)
  131. {
  132. return ref;
  133. }
  134. if (refId < scopeId)
  135. {
  136. break;
  137. }
  138. }
  139. return nullptr;
  140. }
  141. charcount_t GetTopIchMin() const
  142. {
  143. Assert(m_pidRefStack);
  144. return m_pidRefStack->GetIchMin();
  145. }
  146. charcount_t GetTopIchLim() const
  147. {
  148. Assert(m_pidRefStack);
  149. return m_pidRefStack->GetIchLim();
  150. }
  151. void PushPidRef(int blockId, PidRefStack *newRef)
  152. {
  153. AssertMsg(blockId >= 0, "Block Id's should be greater than 0");
  154. newRef->id = blockId;
  155. newRef->prev = m_pidRefStack;
  156. m_pidRefStack = newRef;
  157. }
  158. PidRefStack * RemovePrevPidRef(PidRefStack *ref)
  159. {
  160. PidRefStack *prevRef;
  161. if (ref == nullptr)
  162. {
  163. prevRef = m_pidRefStack;
  164. Assert(prevRef);
  165. m_pidRefStack = prevRef->prev;
  166. }
  167. else
  168. {
  169. prevRef = ref->prev;
  170. Assert(prevRef);
  171. ref->prev = prevRef->prev;
  172. }
  173. return prevRef;
  174. }
  175. PidRefStack * FindOrAddPidRef(ArenaAllocator *alloc, int scopeId, int maxScopeId = -1)
  176. {
  177. // If we were supplied with a maxScopeId, then we potentially need to look one more
  178. // scope level out. This can happen if we have a declaration in function scope shadowing
  179. // a parameter scope declaration. In this case we'd need to look beyond the body scope (scopeId)
  180. // to the outer parameterScope (maxScopeId).
  181. if (maxScopeId == -1)
  182. {
  183. maxScopeId = scopeId;
  184. }
  185. // If the stack is empty, or we are pushing to the innermost scope already,
  186. // we can go ahead and push a new PidRef on the stack.
  187. if (m_pidRefStack == nullptr || m_pidRefStack->id < maxScopeId)
  188. {
  189. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId);
  190. if (newRef == nullptr)
  191. {
  192. return nullptr;
  193. }
  194. newRef->prev = m_pidRefStack;
  195. m_pidRefStack = newRef;
  196. return newRef;
  197. }
  198. // Search for the corresponding PidRef, or the position to insert the new PidRef.
  199. PidRefStack *ref = m_pidRefStack;
  200. while (1)
  201. {
  202. // We may already have a ref for this scopeId.
  203. if (ref->id == scopeId)
  204. {
  205. return ref;
  206. }
  207. if (ref->id == maxScopeId
  208. // If we match the different maxScopeId, then this match is sufficient if it is a decl.
  209. // This is because the parameter scope decl would have been created before this point.
  210. && ref->sym != nullptr)
  211. {
  212. return ref;
  213. }
  214. if (ref->prev == nullptr || ref->prev->id < maxScopeId)
  215. {
  216. // No existing PidRef for this scopeId, so create and insert one at this position.
  217. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId);
  218. if (newRef == nullptr)
  219. {
  220. return nullptr;
  221. }
  222. if (ref->id < scopeId)
  223. {
  224. // Without parameter scope, we would have just pushed the ref instead of inserting.
  225. // We effectively had a false positive match (a parameter scope ref with no sym)
  226. // so we need to push the Pid rather than inserting.
  227. newRef->prev = m_pidRefStack;
  228. m_pidRefStack = newRef;
  229. }
  230. else
  231. {
  232. newRef->prev = ref->prev;
  233. ref->prev = newRef;
  234. }
  235. return newRef;
  236. }
  237. Assert(ref->prev->id <= ref->id);
  238. ref = ref->prev;
  239. }
  240. }
  241. Js::PropertyId GetPropertyId() const { return m_propertyId; }
  242. void SetPropertyId(Js::PropertyId id) { m_propertyId = id; }
  243. void SetIsEval() { m_grfid |= fidEval; }
  244. BOOL GetIsEval() const { return m_grfid & fidEval; }
  245. void SetIsLetOrConst() { m_grfid |= fidLetOrConst; }
  246. BOOL GetIsLetOrConst() const { return m_grfid & fidLetOrConst; }
  247. };
  248. /*****************************************************************************/
  249. class HashTbl
  250. {
  251. public:
  252. static HashTbl * Create(uint cidHash, ErrHandler * perr);
  253. void Release(void)
  254. {
  255. delete this;
  256. }
  257. BOOL TokIsBinop(tokens tk, int *popl, OpCode *pnop)
  258. {
  259. const KWD *pkwd = KwdOfTok(tk);
  260. if (nullptr == pkwd)
  261. return FALSE;
  262. *popl = pkwd->prec2;
  263. *pnop = pkwd->nop2;
  264. return TRUE;
  265. }
  266. BOOL TokIsUnop(tokens tk, int *popl, OpCode *pnop)
  267. {
  268. const KWD *pkwd = KwdOfTok(tk);
  269. if (nullptr == pkwd)
  270. return FALSE;
  271. *popl = pkwd->prec1;
  272. *pnop = pkwd->nop1;
  273. return TRUE;
  274. }
  275. IdentPtr PidFromTk(tokens tk);
  276. IdentPtr PidHashName(LPCOLESTR psz)
  277. {
  278. size_t csz = wcslen(psz);
  279. Assert(csz <= ULONG_MAX);
  280. return PidHashNameLen(psz, static_cast<ulong>(csz));
  281. }
  282. template <typename CharType>
  283. IdentPtr PidHashNameLen(CharType const * psz, ulong cch);
  284. template <typename CharType>
  285. IdentPtr PidHashNameLenWithHash(_In_reads_(cch) CharType const * psz, long cch, ulong luHash);
  286. template <typename CharType>
  287. __inline IdentPtr FindExistingPid(
  288. CharType const * prgch,
  289. long cch,
  290. ulong luHash,
  291. IdentPtr **pppInsert,
  292. long *pBucketCount
  293. #if PROFILE_DICTIONARY
  294. , int& depth
  295. #endif
  296. );
  297. tokens TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, ulong cch, bool isStrictMode);
  298. tokens TkFromNameLenColor(_In_reads_(cch) LPCOLESTR prgch, ulong cch);
  299. NoReleaseAllocator* GetAllocator() {return &m_noReleaseAllocator;}
  300. bool Contains(_In_reads_(cch) LPCOLESTR prgch, long cch);
  301. private:
  302. NoReleaseAllocator m_noReleaseAllocator; // to allocate identifiers
  303. Ident ** m_prgpidName; // hash table for names
  304. ulong m_luMask; // hash mask
  305. ulong m_luCount; // count of the number of entires in the hash table
  306. ErrHandler * m_perr; // error handler to use
  307. IdentPtr m_rpid[tkLimKwd];
  308. HashTbl(ErrHandler * perr)
  309. {
  310. m_prgpidName = nullptr;
  311. m_perr = perr;
  312. memset(&m_rpid, 0, sizeof(m_rpid));
  313. }
  314. ~HashTbl(void) {}
  315. // Called to grow the number of buckets in the table to reduce the table density.
  316. void Grow();
  317. // Automatically grow the table if a bucket's length grows beyond BucketLengthLimit and the table is densely populated.
  318. static const uint BucketLengthLimit = 5;
  319. // When growing the bucket size we'll grow by GrowFactor. GrowFactor MUST be a power of 2.
  320. static const uint GrowFactor = 4;
  321. #if DEBUG
  322. uint CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask);
  323. #endif
  324. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(cch2) LPCOLESTR psz2, long cch2)
  325. {
  326. return memcmp(psz1, psz2, cch2 * sizeof(OLECHAR)) == 0;
  327. }
  328. static bool CharsAreEqual(__in_z LPCOLESTR psz1, LPCUTF8 psz2, long cch2)
  329. {
  330. return utf8::CharsAreEqual(psz1, psz2, cch2, utf8::doAllowThreeByteSurrogates);
  331. }
  332. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(cch2) char const * psz2, long cch2)
  333. {
  334. while (cch2-- > 0)
  335. {
  336. if (*psz1++ != *psz2++)
  337. return false;
  338. }
  339. return true;
  340. }
  341. static void CopyString(__in_ecount(cch + 1) LPOLESTR psz1, __in_ecount(cch) LPCOLESTR psz2, long cch)
  342. {
  343. js_memcpy_s(psz1, cch * sizeof(OLECHAR), psz2, cch * sizeof(OLECHAR));
  344. psz1[cch] = 0;
  345. }
  346. static void CopyString(__in_ecount(cch + 1) LPOLESTR psz1, LPCUTF8 psz2, long cch)
  347. {
  348. utf8::DecodeIntoAndNullTerminate(psz1, psz2, cch);
  349. }
  350. static void CopyString(__in_ecount(cch + 1) LPOLESTR psz1, __in_ecount(cch) char const * psz2, long cch)
  351. {
  352. while (cch-- > 0)
  353. *(psz1++) = *psz2++;
  354. *psz1 = 0;
  355. }
  356. // note: on failure this may throw or return FALSE, depending on
  357. // where the failure occurred.
  358. BOOL Init(uint cidHash);
  359. /*************************************************************************/
  360. /* The following members are related to the keyword descriptor tables */
  361. /*************************************************************************/
  362. struct KWD
  363. {
  364. OpCode nop2;
  365. byte prec2;
  366. OpCode nop1;
  367. byte prec1;
  368. };
  369. struct ReservedWordInfo
  370. {
  371. StaticSym const * sym;
  372. ushort grfid;
  373. };
  374. static const ReservedWordInfo s_reservedWordInfo[tkID];
  375. static const KWD g_mptkkwd[tkLimKwd];
  376. static const KWD * KwdOfTok(tokens tk)
  377. { return (unsigned int)tk < tkLimKwd ? g_mptkkwd + tk : nullptr; }
  378. #if PROFILE_DICTIONARY
  379. DictionaryStats *stats;
  380. #endif
  381. };