Hash.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. struct StaticSym;
  7. /***************************************************************************
  8. Hashing functions. Definitions in core\hashfunc.cpp.
  9. ***************************************************************************/
  10. ULONG CaseSensitiveComputeHashCch(LPCOLESTR prgch, long cch);
  11. ULONG CaseSensitiveComputeHashCch(LPCUTF8 prgch, long cch);
  12. ULONG CaseInsensitiveComputeHash(LPCOLESTR posz);
  13. enum
  14. {
  15. fidNil = 0x0000,
  16. fidKwdRsvd = 0x0001, // the keyword is a reserved word
  17. fidKwdFutRsvd = 0x0002, // a future reserved word, but only in strict mode
  18. // Flags to identify tracked aliases of "eval"
  19. fidEval = 0x0008,
  20. // Flags to identify tracked aliases of "let"
  21. fidLetOrConst = 0x0010, // ID has previously been used in a block-scoped declaration
  22. // This flag is used by the Parser CountDcls and FillDcls methods.
  23. // CountDcls sets the bit as it walks through the var decls so that
  24. // it can skip duplicates. FillDcls clears the bit as it walks through
  25. // again to skip duplicates.
  26. fidGlobalDcl = 0x2000,
  27. fidUsed = 0x4000 // name referenced by source code
  28. };
  29. struct BlockIdsStack
  30. {
  31. int id;
  32. BlockIdsStack *prev;
  33. };
  34. class Span
  35. {
  36. charcount_t m_ichMin;
  37. charcount_t m_ichLim;
  38. public:
  39. Span(): m_ichMin((charcount_t)-1), m_ichLim((charcount_t)-1) { }
  40. Span(charcount_t ichMin, charcount_t ichLim): m_ichMin(ichMin), m_ichLim(ichLim) { }
  41. charcount_t GetIchMin() { return m_ichMin; }
  42. charcount_t GetIchLim() { Assert(m_ichMin != (charcount_t)-1); return m_ichLim; }
  43. void Set(charcount_t ichMin, charcount_t ichLim)
  44. {
  45. m_ichMin = ichMin;
  46. m_ichLim = ichLim;
  47. }
  48. operator bool() { return m_ichMin != -1; }
  49. };
  50. struct PidRefStack
  51. {
  52. PidRefStack() : isDynamic(false), id(0), sym(nullptr), prev(nullptr), isModuleExport(false) {}
  53. PidRefStack(int id) : isDynamic(false), id(id), sym(nullptr), prev(nullptr), isModuleExport(false) {}
  54. int GetScopeId() const { return id; }
  55. Symbol *GetSym() const { return sym; }
  56. void SetSym(Symbol *sym) { this->sym = sym; }
  57. bool IsDynamicBinding() const { return isDynamic; }
  58. void SetDynamicBinding() { isDynamic = true; }
  59. bool IsModuleExport() const { return isModuleExport; }
  60. void SetModuleExport() { isModuleExport = true; }
  61. Symbol **GetSymRef()
  62. {
  63. return &sym;
  64. }
  65. bool isDynamic;
  66. bool isModuleExport;
  67. int id;
  68. Symbol *sym;
  69. PidRefStack *prev;
  70. };
  71. enum AssignmentState : byte {
  72. NotAssigned,
  73. AssignedOnce,
  74. AssignedMultipleTimes
  75. };
  76. struct Ident
  77. {
  78. friend class HashTbl;
  79. private:
  80. Ident * m_pidNext; // next identifier in this hash bucket
  81. PidRefStack *m_pidRefStack;
  82. ushort m_tk; // token# if identifier is a keyword
  83. ushort m_grfid; // see fidXXX above
  84. ulong m_luHash; // hash value
  85. ulong m_cch; // length of the identifier spelling
  86. Js::PropertyId m_propertyId;
  87. AssignmentState assignmentState;
  88. OLECHAR m_sz[]; // the spelling follows (null terminated)
  89. void SetTk(tokens tk, ushort grfid);
  90. public:
  91. LPCOLESTR Psz(void)
  92. { return m_sz; }
  93. ulong Cch(void)
  94. { return m_cch; }
  95. tokens Tk(bool isStrictMode);
  96. ulong Hash(void)
  97. { return m_luHash; }
  98. PidRefStack *GetTopRef() const
  99. {
  100. return m_pidRefStack;
  101. }
  102. void SetTopRef(PidRefStack *ref)
  103. {
  104. m_pidRefStack = ref;
  105. }
  106. void PromoteAssignmentState()
  107. {
  108. if (assignmentState == NotAssigned)
  109. {
  110. assignmentState = AssignedOnce;
  111. }
  112. else if (assignmentState == AssignedOnce)
  113. {
  114. assignmentState = AssignedMultipleTimes;
  115. }
  116. }
  117. bool IsSingleAssignment()
  118. {
  119. return assignmentState == AssignedOnce;
  120. }
  121. PidRefStack *GetPidRefForScopeId(int scopeId)
  122. {
  123. PidRefStack *ref;
  124. for (ref = m_pidRefStack; ref; ref = ref->prev)
  125. {
  126. int refId = ref->GetScopeId();
  127. if (refId == scopeId)
  128. {
  129. return ref;
  130. }
  131. if (refId < scopeId)
  132. {
  133. break;
  134. }
  135. }
  136. return nullptr;
  137. }
  138. void PushPidRef(int blockId, PidRefStack *newRef)
  139. {
  140. AssertMsg(blockId >= 0, "Block Id's should be greater than 0");
  141. newRef->id = blockId;
  142. newRef->prev = m_pidRefStack;
  143. m_pidRefStack = newRef;
  144. }
  145. PidRefStack * RemovePrevPidRef(PidRefStack *ref)
  146. {
  147. PidRefStack *prevRef;
  148. if (ref == nullptr)
  149. {
  150. prevRef = m_pidRefStack;
  151. Assert(prevRef);
  152. m_pidRefStack = prevRef->prev;
  153. }
  154. else
  155. {
  156. prevRef = ref->prev;
  157. Assert(prevRef);
  158. ref->prev = prevRef->prev;
  159. }
  160. return prevRef;
  161. }
  162. PidRefStack * FindOrAddPidRef(ArenaAllocator *alloc, int scopeId)
  163. {
  164. // If the stack is empty, or we are pushing to the innermost scope already,
  165. // we can go ahead and push a new PidRef on the stack.
  166. if (m_pidRefStack == nullptr)
  167. {
  168. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId);
  169. if (newRef == nullptr)
  170. {
  171. return nullptr;
  172. }
  173. newRef->prev = m_pidRefStack;
  174. m_pidRefStack = newRef;
  175. return newRef;
  176. }
  177. // Search for the corresponding PidRef, or the position to insert the new PidRef.
  178. PidRefStack *ref = m_pidRefStack;
  179. PidRefStack *prevRef = nullptr;
  180. while (1)
  181. {
  182. // We may already have a ref for this scopeId.
  183. if (ref->id == scopeId)
  184. {
  185. return ref;
  186. }
  187. if (ref->prev == nullptr || ref->id < scopeId)
  188. {
  189. // No existing PidRef for this scopeId, so create and insert one at this position.
  190. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId);
  191. if (newRef == nullptr)
  192. {
  193. return nullptr;
  194. }
  195. if (ref->id < scopeId)
  196. {
  197. if (prevRef != nullptr)
  198. {
  199. // Param scope has a reference to the same pid as the one we are inserting into the body.
  200. // There is a another reference (prevRef), probably from an inner block in the body.
  201. // So we should insert the new reference between them.
  202. newRef->prev = prevRef->prev;
  203. prevRef->prev = newRef;
  204. }
  205. else
  206. {
  207. // When we have code like below, prevRef will be null,
  208. // function (a = x) { var x = 1; }
  209. newRef->prev = m_pidRefStack;
  210. m_pidRefStack = newRef;
  211. }
  212. }
  213. else
  214. {
  215. newRef->prev = ref->prev;
  216. ref->prev = newRef;
  217. }
  218. return newRef;
  219. }
  220. Assert(ref->prev->id <= ref->id);
  221. prevRef = ref;
  222. ref = ref->prev;
  223. }
  224. }
  225. Js::PropertyId GetPropertyId() const { return m_propertyId; }
  226. void SetPropertyId(Js::PropertyId id) { m_propertyId = id; }
  227. void SetIsEval() { m_grfid |= fidEval; }
  228. BOOL GetIsEval() const { return m_grfid & fidEval; }
  229. void SetIsLetOrConst() { m_grfid |= fidLetOrConst; }
  230. BOOL GetIsLetOrConst() const { return m_grfid & fidLetOrConst; }
  231. };
  232. /*****************************************************************************/
  233. class HashTbl
  234. {
  235. public:
  236. static HashTbl * Create(uint cidHash, ErrHandler * perr);
  237. void Release(void)
  238. {
  239. delete this;
  240. }
  241. BOOL TokIsBinop(tokens tk, int *popl, OpCode *pnop)
  242. {
  243. const KWD *pkwd = KwdOfTok(tk);
  244. if (nullptr == pkwd)
  245. return FALSE;
  246. *popl = pkwd->prec2;
  247. *pnop = pkwd->nop2;
  248. return TRUE;
  249. }
  250. BOOL TokIsUnop(tokens tk, int *popl, OpCode *pnop)
  251. {
  252. const KWD *pkwd = KwdOfTok(tk);
  253. if (nullptr == pkwd)
  254. return FALSE;
  255. *popl = pkwd->prec1;
  256. *pnop = pkwd->nop1;
  257. return TRUE;
  258. }
  259. IdentPtr PidFromTk(tokens tk);
  260. IdentPtr PidHashName(LPCOLESTR psz)
  261. {
  262. size_t csz = wcslen(psz);
  263. Assert(csz <= ULONG_MAX);
  264. return PidHashNameLen(psz, static_cast<ulong>(csz));
  265. }
  266. template <typename CharType>
  267. IdentPtr PidHashNameLen(CharType const * psz, ulong cch);
  268. template <typename CharType>
  269. IdentPtr PidHashNameLenWithHash(_In_reads_(cch) CharType const * psz, long cch, ulong luHash);
  270. template <typename CharType>
  271. __inline IdentPtr FindExistingPid(
  272. CharType const * prgch,
  273. long cch,
  274. ulong luHash,
  275. IdentPtr **pppInsert,
  276. long *pBucketCount
  277. #if PROFILE_DICTIONARY
  278. , int& depth
  279. #endif
  280. );
  281. tokens TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, ulong cch, bool isStrictMode);
  282. tokens TkFromNameLenColor(_In_reads_(cch) LPCOLESTR prgch, ulong cch);
  283. NoReleaseAllocator* GetAllocator() {return &m_noReleaseAllocator;}
  284. bool Contains(_In_reads_(cch) LPCOLESTR prgch, long cch);
  285. private:
  286. NoReleaseAllocator m_noReleaseAllocator; // to allocate identifiers
  287. Ident ** m_prgpidName; // hash table for names
  288. ulong m_luMask; // hash mask
  289. ulong m_luCount; // count of the number of entires in the hash table
  290. ErrHandler * m_perr; // error handler to use
  291. IdentPtr m_rpid[tkLimKwd];
  292. HashTbl(ErrHandler * perr)
  293. {
  294. m_prgpidName = nullptr;
  295. m_perr = perr;
  296. memset(&m_rpid, 0, sizeof(m_rpid));
  297. }
  298. ~HashTbl(void) {}
  299. // Called to grow the number of buckets in the table to reduce the table density.
  300. void Grow();
  301. // Automatically grow the table if a bucket's length grows beyond BucketLengthLimit and the table is densely populated.
  302. static const uint BucketLengthLimit = 5;
  303. // When growing the bucket size we'll grow by GrowFactor. GrowFactor MUST be a power of 2.
  304. static const uint GrowFactor = 4;
  305. #if DEBUG
  306. uint CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask);
  307. #endif
  308. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(cch2) LPCOLESTR psz2, long cch2)
  309. {
  310. return memcmp(psz1, psz2, cch2 * sizeof(OLECHAR)) == 0;
  311. }
  312. static bool CharsAreEqual(__in_z LPCOLESTR psz1, LPCUTF8 psz2, long cch2)
  313. {
  314. return utf8::CharsAreEqual(psz1, psz2, cch2, utf8::doAllowThreeByteSurrogates);
  315. }
  316. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(cch2) char const * psz2, long cch2)
  317. {
  318. while (cch2-- > 0)
  319. {
  320. if (*psz1++ != *psz2++)
  321. return false;
  322. }
  323. return true;
  324. }
  325. static void CopyString(__in_ecount(cch + 1) LPOLESTR psz1, __in_ecount(cch) LPCOLESTR psz2, long cch)
  326. {
  327. js_memcpy_s(psz1, cch * sizeof(OLECHAR), psz2, cch * sizeof(OLECHAR));
  328. psz1[cch] = 0;
  329. }
  330. static void CopyString(__in_ecount(cch + 1) LPOLESTR psz1, LPCUTF8 psz2, long cch)
  331. {
  332. utf8::DecodeIntoAndNullTerminate(psz1, psz2, cch);
  333. }
  334. static void CopyString(__in_ecount(cch + 1) LPOLESTR psz1, __in_ecount(cch) char const * psz2, long cch)
  335. {
  336. while (cch-- > 0)
  337. *(psz1++) = *psz2++;
  338. *psz1 = 0;
  339. }
  340. // note: on failure this may throw or return FALSE, depending on
  341. // where the failure occurred.
  342. BOOL Init(uint cidHash);
  343. /*************************************************************************/
  344. /* The following members are related to the keyword descriptor tables */
  345. /*************************************************************************/
  346. struct KWD
  347. {
  348. OpCode nop2;
  349. byte prec2;
  350. OpCode nop1;
  351. byte prec1;
  352. };
  353. struct ReservedWordInfo
  354. {
  355. StaticSym const * sym;
  356. ushort grfid;
  357. };
  358. static const ReservedWordInfo s_reservedWordInfo[tkID];
  359. static const KWD g_mptkkwd[tkLimKwd];
  360. static const KWD * KwdOfTok(tokens tk)
  361. { return (unsigned int)tk < tkLimKwd ? g_mptkkwd + tk : nullptr; }
  362. #if PROFILE_DICTIONARY
  363. DictionaryStats *stats;
  364. #endif
  365. };