Hash.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. // StaticSym contains a string literal at the end (flexible array) and is
  7. // meant to be initialized statically. However, flexible array initialization
  8. // is not allowed in standard C++. We declare each StaticSym with length
  9. // instead and cast to common StaticSymLen<0>* (StaticSym*) to access.
  10. template <uint32 N>
  11. struct StaticSymLen
  12. {
  13. uint32 luHash;
  14. uint32 cch;
  15. OLECHAR sz[N];
  16. };
  17. typedef StaticSymLen<0> StaticSym;
  18. /***************************************************************************
  19. Hashing functions. Definitions in core\hashfunc.cpp.
  20. ***************************************************************************/
  21. ULONG CaseSensitiveComputeHash(LPCOLESTR prgch, LPCOLESTR end);
  22. ULONG CaseSensitiveComputeHash(LPCUTF8 prgch, LPCUTF8 end);
  23. ULONG CaseInsensitiveComputeHash(LPCOLESTR posz);
  24. enum
  25. {
  26. fidNil = 0x0000,
  27. fidKwdRsvd = 0x0001, // the keyword is a reserved word
  28. fidKwdFutRsvd = 0x0002, // a future reserved word, but only in strict mode
  29. // Flags to identify tracked aliases of "eval"
  30. fidEval = 0x0008,
  31. // Flags to identify tracked aliases of "let"
  32. fidLetOrConst = 0x0010, // ID has previously been used in a block-scoped declaration
  33. // This flag is used by the Parser CountDcls and FillDcls methods.
  34. // CountDcls sets the bit as it walks through the var decls so that
  35. // it can skip duplicates. FillDcls clears the bit as it walks through
  36. // again to skip duplicates.
  37. fidGlobalDcl = 0x2000,
  38. fidUsed = 0x4000, // name referenced by source code
  39. fidModuleExport = 0x8000 // name is module export
  40. };
  41. struct BlockIdsStack
  42. {
  43. int id;
  44. BlockIdsStack *prev;
  45. };
  46. class Span
  47. {
  48. charcount_t m_ichMin;
  49. charcount_t m_ichLim;
  50. public:
  51. Span(): m_ichMin((charcount_t)-1), m_ichLim((charcount_t)-1) { }
  52. Span(charcount_t ichMin, charcount_t ichLim): m_ichMin(ichMin), m_ichLim(ichLim) { }
  53. charcount_t GetIchMin() { return m_ichMin; }
  54. charcount_t GetIchLim() { Assert(m_ichMin != (charcount_t)-1); return m_ichLim; }
  55. void Set(charcount_t ichMin, charcount_t ichLim)
  56. {
  57. m_ichMin = ichMin;
  58. m_ichLim = ichLim;
  59. }
  60. operator bool() { return m_ichMin != -1; }
  61. };
  62. struct PidRefStack
  63. {
  64. PidRefStack() : isAsg(false), isDynamic(false), id(0), funcId(0), sym(nullptr), prev(nullptr), isEscape(false), isModuleExport(false) {}
  65. PidRefStack(int id, Js::LocalFunctionId funcId) : isAsg(false), isDynamic(false), id(id), funcId(funcId), sym(nullptr), prev(nullptr), isEscape(false), isModuleExport(false) {}
  66. int GetScopeId() const { return id; }
  67. Js::LocalFunctionId GetFuncScopeId() const { return funcId; }
  68. Symbol *GetSym() const { return sym; }
  69. void SetSym(Symbol *sym) { this->sym = sym; }
  70. bool IsAssignment() const { return isAsg; }
  71. bool IsEscape() const { return isEscape; }
  72. void SetIsEscape(bool is) { isEscape = is; }
  73. bool IsDynamicBinding() const { return isDynamic; }
  74. void SetDynamicBinding() { isDynamic = true; }
  75. Symbol **GetSymRef()
  76. {
  77. return &sym;
  78. }
  79. bool isAsg;
  80. bool isDynamic;
  81. bool isModuleExport;
  82. bool isEscape;
  83. int id;
  84. Js::LocalFunctionId funcId;
  85. Symbol *sym;
  86. PidRefStack *prev;
  87. };
  88. enum AssignmentState : byte {
  89. NotAssigned,
  90. AssignedOnce,
  91. AssignedMultipleTimes
  92. };
  93. struct Ident
  94. {
  95. friend class HashTbl;
  96. private:
  97. Ident * m_pidNext; // next identifier in this hash bucket
  98. PidRefStack *m_pidRefStack;
  99. ushort m_tk; // token# if identifier is a keyword
  100. ushort m_grfid; // see fidXXX above
  101. uint32 m_luHash; // hash value
  102. uint32 m_cch; // length of the identifier spelling
  103. Js::PropertyId m_propertyId;
  104. AssignmentState assignmentState;
  105. OLECHAR m_sz[]; // the spelling follows (null terminated)
  106. void SetTk(tokens tk, ushort grfid);
  107. public:
  108. LPCOLESTR Psz(void)
  109. { return m_sz; }
  110. uint32 Cch(void)
  111. { return m_cch; }
  112. tokens Tk(bool isStrictMode);
  113. uint32 Hash(void)
  114. { return m_luHash; }
  115. PidRefStack *GetTopRef() const
  116. {
  117. return m_pidRefStack;
  118. }
  119. void SetTopRef(PidRefStack *ref)
  120. {
  121. m_pidRefStack = ref;
  122. }
  123. void PromoteAssignmentState()
  124. {
  125. if (assignmentState == NotAssigned)
  126. {
  127. assignmentState = AssignedOnce;
  128. }
  129. else if (assignmentState == AssignedOnce)
  130. {
  131. assignmentState = AssignedMultipleTimes;
  132. }
  133. }
  134. bool IsSingleAssignment()
  135. {
  136. return assignmentState == AssignedOnce;
  137. }
  138. PidRefStack *GetPidRefForScopeId(int scopeId)
  139. {
  140. PidRefStack *ref;
  141. for (ref = m_pidRefStack; ref; ref = ref->prev)
  142. {
  143. int refId = ref->GetScopeId();
  144. if (refId == scopeId)
  145. {
  146. return ref;
  147. }
  148. if (refId < scopeId)
  149. {
  150. break;
  151. }
  152. }
  153. return nullptr;
  154. }
  155. void PushPidRef(int blockId, Js::LocalFunctionId funcId, PidRefStack *newRef)
  156. {
  157. AssertMsg(blockId >= 0, "Block Id's should be greater than 0");
  158. newRef->id = blockId;
  159. newRef->funcId = funcId;
  160. newRef->prev = m_pidRefStack;
  161. m_pidRefStack = newRef;
  162. }
  163. PidRefStack * RemovePrevPidRef(PidRefStack *ref)
  164. {
  165. PidRefStack *prevRef;
  166. if (ref == nullptr)
  167. {
  168. prevRef = m_pidRefStack;
  169. Assert(prevRef);
  170. m_pidRefStack = prevRef->prev;
  171. }
  172. else
  173. {
  174. prevRef = ref->prev;
  175. Assert(prevRef);
  176. ref->prev = prevRef->prev;
  177. }
  178. return prevRef;
  179. }
  180. PidRefStack * TopDecl(int maxBlockId) const
  181. {
  182. for (PidRefStack *pidRef = m_pidRefStack; pidRef; pidRef = pidRef->prev)
  183. {
  184. if (pidRef->id > maxBlockId)
  185. {
  186. continue;
  187. }
  188. if (pidRef->sym != nullptr)
  189. {
  190. return pidRef;
  191. }
  192. }
  193. return nullptr;
  194. }
  195. PidRefStack * FindOrAddPidRef(ArenaAllocator *alloc, int scopeId, Js::LocalFunctionId funcId)
  196. {
  197. // If the stack is empty, or we are pushing to the innermost scope already,
  198. // we can go ahead and push a new PidRef on the stack.
  199. if (m_pidRefStack == nullptr)
  200. {
  201. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId, funcId);
  202. if (newRef == nullptr)
  203. {
  204. return nullptr;
  205. }
  206. newRef->prev = m_pidRefStack;
  207. m_pidRefStack = newRef;
  208. return newRef;
  209. }
  210. // Search for the corresponding PidRef, or the position to insert the new PidRef.
  211. PidRefStack *ref = m_pidRefStack;
  212. PidRefStack *prevRef = nullptr;
  213. while (1)
  214. {
  215. // We may already have a ref for this scopeId.
  216. if (ref->id == scopeId)
  217. {
  218. return ref;
  219. }
  220. if (ref->prev == nullptr || ref->id < scopeId)
  221. {
  222. // No existing PidRef for this scopeId, so create and insert one at this position.
  223. PidRefStack *newRef = Anew(alloc, PidRefStack, scopeId, funcId);
  224. if (newRef == nullptr)
  225. {
  226. return nullptr;
  227. }
  228. if (ref->id < scopeId)
  229. {
  230. if (prevRef != nullptr)
  231. {
  232. // Param scope has a reference to the same pid as the one we are inserting into the body.
  233. // There is a another reference (prevRef), probably from an inner block in the body.
  234. // So we should insert the new reference between them.
  235. newRef->prev = prevRef->prev;
  236. prevRef->prev = newRef;
  237. }
  238. else
  239. {
  240. // When we have code like below, prevRef will be null,
  241. // function (a = x) { var x = 1; }
  242. newRef->prev = m_pidRefStack;
  243. m_pidRefStack = newRef;
  244. }
  245. }
  246. else
  247. {
  248. newRef->prev = ref->prev;
  249. ref->prev = newRef;
  250. }
  251. return newRef;
  252. }
  253. Assert(ref->prev->id <= ref->id);
  254. prevRef = ref;
  255. ref = ref->prev;
  256. }
  257. }
  258. Js::PropertyId GetPropertyId() const { return m_propertyId; }
  259. void SetPropertyId(Js::PropertyId id) { m_propertyId = id; }
  260. void SetIsEval() { m_grfid |= fidEval; }
  261. BOOL GetIsEval() const { return m_grfid & fidEval; }
  262. void SetIsLetOrConst() { m_grfid |= fidLetOrConst; }
  263. BOOL GetIsLetOrConst() const { return m_grfid & fidLetOrConst; }
  264. void SetIsModuleExport() { m_grfid |= fidModuleExport; }
  265. BOOL GetIsModuleExport() const { return m_grfid & fidModuleExport; }
  266. };
  267. /*****************************************************************************/
  268. class HashTbl
  269. {
  270. public:
  271. static HashTbl * Create(uint cidHash, ErrHandler * perr);
  272. void Release(void)
  273. {
  274. delete this;
  275. }
  276. BOOL TokIsBinop(tokens tk, int *popl, OpCode *pnop)
  277. {
  278. const KWD *pkwd = KwdOfTok(tk);
  279. if (nullptr == pkwd)
  280. return FALSE;
  281. *popl = pkwd->prec2;
  282. *pnop = pkwd->nop2;
  283. return TRUE;
  284. }
  285. BOOL TokIsUnop(tokens tk, int *popl, OpCode *pnop)
  286. {
  287. const KWD *pkwd = KwdOfTok(tk);
  288. if (nullptr == pkwd)
  289. return FALSE;
  290. *popl = pkwd->prec1;
  291. *pnop = pkwd->nop1;
  292. return TRUE;
  293. }
  294. IdentPtr PidFromTk(tokens tk);
  295. IdentPtr PidHashName(LPCOLESTR psz)
  296. {
  297. size_t csz = wcslen(psz);
  298. Assert(csz <= ULONG_MAX);
  299. return PidHashNameLen(psz, static_cast<uint32>(csz));
  300. }
  301. template <typename CharType>
  302. IdentPtr PidHashNameLen(CharType const * psz, CharType const * end, uint32 cch);
  303. template <typename CharType>
  304. IdentPtr PidHashNameLen(CharType const * psz, uint32 cch);
  305. template <typename CharType>
  306. IdentPtr PidHashNameLenWithHash(_In_reads_(cch) CharType const * psz, CharType const * end, int32 cch, uint32 luHash);
  307. template <typename CharType>
  308. inline IdentPtr FindExistingPid(
  309. CharType const * prgch,
  310. CharType const * end,
  311. int32 cch,
  312. uint32 luHash,
  313. IdentPtr **pppInsert,
  314. int32 *pBucketCount
  315. #if PROFILE_DICTIONARY
  316. , int& depth
  317. #endif
  318. );
  319. tokens TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode);
  320. tokens TkFromNameLenColor(_In_reads_(cch) LPCOLESTR prgch, uint32 cch);
  321. NoReleaseAllocator* GetAllocator() {return &m_noReleaseAllocator;}
  322. bool Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch);
  323. private:
  324. NoReleaseAllocator m_noReleaseAllocator; // to allocate identifiers
  325. Ident ** m_prgpidName; // hash table for names
  326. uint32 m_luMask; // hash mask
  327. uint32 m_luCount; // count of the number of entires in the hash table
  328. ErrHandler * m_perr; // error handler to use
  329. IdentPtr m_rpid[tkLimKwd];
  330. HashTbl(ErrHandler * perr)
  331. {
  332. m_prgpidName = nullptr;
  333. m_perr = perr;
  334. memset(&m_rpid, 0, sizeof(m_rpid));
  335. }
  336. ~HashTbl(void) {}
  337. // Called to grow the number of buckets in the table to reduce the table density.
  338. void Grow();
  339. // Automatically grow the table if a bucket's length grows beyond BucketLengthLimit and the table is densely populated.
  340. static const uint BucketLengthLimit = 5;
  341. // When growing the bucket size we'll grow by GrowFactor. GrowFactor MUST be a power of 2.
  342. static const uint GrowFactor = 4;
  343. #if DEBUG
  344. uint CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask);
  345. #endif
  346. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) LPCOLESTR psz2, LPCOLESTR psz2end)
  347. {
  348. return memcmp(psz1, psz2, (psz2end - psz2) * sizeof(OLECHAR)) == 0;
  349. }
  350. static bool CharsAreEqual(__in_z LPCOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end)
  351. {
  352. return utf8::CharsAreEqual(psz1, psz2, psz2end, utf8::doAllowThreeByteSurrogates);
  353. }
  354. static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) char const * psz2, char const * psz2end)
  355. {
  356. while (psz2 < psz2end)
  357. {
  358. if (*psz1++ != *psz2++)
  359. {
  360. return false;
  361. }
  362. }
  363. return true;
  364. }
  365. static void CopyString(__in_ecount(cch + 1) LPOLESTR psz1, __in_ecount(cch) LPCOLESTR psz2, LPCOLESTR psz2end, int32 cch)
  366. {
  367. Unused(psz2end);
  368. js_memcpy_s(psz1, cch * sizeof(OLECHAR), psz2, cch * sizeof(OLECHAR));
  369. psz1[cch] = 0;
  370. }
  371. static void CopyString(__in_ecount(cch + 1) LPOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end, int32 cch)
  372. {
  373. Unused(cch);
  374. utf8::DecodeUnitsIntoAndNullTerminate(psz1, psz2, psz2end);
  375. }
  376. static void CopyString(__in_ecount(cch + 1) LPOLESTR psz1, __in_ecount(cch) char const * psz2, char const * psz2end, int32 cch)
  377. {
  378. Unused(psz2end);
  379. while (cch-- > 0)
  380. *(psz1++) = *psz2++;
  381. *psz1 = 0;
  382. }
  383. // note: on failure this may throw or return FALSE, depending on
  384. // where the failure occurred.
  385. BOOL Init(uint cidHash);
  386. /*************************************************************************/
  387. /* The following members are related to the keyword descriptor tables */
  388. /*************************************************************************/
  389. struct KWD
  390. {
  391. OpCode nop2;
  392. byte prec2;
  393. OpCode nop1;
  394. byte prec1;
  395. };
  396. struct ReservedWordInfo
  397. {
  398. StaticSym const * sym;
  399. ushort grfid;
  400. };
  401. static const ReservedWordInfo s_reservedWordInfo[tkID];
  402. static const KWD g_mptkkwd[tkLimKwd];
  403. static const KWD * KwdOfTok(tokens tk)
  404. { return (unsigned int)tk < tkLimKwd ? g_mptkkwd + tk : nullptr; }
  405. #if PROFILE_DICTIONARY
  406. DictionaryStats *stats;
  407. #endif
  408. };