Hash.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. #if PROFILE_DICTIONARY
  7. #include "DictionaryStats.h"
  8. #endif
  9. const HashTbl::KWD HashTbl::g_mptkkwd[tkLimKwd] =
  10. {
  11. { knopNone,0,knopNone,0 },
  12. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  13. { nop2,kopl##prec2,nop1,kopl##prec1 },
  14. #define TOK_DCL(tk,prec2,nop2,prec1,nop1) \
  15. { nop2,kopl##prec2,nop1,kopl##prec1 },
  16. #include "keywords.h"
  17. };
  18. const HashTbl::ReservedWordInfo HashTbl::s_reservedWordInfo[tkID] =
  19. {
  20. { nullptr, fidNil },
  21. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  22. { reinterpret_cast<const StaticSym*>(&g_ssym_##name), f },
  23. #include "keywords.h"
  24. };
  25. HashTbl * HashTbl::Create(uint cidHash)
  26. {
  27. HashTbl * phtbl;
  28. if (nullptr == (phtbl = HeapNewNoThrow(HashTbl)))
  29. return nullptr;
  30. if (!phtbl->Init(cidHash))
  31. {
  32. delete phtbl; // invokes overridden operator delete
  33. return nullptr;
  34. }
  35. return phtbl;
  36. }
  37. BOOL HashTbl::Init(uint cidHash)
  38. {
  39. // cidHash must be a power of two
  40. Assert(cidHash > 0 && 0 == (cidHash & (cidHash - 1)));
  41. int32 cb;
  42. /* Allocate and clear the hash bucket table */
  43. m_luMask = cidHash - 1;
  44. m_luCount = 0;
  45. // (Bug 1117873 - Windows OS Bugs)
  46. // Prefast: Verify that cidHash * sizeof(Ident *) does not cause an integer overflow
  47. // NoReleaseAllocator( ) takes int32 - so check for LONG_MAX
  48. // Win8 730594 - Use intsafe function to check for overflow.
  49. uint cbTemp;
  50. if (FAILED(UIntMult(cidHash, sizeof(Ident *), &cbTemp)) || cbTemp > LONG_MAX)
  51. return FALSE;
  52. cb = cbTemp;
  53. if (nullptr == (m_prgpidName = (Ident **)m_noReleaseAllocator.Alloc(cb)))
  54. return FALSE;
  55. memset(m_prgpidName, 0, cb);
  56. #if PROFILE_DICTIONARY
  57. stats = DictionaryStats::Create(typeid(this).name(), cidHash);
  58. #endif
  59. return TRUE;
  60. }
  61. void HashTbl::Grow()
  62. {
  63. // Grow the bucket size by grow factor
  64. // Has the side-effect of inverting the order the pids appear in their respective buckets.
  65. uint cidHash = m_luMask + 1;
  66. uint n_cidHash = cidHash * GrowFactor;
  67. Assert(n_cidHash > 0 && 0 == (n_cidHash & (n_cidHash - 1)));
  68. // Win8 730594 - Use intsafe function to check for overflow.
  69. uint cbTemp;
  70. if (FAILED(UIntMult(n_cidHash, sizeof(Ident *), &cbTemp)) || cbTemp > LONG_MAX)
  71. // It is fine to exit early here, we will just have a potentially densely populated hash table
  72. return;
  73. int32 cb = cbTemp;
  74. uint n_luMask = n_cidHash - 1;
  75. IdentPtr *n_prgpidName = (IdentPtr *)m_noReleaseAllocator.Alloc(cb);
  76. if (n_prgpidName == nullptr)
  77. // It is fine to exit early here, we will just have a potentially densely populated hash table
  78. return;
  79. // Clear the array
  80. memset(n_prgpidName, 0, cb);
  81. // Place each entry its new bucket.
  82. for (uint i = 0; i < cidHash; i++)
  83. {
  84. for (IdentPtr pid = m_prgpidName[i], next = pid ? pid->m_pidNext : nullptr; pid; pid = next, next = pid ? pid->m_pidNext : nullptr)
  85. {
  86. uint32 luHash = pid->m_luHash;
  87. uint32 luIndex = luHash & n_luMask;
  88. pid->m_pidNext = n_prgpidName[luIndex];
  89. n_prgpidName[luIndex] = pid;
  90. }
  91. }
  92. Assert(CountAndVerifyItems(n_prgpidName, n_cidHash, n_luMask) == m_luCount);
  93. // Update the table fields.
  94. m_prgpidName = n_prgpidName;
  95. m_luMask= n_luMask;
  96. #if PROFILE_DICTIONARY
  97. if(stats)
  98. {
  99. int emptyBuckets = 0;
  100. for (uint i = 0; i < n_cidHash; i++)
  101. {
  102. if(m_prgpidName[i] == nullptr)
  103. {
  104. emptyBuckets++;
  105. }
  106. }
  107. stats->Resize(n_cidHash, emptyBuckets);
  108. }
  109. #endif
  110. }
  111. void HashTbl::ClearPidRefStacks()
  112. {
  113. // Clear pidrefstack pointers from all existing pid's.
  114. for (uint i = 0; i < m_luMask; i++)
  115. {
  116. for (IdentPtr pid = m_prgpidName[i]; pid; pid = pid->m_pidNext)
  117. {
  118. pid->m_pidRefStack = nullptr;
  119. }
  120. }
  121. }
  122. #if DEBUG
  123. uint HashTbl::CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask)
  124. {
  125. uint count = 0;
  126. for (uint i = 0; i < bucketCount; i++)
  127. for (IdentPtr pid = buckets[i]; pid; pid = pid->m_pidNext)
  128. {
  129. Assert((pid->m_luHash & mask) == i);
  130. count++;
  131. }
  132. return count;
  133. }
  134. #endif
  135. #pragma warning(push)
  136. #pragma warning(disable:4740) // flow in or out of inline asm code suppresses global optimization
  137. tokens Ident::Tk(bool isStrictMode)
  138. {
  139. const tokens token = (tokens)m_tk;
  140. if (token == tkLim)
  141. {
  142. m_tk = tkNone;
  143. const uint32 luHash = this->m_luHash;
  144. const LPCOLESTR prgch = Psz();
  145. const uint32 cch = Cch();
  146. #include "kwds_sw.h"
  147. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  148. LEqual_##name: \
  149. if (cch == g_ssym_##name.cch && \
  150. 0 == memcmp(g_ssym_##name.sz, prgch, cch * sizeof(OLECHAR))) \
  151. { \
  152. if (f) \
  153. this->m_grfid |= f; \
  154. this->m_tk = tk; \
  155. return ((f & fidKwdRsvd) || (isStrictMode && (f & fidKwdFutRsvd))) ? tk : tkID; \
  156. } \
  157. goto LDefault;
  158. #include "keywords.h"
  159. LDefault:
  160. return tkID;
  161. }
  162. else if (token == tkNone || !(m_grfid & fidKwdRsvd))
  163. {
  164. if ( !isStrictMode || !(m_grfid & fidKwdFutRsvd))
  165. {
  166. return tkID;
  167. }
  168. }
  169. return token;
  170. }
  171. #pragma warning(pop)
  172. void Ident::SetTk(tokens token, ushort grfid)
  173. {
  174. Assert(token != tkNone && token < tkID);
  175. if (m_tk == tkLim)
  176. {
  177. m_tk = (ushort)token;
  178. m_grfid |= grfid;
  179. }
  180. else
  181. {
  182. Assert(m_tk == token);
  183. Assert((m_grfid & grfid) == grfid);
  184. }
  185. }
  186. IdentPtr HashTbl::PidFromTk(tokens token)
  187. {
  188. Assert(token > tkNone && token < tkID);
  189. __analysis_assume(token > tkNone && token < tkID);
  190. // Create a pid so we can create a name node
  191. IdentPtr rpid = m_rpid[token];
  192. if (nullptr == rpid)
  193. {
  194. StaticSym const * sym = s_reservedWordInfo[token].sym;
  195. Assert(sym != nullptr);
  196. rpid = this->PidHashNameLenWithHash(sym->sz, sym->sz + sym->cch, sym->cch, sym->luHash);
  197. rpid->SetTk(token, s_reservedWordInfo[token].grfid);
  198. m_rpid[token] = rpid;
  199. }
  200. return rpid;
  201. }
  202. template <typename CharType>
  203. IdentPtr HashTbl::PidHashNameLen(CharType const * prgch, CharType const * end, uint32 cch)
  204. {
  205. // NOTE: We use case sensitive hash during compilation, but the runtime
  206. // uses case insensitive hashing so it can do case insensitive lookups.
  207. uint32 luHash = CaseSensitiveComputeHash(prgch, end);
  208. return PidHashNameLenWithHash(prgch, end, cch, luHash);
  209. }
  210. template IdentPtr HashTbl::PidHashNameLen<utf8char_t>(utf8char_t const * prgch, utf8char_t const * end, uint32 cch);
  211. template IdentPtr HashTbl::PidHashNameLen<char>(char const * prgch, char const * end, uint32 cch);
  212. template IdentPtr HashTbl::PidHashNameLen<char16>(char16 const * prgch, char16 const * end, uint32 cch);
  213. template <typename CharType>
  214. IdentPtr HashTbl::PidHashNameLen(CharType const * prgch, uint32 cch)
  215. {
  216. Assert(sizeof(CharType) == 2);
  217. return PidHashNameLen(prgch, prgch + cch, cch);
  218. };
  219. template IdentPtr HashTbl::PidHashNameLen<utf8char_t>(utf8char_t const * prgch, uint32 cch);
  220. template IdentPtr HashTbl::PidHashNameLen<char>(char const * prgch, uint32 cch);
  221. template IdentPtr HashTbl::PidHashNameLen<char16>(char16 const * prgch, uint32 cch);
  222. template <typename CharType>
  223. IdentPtr HashTbl::PidHashNameLenWithHash(_In_reads_(cch) CharType const * prgch, CharType const * end, int32 cch, uint32 luHash)
  224. {
  225. Assert(cch >= 0);
  226. AssertArrMemR(prgch, cch);
  227. Assert(luHash == CaseSensitiveComputeHash(prgch, end));
  228. IdentPtr * ppid = nullptr;
  229. IdentPtr pid;
  230. LONG cb;
  231. int32 bucketCount;
  232. #if PROFILE_DICTIONARY
  233. int depth = 0;
  234. #endif
  235. pid = this->FindExistingPid(prgch, end, cch, luHash, &ppid, &bucketCount
  236. #if PROFILE_DICTIONARY
  237. , depth
  238. #endif
  239. );
  240. if (pid)
  241. {
  242. return pid;
  243. }
  244. if (bucketCount > BucketLengthLimit && m_luCount > m_luMask)
  245. {
  246. Grow();
  247. // ppid is now invalid because the Grow() moves the entries around.
  248. // Find the correct ppid by repeating the find of the end of the bucket
  249. // the new item will be placed in.
  250. // Note this is similar to the main find loop but does not count nor does it
  251. // look at the entries because we already proved above the entry is not in the
  252. // table, we just want to find the end of the bucket.
  253. ppid = &m_prgpidName[luHash & m_luMask];
  254. while (*ppid)
  255. ppid = &(*ppid)->m_pidNext;
  256. }
  257. #if PROFILE_DICTIONARY
  258. ++depth;
  259. if (stats)
  260. stats->Insert(depth);
  261. #endif
  262. //Windows OS Bug 1795286 : CENTRAL PREFAST RUN: inetcore\scriptengines\src\src\core\hash.cpp :
  263. // 'sizeof((*pid))+((cch+1))*sizeof(OLECHAR)' may be smaller than
  264. // '((cch+1))*sizeof(OLECHAR)'. This can be caused by integer overflows
  265. // or underflows. This could yield an incorrect buffer all
  266. /* Allocate space for the identifier */
  267. ULONG Len;
  268. if (FAILED(ULongAdd(cch, 1, &Len)) ||
  269. FAILED(ULongMult(Len, sizeof(OLECHAR), &Len)) ||
  270. FAILED(ULongAdd(Len, sizeof(*pid), &Len)) ||
  271. FAILED(ULongToLong(Len, &cb)))
  272. {
  273. cb = 0;
  274. OutOfMemory();
  275. }
  276. if (nullptr == (pid = (IdentPtr)m_noReleaseAllocator.Alloc(cb)))
  277. OutOfMemory();
  278. /* Insert the identifier into the hash list */
  279. *ppid = pid;
  280. // Increment the number of entries in the table.
  281. m_luCount++;
  282. /* Fill in the identifier record */
  283. pid->m_pidNext = nullptr;
  284. pid->m_tk = tkLim;
  285. pid->m_grfid = fidNil;
  286. pid->m_luHash = luHash;
  287. pid->m_cch = cch;
  288. pid->m_pidRefStack = nullptr;
  289. pid->m_propertyId = Js::Constants::NoProperty;
  290. pid->assignmentState = NotAssigned;
  291. pid->isUsedInLdElem = false;
  292. HashTbl::CopyString(pid->m_sz, prgch, end);
  293. return pid;
  294. }
  295. template <typename CharType>
  296. IdentPtr HashTbl::FindExistingPid(
  297. CharType const * prgch,
  298. CharType const * end,
  299. int32 cch,
  300. uint32 luHash,
  301. IdentPtr **pppInsert,
  302. int32 *pBucketCount
  303. #if PROFILE_DICTIONARY
  304. , int& depth
  305. #endif
  306. )
  307. {
  308. int32 bucketCount;
  309. IdentPtr pid;
  310. IdentPtr *ppid = &m_prgpidName[luHash & m_luMask];
  311. /* Search the hash table for an existing match */
  312. ppid = &m_prgpidName[luHash & m_luMask];
  313. for (bucketCount = 0; nullptr != (pid = *ppid); ppid = &pid->m_pidNext, bucketCount++)
  314. {
  315. if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
  316. HashTbl::CharsAreEqual(pid->m_sz, prgch, end))
  317. {
  318. return pid;
  319. }
  320. #if PROFILE_DICTIONARY
  321. ++depth;
  322. #endif
  323. }
  324. if (pBucketCount)
  325. {
  326. *pBucketCount = bucketCount;
  327. }
  328. if (pppInsert)
  329. {
  330. *pppInsert = ppid;
  331. }
  332. return nullptr;
  333. }
  334. template IdentPtr HashTbl::FindExistingPid<utf8char_t>(
  335. utf8char_t const * prgch, utf8char_t const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
  336. #if PROFILE_DICTIONARY
  337. , int& depth
  338. #endif
  339. );
  340. template IdentPtr HashTbl::FindExistingPid<char>(
  341. char const * prgch, char const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
  342. #if PROFILE_DICTIONARY
  343. , int& depth
  344. #endif
  345. );
  346. template IdentPtr HashTbl::FindExistingPid<char16>(
  347. char16 const * prgch, char16 const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
  348. #if PROFILE_DICTIONARY
  349. , int& depth
  350. #endif
  351. );
  352. bool HashTbl::Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch)
  353. {
  354. uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
  355. for (auto pid = m_prgpidName[luHash & m_luMask]; pid; pid = pid->m_pidNext)
  356. {
  357. if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
  358. HashTbl::CharsAreEqual(pid->m_sz, prgch + cch, prgch))
  359. {
  360. return true;
  361. }
  362. }
  363. return false;
  364. }
  365. #include "HashFunc.cpp"
  366. #pragma warning(push)
  367. #pragma warning(disable:4740) // flow in or out of inline asm code suppresses global optimization
  368. // Decide if token is keyword by string matching -
  369. // This method is used during colorizing when scanner isn't interested in storing the actual id and does not care about conversion of escape sequences
  370. tokens HashTbl::TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode)
  371. {
  372. uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
  373. // look for a keyword
  374. #include "kwds_sw.h"
  375. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  376. LEqual_##name: \
  377. if (cch == g_ssym_##name.cch && \
  378. 0 == memcmp(g_ssym_##name.sz, prgch, cch * sizeof(OLECHAR))) \
  379. { \
  380. return ((f & fidKwdRsvd) || (isStrictMode && (f & fidKwdFutRsvd))) ? tk : tkID; \
  381. } \
  382. goto LDefault;
  383. #include "keywords.h"
  384. LDefault:
  385. return tkID;
  386. }
  387. #pragma warning(pop)
  388. __declspec(noreturn) void HashTbl::OutOfMemory()
  389. {
  390. throw ParseExceptionObject(ERRnoMemory);
  391. }