Hash.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. #if PROFILE_DICTIONARY
  7. #include "DictionaryStats.h"
  8. #endif
  9. const HashTbl::KWD HashTbl::g_mptkkwd[tkLimKwd] =
  10. {
  11. { knopNone,0,knopNone,0 },
  12. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  13. { nop2,kopl##prec2,nop1,kopl##prec1 },
  14. #define TOK_DCL(tk,prec2,nop2,prec1,nop1) \
  15. { nop2,kopl##prec2,nop1,kopl##prec1 },
  16. #include "keywords.h"
  17. };
  18. const HashTbl::ReservedWordInfo HashTbl::s_reservedWordInfo[tkID] =
  19. {
  20. { nullptr, fidNil },
  21. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  22. { reinterpret_cast<const StaticSym*>(&g_ssym_##name), f },
  23. #include "keywords.h"
  24. };
  25. HashTbl * HashTbl::Create(uint cidHash, ErrHandler * perr)
  26. {
  27. HashTbl * phtbl;
  28. if (nullptr == (phtbl = HeapNewNoThrow(HashTbl,perr)))
  29. return nullptr;
  30. if (!phtbl->Init(cidHash))
  31. {
  32. delete phtbl;
  33. return nullptr;
  34. }
  35. return phtbl;
  36. }
  37. BOOL HashTbl::Init(uint cidHash)
  38. {
  39. // cidHash must be a power of two
  40. Assert(cidHash > 0 && 0 == (cidHash & (cidHash - 1)));
  41. int32 cb;
  42. /* Allocate and clear the hash bucket table */
  43. m_luMask = cidHash - 1;
  44. m_luCount = 0;
  45. // (Bug 1117873 - Windows OS Bugs)
  46. // Prefast: Verify that cidHash * sizeof(Ident *) does not cause an integer overflow
  47. // NoReleaseAllocator( ) takes int32 - so check for LONG_MAX
  48. // Win8 730594 - Use intsafe function to check for overflow.
  49. uint cbTemp;
  50. if (FAILED(UIntMult(cidHash, sizeof(Ident *), &cbTemp)) || cbTemp > LONG_MAX)
  51. return FALSE;
  52. cb = cbTemp;
  53. if (nullptr == (m_prgpidName = (Ident **)m_noReleaseAllocator.Alloc(cb)))
  54. return FALSE;
  55. memset(m_prgpidName, 0, cb);
  56. #if PROFILE_DICTIONARY
  57. stats = DictionaryStats::Create(typeid(this).name(), cidHash);
  58. #endif
  59. return TRUE;
  60. }
  61. void HashTbl::Grow()
  62. {
  63. // Grow the bucket size by grow factor
  64. // Has the side-effect of inverting the order the pids appear in their respective buckets.
  65. uint cidHash = m_luMask + 1;
  66. uint n_cidHash = cidHash * GrowFactor;
  67. Assert(n_cidHash > 0 && 0 == (n_cidHash & (n_cidHash - 1)));
  68. // Win8 730594 - Use intsafe function to check for overflow.
  69. uint cbTemp;
  70. if (FAILED(UIntMult(n_cidHash, sizeof(Ident *), &cbTemp)) || cbTemp > LONG_MAX)
  71. // It is fine to exit early here, we will just have a potentially densely populated hash table
  72. return;
  73. int32 cb = cbTemp;
  74. uint n_luMask = n_cidHash - 1;
  75. IdentPtr *n_prgpidName = (IdentPtr *)m_noReleaseAllocator.Alloc(cb);
  76. if (n_prgpidName == nullptr)
  77. // It is fine to exit early here, we will just have a potentially densely populated hash table
  78. return;
  79. // Clear the array
  80. memset(n_prgpidName, 0, cb);
  81. // Place each entry its new bucket.
  82. for (uint i = 0; i < cidHash; i++)
  83. {
  84. for (IdentPtr pid = m_prgpidName[i], next = pid ? pid->m_pidNext : nullptr; pid; pid = next, next = pid ? pid->m_pidNext : nullptr)
  85. {
  86. uint32 luHash = pid->m_luHash;
  87. uint32 luIndex = luHash & n_luMask;
  88. pid->m_pidNext = n_prgpidName[luIndex];
  89. n_prgpidName[luIndex] = pid;
  90. }
  91. }
  92. Assert(CountAndVerifyItems(n_prgpidName, n_cidHash, n_luMask) == m_luCount);
  93. // Update the table fields.
  94. m_prgpidName = n_prgpidName;
  95. m_luMask= n_luMask;
  96. #if PROFILE_DICTIONARY
  97. if(stats)
  98. {
  99. int emptyBuckets = 0;
  100. for (uint i = 0; i < n_cidHash; i++)
  101. {
  102. if(m_prgpidName[i] == nullptr)
  103. {
  104. emptyBuckets++;
  105. }
  106. }
  107. stats->Resize(n_cidHash, emptyBuckets);
  108. }
  109. #endif
  110. }
  111. #if DEBUG
  112. uint HashTbl::CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask)
  113. {
  114. uint count = 0;
  115. for (uint i = 0; i < bucketCount; i++)
  116. for (IdentPtr pid = buckets[i]; pid; pid = pid->m_pidNext)
  117. {
  118. Assert((pid->m_luHash & mask) == i);
  119. count++;
  120. }
  121. return count;
  122. }
  123. #endif
  124. #pragma warning(push)
  125. #pragma warning(disable:4740) // flow in or out of inline asm code suppresses global optimization
  126. tokens Ident::Tk(bool isStrictMode)
  127. {
  128. const tokens token = (tokens)m_tk;
  129. if (token == tkLim)
  130. {
  131. m_tk = tkNone;
  132. const uint32 luHash = this->m_luHash;
  133. const LPCOLESTR prgch = Psz();
  134. const uint32 cch = Cch();
  135. #include "kwds_sw.h"
  136. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  137. LEqual_##name: \
  138. if (cch == g_ssym_##name.cch && \
  139. 0 == memcmp(g_ssym_##name.sz, prgch, cch * sizeof(OLECHAR))) \
  140. { \
  141. if (f) \
  142. this->m_grfid |= f; \
  143. this->m_tk = tk; \
  144. return ((f & fidKwdRsvd) || (isStrictMode && (f & fidKwdFutRsvd))) ? tk : tkID; \
  145. } \
  146. goto LDefault;
  147. #include "keywords.h"
  148. LDefault:
  149. return tkID;
  150. }
  151. else if (token == tkNone || !(m_grfid & fidKwdRsvd))
  152. {
  153. if ( !isStrictMode || !(m_grfid & fidKwdFutRsvd))
  154. {
  155. return tkID;
  156. }
  157. }
  158. return token;
  159. }
  160. #pragma warning(pop)
  161. void Ident::SetTk(tokens token, ushort grfid)
  162. {
  163. Assert(token != tkNone && token < tkID);
  164. if (m_tk == tkLim)
  165. {
  166. m_tk = (ushort)token;
  167. m_grfid |= grfid;
  168. }
  169. else
  170. {
  171. Assert(m_tk == token);
  172. Assert((m_grfid & grfid) == grfid);
  173. }
  174. }
  175. IdentPtr HashTbl::PidFromTk(tokens token)
  176. {
  177. Assert(token > tkNone && token < tkID);
  178. __analysis_assume(token > tkNone && token < tkID);
  179. // Create a pid so we can create a name node
  180. IdentPtr rpid = m_rpid[token];
  181. if (nullptr == rpid)
  182. {
  183. StaticSym const * sym = s_reservedWordInfo[token].sym;
  184. Assert(sym != nullptr);
  185. rpid = this->PidHashNameLenWithHash(sym->sz, sym->sz + sym->cch, sym->cch, sym->luHash);
  186. rpid->SetTk(token, s_reservedWordInfo[token].grfid);
  187. m_rpid[token] = rpid;
  188. }
  189. return rpid;
  190. }
  191. template <typename CharType>
  192. IdentPtr HashTbl::PidHashNameLen(CharType const * prgch, CharType const * end, uint32 cch)
  193. {
  194. // NOTE: We use case sensitive hash during compilation, but the runtime
  195. // uses case insensitive hashing so it can do case insensitive lookups.
  196. uint32 luHash = CaseSensitiveComputeHash(prgch, end);
  197. return PidHashNameLenWithHash(prgch, end, cch, luHash);
  198. }
  199. template IdentPtr HashTbl::PidHashNameLen<utf8char_t>(utf8char_t const * prgch, utf8char_t const * end, uint32 cch);
  200. template IdentPtr HashTbl::PidHashNameLen<char>(char const * prgch, char const * end, uint32 cch);
  201. template IdentPtr HashTbl::PidHashNameLen<char16>(char16 const * prgch, char16 const * end, uint32 cch);
  202. template <typename CharType>
  203. IdentPtr HashTbl::PidHashNameLen(CharType const * prgch, uint32 cch)
  204. {
  205. Assert(sizeof(CharType) == 2);
  206. return PidHashNameLen(prgch, prgch + cch, cch);
  207. };
  208. template IdentPtr HashTbl::PidHashNameLen<utf8char_t>(utf8char_t const * prgch, uint32 cch);
  209. template IdentPtr HashTbl::PidHashNameLen<char>(char const * prgch, uint32 cch);
  210. template IdentPtr HashTbl::PidHashNameLen<char16>(char16 const * prgch, uint32 cch);
  211. template <typename CharType>
  212. IdentPtr HashTbl::PidHashNameLenWithHash(_In_reads_(cch) CharType const * prgch, CharType const * end, int32 cch, uint32 luHash)
  213. {
  214. Assert(cch >= 0);
  215. AssertArrMemR(prgch, cch);
  216. Assert(luHash == CaseSensitiveComputeHash(prgch, end));
  217. IdentPtr * ppid;
  218. IdentPtr pid;
  219. LONG cb;
  220. int32 bucketCount;
  221. #if PROFILE_DICTIONARY
  222. int depth = 0;
  223. #endif
  224. pid = this->FindExistingPid(prgch, end, cch, luHash, &ppid, &bucketCount
  225. #if PROFILE_DICTIONARY
  226. , depth
  227. #endif
  228. );
  229. if (pid)
  230. {
  231. return pid;
  232. }
  233. if (bucketCount > BucketLengthLimit && m_luCount > m_luMask)
  234. {
  235. Grow();
  236. // ppid is now invalid because the Grow() moves the entries around.
  237. // Find the correct ppid by repeating the find of the end of the bucket
  238. // the new item will be placed in.
  239. // Note this is similar to the main find loop but does not count nor does it
  240. // look at the entries because we already proved above the entry is not in the
  241. // table, we just want to find the end of the bucket.
  242. ppid = &m_prgpidName[luHash & m_luMask];
  243. while (*ppid)
  244. ppid = &(*ppid)->m_pidNext;
  245. }
  246. #if PROFILE_DICTIONARY
  247. ++depth;
  248. if (stats)
  249. stats->Insert(depth);
  250. #endif
  251. //Windows OS Bug 1795286 : CENTRAL PREFAST RUN: inetcore\scriptengines\src\src\core\hash.cpp :
  252. // 'sizeof((*pid))+((cch+1))*sizeof(OLECHAR)' may be smaller than
  253. // '((cch+1))*sizeof(OLECHAR)'. This can be caused by integer overflows
  254. // or underflows. This could yield an incorrect buffer all
  255. /* Allocate space for the identifier */
  256. ULONG Len;
  257. if (FAILED(ULongAdd(cch, 1, &Len)) ||
  258. FAILED(ULongMult(Len, sizeof(OLECHAR), &Len)) ||
  259. FAILED(ULongAdd(Len, sizeof(*pid), &Len)) ||
  260. FAILED(ULongToLong(Len, &cb)))
  261. {
  262. cb = 0;
  263. m_perr->Throw(ERRnoMemory);
  264. }
  265. if (nullptr == (pid = (IdentPtr)m_noReleaseAllocator.Alloc(cb)))
  266. m_perr->Throw(ERRnoMemory);
  267. /* Insert the identifier into the hash list */
  268. *ppid = pid;
  269. // Increment the number of entries in the table.
  270. m_luCount++;
  271. /* Fill in the identifier record */
  272. pid->m_pidNext = nullptr;
  273. pid->m_tk = tkLim;
  274. pid->m_grfid = fidNil;
  275. pid->m_luHash = luHash;
  276. pid->m_cch = cch;
  277. pid->m_pidRefStack = nullptr;
  278. pid->m_propertyId = Js::Constants::NoProperty;
  279. pid->assignmentState = NotAssigned;
  280. HashTbl::CopyString(pid->m_sz, prgch, end, cch);
  281. return pid;
  282. }
  283. template <typename CharType>
  284. IdentPtr HashTbl::FindExistingPid(
  285. CharType const * prgch,
  286. CharType const * end,
  287. int32 cch,
  288. uint32 luHash,
  289. IdentPtr **pppInsert,
  290. int32 *pBucketCount
  291. #if PROFILE_DICTIONARY
  292. , int& depth
  293. #endif
  294. )
  295. {
  296. int32 bucketCount;
  297. IdentPtr pid;
  298. IdentPtr *ppid = &m_prgpidName[luHash & m_luMask];
  299. /* Search the hash table for an existing match */
  300. ppid = &m_prgpidName[luHash & m_luMask];
  301. for (bucketCount = 0; nullptr != (pid = *ppid); ppid = &pid->m_pidNext, bucketCount++)
  302. {
  303. if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
  304. HashTbl::CharsAreEqual(pid->m_sz, prgch, end))
  305. {
  306. return pid;
  307. }
  308. #if PROFILE_DICTIONARY
  309. ++depth;
  310. #endif
  311. }
  312. if (pBucketCount)
  313. {
  314. *pBucketCount = bucketCount;
  315. }
  316. if (pppInsert)
  317. {
  318. *pppInsert = ppid;
  319. }
  320. return nullptr;
  321. }
  322. template IdentPtr HashTbl::FindExistingPid<utf8char_t>(
  323. utf8char_t const * prgch, utf8char_t const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
  324. #if PROFILE_DICTIONARY
  325. , int& depth
  326. #endif
  327. );
  328. template IdentPtr HashTbl::FindExistingPid<char>(
  329. char const * prgch, char const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
  330. #if PROFILE_DICTIONARY
  331. , int& depth
  332. #endif
  333. );
  334. template IdentPtr HashTbl::FindExistingPid<char16>(
  335. char16 const * prgch, char16 const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
  336. #if PROFILE_DICTIONARY
  337. , int& depth
  338. #endif
  339. );
  340. bool HashTbl::Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch)
  341. {
  342. uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
  343. for (auto pid = m_prgpidName[luHash & m_luMask]; pid; pid = pid->m_pidNext)
  344. {
  345. if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
  346. HashTbl::CharsAreEqual(pid->m_sz, prgch + cch, prgch))
  347. {
  348. return true;
  349. }
  350. }
  351. return false;
  352. }
  353. #include "HashFunc.cpp"
  354. #pragma warning(push)
  355. #pragma warning(disable:4740) // flow in or out of inline asm code suppresses global optimization
  356. // Decide if token is keyword by string matching -
  357. // This method is used during colorizing when scanner isn't interested in storing the actual id and does not care about conversion of escape sequences
  358. tokens HashTbl::TkFromNameLenColor(_In_reads_(cch) LPCOLESTR prgch, uint32 cch)
  359. {
  360. uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
  361. // look for a keyword
  362. #include "kwds_sw.h"
  363. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  364. LEqual_##name: \
  365. if (cch == g_ssym_##name.cch && \
  366. 0 == memcmp(g_ssym_##name.sz, prgch, cch * sizeof(OLECHAR))) \
  367. { \
  368. return tk; \
  369. } \
  370. goto LDefault;
  371. #include "keywords.h"
  372. LDefault:
  373. return tkID;
  374. }
  375. #pragma warning(pop)
  376. #pragma warning(push)
  377. #pragma warning(disable:4740) // flow in or out of inline asm code suppresses global optimization
  378. // Decide if token is keyword by string matching -
  379. // This method is used during colorizing when scanner isn't interested in storing the actual id and does not care about conversion of escape sequences
  380. tokens HashTbl::TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode)
  381. {
  382. uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
  383. // look for a keyword
  384. #include "kwds_sw.h"
  385. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  386. LEqual_##name: \
  387. if (cch == g_ssym_##name.cch && \
  388. 0 == memcmp(g_ssym_##name.sz, prgch, cch * sizeof(OLECHAR))) \
  389. { \
  390. return ((f & fidKwdRsvd) || (isStrictMode && (f & fidKwdFutRsvd))) ? tk : tkID; \
  391. } \
  392. goto LDefault;
  393. #include "keywords.h"
  394. LDefault:
  395. return tkID;
  396. }
  397. #pragma warning(pop)