2
0

Hash.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. #if PROFILE_DICTIONARY
  7. #include "DictionaryStats.h"
  8. #endif
  9. const HashTbl::KWD HashTbl::g_mptkkwd[tkLimKwd] =
  10. {
  11. { knopNone,0,knopNone,0 },
  12. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  13. { nop2,kopl##prec2,nop1,kopl##prec1 },
  14. #define TOK_DCL(tk,prec2,nop2,prec1,nop1) \
  15. { nop2,kopl##prec2,nop1,kopl##prec1 },
  16. #include "keywords.h"
  17. };
  18. const HashTbl::ReservedWordInfo HashTbl::s_reservedWordInfo[tkID] =
  19. {
  20. { nullptr, fidNil },
  21. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  22. { reinterpret_cast<const StaticSym*>(&g_ssym_##name), f },
  23. #include "keywords.h"
  24. };
  25. BOOL HashTbl::Init(uint cidHash)
  26. {
  27. // cidHash must be a power of two
  28. Assert(cidHash > 0 && 0 == (cidHash & (cidHash - 1)));
  29. int32 cb;
  30. /* Allocate and clear the hash bucket table */
  31. m_luMask = cidHash - 1;
  32. m_luCount = 0;
  33. // (Bug 1117873 - Windows OS Bugs)
  34. // Prefast: Verify that cidHash * sizeof(Ident *) does not cause an integer overflow
  35. // NoReleaseAllocator( ) takes int32 - so check for LONG_MAX
  36. // Win8 730594 - Use intsafe function to check for overflow.
  37. uint cbTemp;
  38. if (FAILED(UIntMult(cidHash, sizeof(Ident *), &cbTemp)) || cbTemp > LONG_MAX)
  39. return FALSE;
  40. cb = cbTemp;
  41. if (nullptr == (m_prgpidName = (Ident **)m_noReleaseAllocator.Alloc(cb)))
  42. return FALSE;
  43. memset(m_prgpidName, 0, cb);
  44. #if PROFILE_DICTIONARY
  45. stats = DictionaryStats::Create(typeid(this).name(), cidHash);
  46. #endif
  47. return TRUE;
  48. }
  49. void HashTbl::Grow()
  50. {
  51. // Grow the bucket size by grow factor
  52. // Has the side-effect of inverting the order the pids appear in their respective buckets.
  53. uint cidHash = m_luMask + 1;
  54. uint n_cidHash = cidHash * GrowFactor;
  55. Assert(n_cidHash > 0 && 0 == (n_cidHash & (n_cidHash - 1)));
  56. // Win8 730594 - Use intsafe function to check for overflow.
  57. uint cbTemp;
  58. if (FAILED(UIntMult(n_cidHash, sizeof(Ident *), &cbTemp)) || cbTemp > LONG_MAX)
  59. // It is fine to exit early here, we will just have a potentially densely populated hash table
  60. return;
  61. int32 cb = cbTemp;
  62. uint n_luMask = n_cidHash - 1;
  63. IdentPtr *n_prgpidName = (IdentPtr *)m_noReleaseAllocator.Alloc(cb);
  64. if (n_prgpidName == nullptr)
  65. // It is fine to exit early here, we will just have a potentially densely populated hash table
  66. return;
  67. // Clear the array
  68. memset(n_prgpidName, 0, cb);
  69. // Place each entry its new bucket.
  70. for (uint i = 0; i < cidHash; i++)
  71. {
  72. for (IdentPtr pid = m_prgpidName[i], next = pid ? pid->m_pidNext : nullptr; pid; pid = next, next = pid ? pid->m_pidNext : nullptr)
  73. {
  74. uint32 luHash = pid->m_luHash;
  75. uint32 luIndex = luHash & n_luMask;
  76. pid->m_pidNext = n_prgpidName[luIndex];
  77. n_prgpidName[luIndex] = pid;
  78. }
  79. }
  80. Assert(CountAndVerifyItems(n_prgpidName, n_cidHash, n_luMask) == m_luCount);
  81. // Update the table fields.
  82. m_prgpidName = n_prgpidName;
  83. m_luMask= n_luMask;
  84. #if PROFILE_DICTIONARY
  85. if(stats)
  86. {
  87. int emptyBuckets = 0;
  88. for (uint i = 0; i < n_cidHash; i++)
  89. {
  90. if(m_prgpidName[i] == nullptr)
  91. {
  92. emptyBuckets++;
  93. }
  94. }
  95. stats->Resize(n_cidHash, emptyBuckets);
  96. }
  97. #endif
  98. }
  99. #if DEBUG
  100. uint HashTbl::CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask)
  101. {
  102. uint count = 0;
  103. for (uint i = 0; i < bucketCount; i++)
  104. for (IdentPtr pid = buckets[i]; pid; pid = pid->m_pidNext)
  105. {
  106. Assert((pid->m_luHash & mask) == i);
  107. count++;
  108. }
  109. return count;
  110. }
  111. #endif
  112. #pragma warning(push)
  113. #pragma warning(disable:4740) // flow in or out of inline asm code suppresses global optimization
  114. // Decide if token is keyword by string matching -
  115. // This method is used during colorizing when scanner isn't interested in storing the actual id and does not care about conversion of escape sequences
  116. tokens Ident::TkFromNameLen(uint32 luHash, _In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode, ushort * pgrfid, ushort * ptk)
  117. {
  118. // look for a keyword
  119. #include "kwds_sw.h"
  120. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  121. LEqual_##name: \
  122. if (cch == g_ssym_##name.cch && \
  123. 0 == memcmp(g_ssym_##name.sz, prgch, cch * sizeof(OLECHAR))) \
  124. { \
  125. if (f) \
  126. *pgrfid |= f; \
  127. *ptk = tk; \
  128. return ((f & fidKwdRsvd) || (isStrictMode && (f & fidKwdFutRsvd))) ? tk : tkID; \
  129. } \
  130. goto LDefault;
  131. #include "keywords.h"
  132. LDefault:
  133. return tkID;
  134. }
  135. #pragma warning(pop)
  136. #if DBG
  137. tokens Ident::TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode)
  138. {
  139. uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
  140. ushort grfid;
  141. ushort tk;
  142. return TkFromNameLen(luHash, prgch, cch, isStrictMode, &grfid, &tk);
  143. }
  144. #endif
  145. tokens Ident::Tk(bool isStrictMode)
  146. {
  147. const tokens token = (tokens)m_tk;
  148. if (token == tkLim)
  149. {
  150. m_tk = tkNone;
  151. const uint32 luHash = this->m_luHash;
  152. const LPCOLESTR prgch = Psz();
  153. const uint32 cch = Cch();
  154. return TkFromNameLen(luHash, prgch, cch, isStrictMode, &this->m_grfid, &this->m_tk);
  155. }
  156. else if (token == tkNone || !(m_grfid & fidKwdRsvd))
  157. {
  158. if ( !isStrictMode || !(m_grfid & fidKwdFutRsvd))
  159. {
  160. return tkID;
  161. }
  162. }
  163. return token;
  164. }
  165. void Ident::SetTk(tokens token, ushort grfid)
  166. {
  167. Assert(token != tkNone && token < tkID);
  168. if (m_tk == tkLim)
  169. {
  170. m_tk = (ushort)token;
  171. m_grfid |= grfid;
  172. }
  173. else
  174. {
  175. Assert(m_tk == token);
  176. Assert((m_grfid & grfid) == grfid);
  177. }
  178. }
  179. void Ident::TrySetIsUsedInLdElem(ParseNode * pnode)
  180. {
  181. if (pnode && pnode->nop == knopStr)
  182. {
  183. pnode->AsParseNodeStr()->pid->SetIsUsedInLdElem(true);
  184. }
  185. }
  186. IdentPtr HashTbl::PidFromTk(tokens token)
  187. {
  188. Assert(token > tkNone && token < tkID);
  189. __analysis_assume(token > tkNone && token < tkID);
  190. // Create a pid so we can create a name node
  191. IdentPtr rpid = m_rpid[token];
  192. if (nullptr == rpid)
  193. {
  194. StaticSym const * sym = s_reservedWordInfo[token].sym;
  195. Assert(sym != nullptr);
  196. rpid = this->PidHashNameLenWithHash(sym->sz, sym->sz + sym->cch, sym->cch, sym->luHash);
  197. rpid->SetTk(token, s_reservedWordInfo[token].grfid);
  198. m_rpid[token] = rpid;
  199. }
  200. return rpid;
  201. }
  202. template <typename CharType>
  203. IdentPtr HashTbl::PidHashNameLen(CharType const * prgch, CharType const * end, uint32 cch)
  204. {
  205. // NOTE: We use case sensitive hash during compilation, but the runtime
  206. // uses case insensitive hashing so it can do case insensitive lookups.
  207. uint32 luHash = CaseSensitiveComputeHash(prgch, end);
  208. return PidHashNameLenWithHash(prgch, end, cch, luHash);
  209. }
  210. template IdentPtr HashTbl::PidHashNameLen<utf8char_t>(utf8char_t const * prgch, utf8char_t const * end, uint32 cch);
  211. template IdentPtr HashTbl::PidHashNameLen<char>(char const * prgch, char const * end, uint32 cch);
  212. template IdentPtr HashTbl::PidHashNameLen<char16>(char16 const * prgch, char16 const * end, uint32 cch);
  213. template <typename CharType>
  214. IdentPtr HashTbl::PidHashNameLen(CharType const * prgch, uint32 cch)
  215. {
  216. Assert(sizeof(CharType) == 2);
  217. return PidHashNameLen(prgch, prgch + cch, cch);
  218. };
  219. template IdentPtr HashTbl::PidHashNameLen<utf8char_t>(utf8char_t const * prgch, uint32 cch);
  220. template IdentPtr HashTbl::PidHashNameLen<char>(char const * prgch, uint32 cch);
  221. template IdentPtr HashTbl::PidHashNameLen<char16>(char16 const * prgch, uint32 cch);
  222. template <typename CharType>
  223. IdentPtr HashTbl::PidHashNameLenWithHash(_In_reads_(cch) CharType const * prgch, CharType const * end, int32 cch, uint32 luHash)
  224. {
  225. Assert(cch >= 0);
  226. Assert(cch == 0 || prgch != nullptr);
  227. Assert(luHash == CaseSensitiveComputeHash(prgch, end));
  228. IdentPtr * ppid = nullptr;
  229. IdentPtr pid;
  230. LONG cb;
  231. int32 bucketCount;
  232. #if PROFILE_DICTIONARY
  233. int depth = 0;
  234. #endif
  235. pid = this->FindExistingPid(prgch, end, cch, luHash, &ppid, &bucketCount
  236. #if PROFILE_DICTIONARY
  237. , depth
  238. #endif
  239. );
  240. if (pid)
  241. {
  242. return pid;
  243. }
  244. if (bucketCount > BucketLengthLimit && m_luCount > m_luMask)
  245. {
  246. Grow();
  247. // ppid is now invalid because the Grow() moves the entries around.
  248. // Find the correct ppid by repeating the find of the end of the bucket
  249. // the new item will be placed in.
  250. // Note this is similar to the main find loop but does not count nor does it
  251. // look at the entries because we already proved above the entry is not in the
  252. // table, we just want to find the end of the bucket.
  253. ppid = &m_prgpidName[luHash & m_luMask];
  254. while (*ppid)
  255. ppid = &(*ppid)->m_pidNext;
  256. }
  257. #if PROFILE_DICTIONARY
  258. ++depth;
  259. if (stats)
  260. stats->Insert(depth);
  261. #endif
  262. //Windows OS Bug 1795286 : CENTRAL PREFAST RUN: inetcore\scriptengines\src\src\core\hash.cpp :
  263. // 'sizeof((*pid))+((cch+1))*sizeof(OLECHAR)' may be smaller than
  264. // '((cch+1))*sizeof(OLECHAR)'. This can be caused by integer overflows
  265. // or underflows. This could yield an incorrect buffer all
  266. /* Allocate space for the identifier */
  267. ULONG Len;
  268. if (FAILED(ULongAdd(cch, 1, &Len)) ||
  269. FAILED(ULongMult(Len, sizeof(OLECHAR), &Len)) ||
  270. FAILED(ULongAdd(Len, sizeof(*pid), &Len)) ||
  271. FAILED(ULongToLong(Len, &cb)))
  272. {
  273. cb = 0;
  274. OutOfMemory();
  275. }
  276. if (nullptr == (pid = (IdentPtr)m_noReleaseAllocator.Alloc(cb)))
  277. OutOfMemory();
  278. /* Insert the identifier into the hash list */
  279. *ppid = pid;
  280. // Increment the number of entries in the table.
  281. m_luCount++;
  282. /* Fill in the identifier record */
  283. pid->m_pidNext = nullptr;
  284. pid->m_tk = tkLim;
  285. pid->m_grfid = fidNil;
  286. pid->m_luHash = luHash;
  287. pid->m_cch = cch;
  288. pid->m_pidRefStack = nullptr;
  289. pid->m_propertyId = Js::Constants::NoProperty;
  290. pid->assignmentState = NotAssigned;
  291. pid->isUsedInLdElem = false;
  292. HashTbl::CopyString(pid->m_sz, prgch, end);
  293. return pid;
  294. }
  295. template <typename CharType>
  296. IdentPtr HashTbl::FindExistingPid(
  297. CharType const * prgch,
  298. CharType const * end,
  299. int32 cch,
  300. uint32 luHash,
  301. IdentPtr **pppInsert,
  302. int32 *pBucketCount
  303. #if PROFILE_DICTIONARY
  304. , int& depth
  305. #endif
  306. )
  307. {
  308. int32 bucketCount;
  309. IdentPtr pid;
  310. /* Search the hash table for an existing match */
  311. IdentPtr *ppid = &m_prgpidName[luHash & m_luMask];
  312. for (bucketCount = 0; nullptr != (pid = *ppid); ppid = &pid->m_pidNext, bucketCount++)
  313. {
  314. if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
  315. HashTbl::CharsAreEqual(pid->m_sz, prgch, end))
  316. {
  317. return pid;
  318. }
  319. #if PROFILE_DICTIONARY
  320. ++depth;
  321. #endif
  322. }
  323. if (pBucketCount)
  324. {
  325. *pBucketCount = bucketCount;
  326. }
  327. if (pppInsert)
  328. {
  329. *pppInsert = ppid;
  330. }
  331. return nullptr;
  332. }
  333. template IdentPtr HashTbl::FindExistingPid<utf8char_t>(
  334. utf8char_t const * prgch, utf8char_t const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
  335. #if PROFILE_DICTIONARY
  336. , int& depth
  337. #endif
  338. );
  339. template IdentPtr HashTbl::FindExistingPid<char>(
  340. char const * prgch, char const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
  341. #if PROFILE_DICTIONARY
  342. , int& depth
  343. #endif
  344. );
  345. template IdentPtr HashTbl::FindExistingPid<char16>(
  346. char16 const * prgch, char16 const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
  347. #if PROFILE_DICTIONARY
  348. , int& depth
  349. #endif
  350. );
  351. bool HashTbl::Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch)
  352. {
  353. uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
  354. for (auto pid = m_prgpidName[luHash & m_luMask]; pid; pid = pid->m_pidNext)
  355. {
  356. if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
  357. HashTbl::CharsAreEqual(pid->m_sz, prgch + cch, prgch))
  358. {
  359. return true;
  360. }
  361. }
  362. return false;
  363. }
  364. #include "HashFunc.cpp"
  365. __declspec(noreturn) void HashTbl::OutOfMemory()
  366. {
  367. throw ParseExceptionObject(ERRnoMemory);
  368. }