hash.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. #if PROFILE_DICTIONARY
  7. #include "DictionaryStats.h"
  8. #endif
  9. const HashTbl::KWD HashTbl::g_mptkkwd[tkLimKwd] =
  10. {
  11. { knopNone,0,knopNone,0 },
  12. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  13. { nop2,kopl##prec2,nop1,kopl##prec1 },
  14. #define TOK_DCL(tk,prec2,nop2,prec1,nop1) \
  15. { nop2,kopl##prec2,nop1,kopl##prec1 },
  16. #include "keywords.h"
  17. };
  18. const HashTbl::ReservedWordInfo HashTbl::s_reservedWordInfo[tkID] =
  19. {
  20. { nullptr, fidNil },
  21. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  22. { &g_ssym_##name, f },
  23. #include "keywords.h"
  24. };
  25. HashTbl * HashTbl::Create(uint cidHash, ErrHandler * perr)
  26. {
  27. HashTbl * phtbl;
  28. if (nullptr == (phtbl = HeapNewNoThrow(HashTbl,perr)))
  29. return nullptr;
  30. if (!phtbl->Init(cidHash))
  31. {
  32. delete phtbl;
  33. return nullptr;
  34. }
  35. return phtbl;
  36. }
  37. BOOL HashTbl::Init(uint cidHash)
  38. {
  39. // cidHash must be a power of two
  40. Assert(cidHash > 0 && 0 == (cidHash & (cidHash - 1)));
  41. long cb;
  42. /* Allocate and clear the hash bucket table */
  43. m_luMask = cidHash - 1;
  44. m_luCount = 0;
  45. // (Bug 1117873 - Windows OS Bugs)
  46. // Prefast: Verify that cidHash * sizeof(Ident *) does not cause an integer overflow
  47. // NoReleaseAllocator( ) takes long - so check for LONG_MAX
  48. // Win8 730594 - Use intsafe function to check for overflow.
  49. uint cbTemp;
  50. if (FAILED(UIntMult(cidHash, sizeof(Ident *), &cbTemp)) || cbTemp > LONG_MAX)
  51. return FALSE;
  52. cb = cbTemp;
  53. if (nullptr == (m_prgpidName = (Ident **)m_noReleaseAllocator.Alloc(cb)))
  54. return FALSE;
  55. memset(m_prgpidName, 0, cb);
  56. #if PROFILE_DICTIONARY
  57. stats = DictionaryStats::Create(typeid(this).name(), cidHash);
  58. #endif
  59. return TRUE;
  60. }
  61. void HashTbl::Grow()
  62. {
  63. // Grow the bucket size by grow factor
  64. // Has the side-effect of inverting the order the pids appear in their respective buckets.
  65. uint cidHash = m_luMask + 1;
  66. uint n_cidHash = cidHash * GrowFactor;
  67. Assert(n_cidHash > 0 && 0 == (n_cidHash & (n_cidHash - 1)));
  68. // Win8 730594 - Use intsafe function to check for overflow.
  69. uint cbTemp;
  70. if (FAILED(UIntMult(n_cidHash, sizeof(Ident *), &cbTemp)) || cbTemp > LONG_MAX)
  71. // It is fine to exit early here, we will just have a potentially densely populated hash table
  72. return;
  73. long cb = cbTemp;
  74. uint n_luMask = n_cidHash - 1;
  75. IdentPtr *n_prgpidName = (IdentPtr *)m_noReleaseAllocator.Alloc(cb);
  76. if (n_prgpidName == nullptr)
  77. // It is fine to exit early here, we will just have a potentially densely populated hash table
  78. return;
  79. // Clear the array
  80. memset(n_prgpidName, 0, cb);
  81. // Place each entry its new bucket.
  82. for (uint i = 0; i < cidHash; i++)
  83. {
  84. for (IdentPtr pid = m_prgpidName[i], next = pid ? pid->m_pidNext : nullptr; pid; pid = next, next = pid ? pid->m_pidNext : nullptr)
  85. {
  86. ulong luHash = pid->m_luHash;
  87. ulong luIndex = luHash & n_luMask;
  88. pid->m_pidNext = n_prgpidName[luIndex];
  89. n_prgpidName[luIndex] = pid;
  90. }
  91. }
  92. Assert(CountAndVerifyItems(n_prgpidName, n_cidHash, n_luMask) == m_luCount);
  93. // Update the table fields.
  94. m_prgpidName = n_prgpidName;
  95. m_luMask= n_luMask;
  96. #if PROFILE_DICTIONARY
  97. if(stats)
  98. {
  99. int emptyBuckets = 0;
  100. for (uint i = 0; i < n_cidHash; i++)
  101. {
  102. if(m_prgpidName[i] == nullptr)
  103. {
  104. emptyBuckets++;
  105. }
  106. }
  107. stats->Resize(n_cidHash, emptyBuckets);
  108. }
  109. #endif
  110. }
  111. #if DEBUG
  112. uint HashTbl::CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask)
  113. {
  114. uint count = 0;
  115. for (uint i = 0; i < bucketCount; i++)
  116. for (IdentPtr pid = buckets[i]; pid; pid = pid->m_pidNext)
  117. {
  118. Assert((pid->m_luHash & mask) == i);
  119. count++;
  120. }
  121. return count;
  122. }
  123. #endif
  124. #pragma warning(push)
  125. #pragma warning(disable:4740) // flow in or out of inline asm code suppresses global optimization
  126. tokens Ident::Tk(bool isStrictMode)
  127. {
  128. const tokens token = (tokens)m_tk;
  129. if (token == tkLim)
  130. {
  131. m_tk = tkNone;
  132. const ulong luHash = this->m_luHash;
  133. const LPCOLESTR prgch = Psz();
  134. const ulong cch = Cch();
  135. #include "kwds_sw.h"
  136. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  137. LEqual_##name: \
  138. if (cch == g_ssym_##name.cch && \
  139. 0 == memcmp(g_ssym_##name.sz, prgch, cch * sizeof(OLECHAR))) \
  140. { \
  141. if (f) \
  142. this->m_grfid |= f; \
  143. this->m_tk = tk; \
  144. return ((f & fidKwdRsvd) || (isStrictMode && (f & fidKwdFutRsvd))) ? tk : tkID; \
  145. } \
  146. goto LDefault;
  147. #include "keywords.h"
  148. LDefault:
  149. return tkID;
  150. }
  151. else if (token == tkNone || !(m_grfid & fidKwdRsvd))
  152. {
  153. if ( !isStrictMode || !(m_grfid & fidKwdFutRsvd))
  154. {
  155. return tkID;
  156. }
  157. }
  158. return token;
  159. }
  160. #pragma warning(pop)
  161. void Ident::SetTk(tokens token, ushort grfid)
  162. {
  163. Assert(token != tkNone && token < tkID);
  164. if (m_tk == tkLim)
  165. {
  166. m_tk = (ushort)token;
  167. m_grfid |= grfid;
  168. }
  169. else
  170. {
  171. Assert(m_tk == token);
  172. Assert((m_grfid & grfid) == grfid);
  173. }
  174. }
  175. IdentPtr HashTbl::PidFromTk(tokens token)
  176. {
  177. Assert(token > tkNone && token < tkID);
  178. __analysis_assume(token > tkNone && token < tkID);
  179. // Create a pid so we can create a name node
  180. IdentPtr rpid = m_rpid[token];
  181. if (nullptr == rpid)
  182. {
  183. StaticSym const * sym = s_reservedWordInfo[token].sym;
  184. Assert(sym != nullptr);
  185. rpid = this->PidHashNameLenWithHash(sym->sz, sym->cch, sym->luHash);
  186. rpid->SetTk(token, s_reservedWordInfo[token].grfid);
  187. m_rpid[token] = rpid;
  188. }
  189. return rpid;
  190. }
  191. template <typename CharType>
  192. IdentPtr HashTbl::PidHashNameLen(CharType const * prgch, ulong cch)
  193. {
  194. // NOTE: We use case sensitive hash during compilation, but the runtime
  195. // uses case insensitive hashing so it can do case insensitive lookups.
  196. ulong luHash = CaseSensitiveComputeHashCch(prgch, cch);
  197. return PidHashNameLenWithHash(prgch, cch, luHash);
  198. };
  199. template IdentPtr HashTbl::PidHashNameLen<utf8char_t>(utf8char_t const * prgch, ulong cch);
  200. template IdentPtr HashTbl::PidHashNameLen<char>(char const * prgch, ulong cch);
  201. template IdentPtr HashTbl::PidHashNameLen<wchar_t>(wchar_t const * prgch, ulong cch);
  202. template <typename CharType>
  203. IdentPtr HashTbl::PidHashNameLenWithHash(_In_reads_(cch) CharType const * prgch, long cch, ulong luHash)
  204. {
  205. Assert(cch >= 0);
  206. AssertArrMemR(prgch, cch);
  207. Assert(luHash == CaseSensitiveComputeHashCch(prgch, cch));
  208. IdentPtr * ppid;
  209. IdentPtr pid;
  210. long cb;
  211. long bucketCount;
  212. #if PROFILE_DICTIONARY
  213. int depth = 0;
  214. #endif
  215. pid = this->FindExistingPid(prgch, cch, luHash, &ppid, &bucketCount
  216. #if PROFILE_DICTIONARY
  217. , depth
  218. #endif
  219. );
  220. if (pid)
  221. {
  222. return pid;
  223. }
  224. if (bucketCount > BucketLengthLimit && m_luCount > m_luMask)
  225. {
  226. Grow();
  227. // ppid is now invalid because the Grow() moves the entries around.
  228. // Find the correct ppid by repeating the find of the end of the bucket
  229. // the new item will be placed in.
  230. // Note this is similar to the main find loop but does not count nor does it
  231. // look at the entries because we already proved above the entry is not in the
  232. // table, we just want to find the end of the bucket.
  233. ppid = &m_prgpidName[luHash & m_luMask];
  234. while (*ppid)
  235. ppid = &(*ppid)->m_pidNext;
  236. }
  237. #if PROFILE_DICTIONARY
  238. ++depth;
  239. if (stats)
  240. stats->Insert(depth);
  241. #endif
  242. //Windows OS Bug 1795286 : CENTRAL PREFAST RUN: inetcore\scriptengines\src\src\core\hash.cpp :
  243. // 'sizeof((*pid))+((cch+1))*sizeof(OLECHAR)' may be smaller than
  244. // '((cch+1))*sizeof(OLECHAR)'. This can be caused by integer overflows
  245. // or underflows. This could yield an incorrect buffer all
  246. /* Allocate space for the identifier */
  247. ULONG Len;
  248. if (FAILED(ULongAdd(cch, 1, &Len)) ||
  249. FAILED(ULongMult(Len, sizeof(OLECHAR), &Len)) ||
  250. FAILED(ULongAdd(Len, sizeof(*pid), &Len)) ||
  251. FAILED(ULongToLong(Len, &cb)))
  252. {
  253. cb = 0;
  254. m_perr->Throw(ERRnoMemory);
  255. }
  256. if (nullptr == (pid = (IdentPtr)m_noReleaseAllocator.Alloc(cb)))
  257. m_perr->Throw(ERRnoMemory);
  258. /* Insert the identifier into the hash list */
  259. *ppid = pid;
  260. // Increment the number of entries in the table.
  261. m_luCount++;
  262. /* Fill in the identifier record */
  263. pid->m_pidNext = nullptr;
  264. pid->m_tk = tkLim;
  265. pid->m_grfid = fidNil;
  266. pid->m_luHash = luHash;
  267. pid->m_cch = cch;
  268. pid->m_pidRefStack = nullptr;
  269. pid->m_propertyId = Js::Constants::NoProperty;
  270. pid->assignmentState = NotAssigned;
  271. HashTbl::CopyString(pid->m_sz, prgch, cch);
  272. return pid;
  273. }
  274. template <typename CharType>
  275. IdentPtr HashTbl::FindExistingPid(
  276. CharType const * prgch,
  277. long cch,
  278. ulong luHash,
  279. IdentPtr **pppInsert,
  280. long *pBucketCount
  281. #if PROFILE_DICTIONARY
  282. , int& depth
  283. #endif
  284. )
  285. {
  286. long bucketCount;
  287. IdentPtr pid;
  288. IdentPtr *ppid = &m_prgpidName[luHash & m_luMask];
  289. /* Search the hash table for an existing match */
  290. ppid = &m_prgpidName[luHash & m_luMask];
  291. for (bucketCount = 0; nullptr != (pid = *ppid); ppid = &pid->m_pidNext, bucketCount++)
  292. {
  293. if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
  294. HashTbl::CharsAreEqual(pid->m_sz, prgch, cch))
  295. {
  296. return pid;
  297. }
  298. #if PROFILE_DICTIONARY
  299. ++depth;
  300. #endif
  301. }
  302. if (pBucketCount)
  303. {
  304. *pBucketCount = bucketCount;
  305. }
  306. if (pppInsert)
  307. {
  308. *pppInsert = ppid;
  309. }
  310. return nullptr;
  311. }
  312. template IdentPtr HashTbl::FindExistingPid<utf8char_t>(
  313. utf8char_t const * prgch, long cch, ulong luHash, IdentPtr **pppInsert, long *pBucketCount
  314. #if PROFILE_DICTIONARY
  315. , int& depth
  316. #endif
  317. );
  318. template IdentPtr HashTbl::FindExistingPid<char>(
  319. char const * prgch, long cch, ulong luHash, IdentPtr **pppInsert, long *pBucketCount
  320. #if PROFILE_DICTIONARY
  321. , int& depth
  322. #endif
  323. );
  324. template IdentPtr HashTbl::FindExistingPid<wchar_t>(
  325. wchar_t const * prgch, long cch, ulong luHash, IdentPtr **pppInsert, long *pBucketCount
  326. #if PROFILE_DICTIONARY
  327. , int& depth
  328. #endif
  329. );
  330. bool HashTbl::Contains(_In_reads_(cch) LPCOLESTR prgch, long cch)
  331. {
  332. ulong luHash = CaseSensitiveComputeHashCch(prgch, cch);
  333. for (auto pid = m_prgpidName[luHash & m_luMask]; pid; pid = pid->m_pidNext)
  334. {
  335. if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
  336. HashTbl::CharsAreEqual(pid->m_sz, prgch, cch))
  337. {
  338. return true;
  339. }
  340. }
  341. return false;
  342. }
  343. #include "hashfunc.cpp"
  344. #pragma warning(push)
  345. #pragma warning(disable:4740) // flow in or out of inline asm code suppresses global optimization
  346. // Decide if token is keyword by string matching -
  347. // This method is used during colorizing when scanner isn't interested in storing the actual id and does not care about conversion of escape sequences
  348. tokens HashTbl::TkFromNameLenColor(_In_reads_(cch) LPCOLESTR prgch, ulong cch)
  349. {
  350. ulong luHash = CaseSensitiveComputeHashCch(prgch, cch);
  351. // look for a keyword
  352. #include "kwds_sw.h"
  353. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  354. LEqual_##name: \
  355. if (cch == g_ssym_##name.cch && \
  356. 0 == memcmp(g_ssym_##name.sz, prgch, cch * sizeof(OLECHAR))) \
  357. { \
  358. return tk; \
  359. } \
  360. goto LDefault;
  361. #include "keywords.h"
  362. LDefault:
  363. return tkID;
  364. }
  365. #pragma warning(pop)
  366. #pragma warning(push)
  367. #pragma warning(disable:4740) // flow in or out of inline asm code suppresses global optimization
  368. // Decide if token is keyword by string matching -
  369. // This method is used during colorizing when scanner isn't interested in storing the actual id and does not care about conversion of escape sequences
  370. tokens HashTbl::TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, ulong cch, bool isStrictMode)
  371. {
  372. ulong luHash = CaseSensitiveComputeHashCch(prgch, cch);
  373. // look for a keyword
  374. #include "kwds_sw.h"
  375. #define KEYWORD(tk,f,prec2,nop2,prec1,nop1,name) \
  376. LEqual_##name: \
  377. if (cch == g_ssym_##name.cch && \
  378. 0 == memcmp(g_ssym_##name.sz, prgch, cch * sizeof(OLECHAR))) \
  379. { \
  380. return ((f & fidKwdRsvd) || (isStrictMode && (f & fidKwdFutRsvd))) ? tk : tkID; \
  381. } \
  382. goto LDefault;
  383. #include "keywords.h"
  384. LDefault:
  385. return tkID;
  386. }
  387. #pragma warning(pop)