CharSet.cpp 57 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. #include "Common/MathUtil.h"
  7. namespace UnifiedRegex
  8. {
  9. // ----------------------------------------------------------------------
  10. // CharBitVec
  11. // ----------------------------------------------------------------------
  12. uint CharBitvec::Count() const
  13. {
  14. uint n = 0;
  15. for (int w = 0; w < vecSize; w++)
  16. {
  17. n += Math::PopCnt32(vec[w]);
  18. }
  19. return n;
  20. }
  21. int CharBitvec::NextSet(int k) const
  22. {
  23. if (k < 0 || k >= Size)
  24. return -1;
  25. uint w = k / wordSize;
  26. uint o = k % wordSize;
  27. uint32 v = vec[w] >> o;
  28. do
  29. {
  30. if (v == 0)
  31. {
  32. k += wordSize - o;
  33. break;
  34. }
  35. else if ((v & 0x1) != 0)
  36. return k;
  37. else
  38. {
  39. v >>= 1;
  40. o++;
  41. k++;
  42. }
  43. }
  44. while (o < wordSize);
  45. w++;
  46. while (w < vecSize)
  47. {
  48. o = 0;
  49. v = vec[w];
  50. do
  51. {
  52. if (v == 0)
  53. {
  54. k += wordSize - o;
  55. break;
  56. }
  57. else if ((v & 0x1) != 0)
  58. return k;
  59. else
  60. {
  61. v >>= 1;
  62. o++;
  63. k++;
  64. }
  65. }
  66. while (o < wordSize);
  67. w++;
  68. }
  69. return -1;
  70. }
  71. int CharBitvec::NextClear(int k) const
  72. {
  73. if (k < 0 || k >= Size)
  74. return -1;
  75. uint w = k / wordSize;
  76. uint o = k % wordSize;
  77. uint32 v = vec[w] >> o;
  78. do
  79. {
  80. if (v == ones)
  81. {
  82. k += wordSize - o;
  83. break;
  84. }
  85. else if ((v & 0x1) == 0)
  86. return k;
  87. else
  88. {
  89. v >>= 1;
  90. o++;
  91. k++;
  92. }
  93. }
  94. while (o < wordSize);
  95. w++;
  96. while (w < vecSize)
  97. {
  98. o = 0;
  99. v = vec[w];
  100. do
  101. {
  102. if (v == ones)
  103. {
  104. k += wordSize - o;
  105. break;
  106. }
  107. else if ((v & 0x1) == 0)
  108. return k;
  109. else
  110. {
  111. v >>= 1;
  112. o++;
  113. k++;
  114. }
  115. }
  116. while (o < wordSize);
  117. w++;
  118. }
  119. return -1;
  120. }
  121. template <typename C>
  122. void CharBitvec::ToComplement(ArenaAllocator* allocator, uint base, CharSet<C>& result) const
  123. {
  124. int hi = -1;
  125. while (true)
  126. {
  127. // Find the next range of clear bits in vector
  128. int li = NextClear(hi + 1);
  129. if (li < 0)
  130. return;
  131. hi = NextSet(li + 1);
  132. if (hi < 0)
  133. hi = Size - 1;
  134. else
  135. {
  136. Assert(hi > 0);
  137. hi--;
  138. }
  139. // Add range as characters
  140. result.SetRange(allocator, Chars<C>::ITC(base + li), Chars<C>::ITC(base + hi));
  141. }
  142. }
  143. template <typename C>
  144. void CharBitvec::ToEquivClass(ArenaAllocator* allocator, uint base, uint& tblidx, CharSet<C>& result, codepoint_t baseOffset) const
  145. {
  146. int hi = -1;
  147. while (true)
  148. {
  149. // Find the next range of set bits in vector
  150. int li = NextSet(hi + 1);
  151. if (li < 0)
  152. return;
  153. hi = NextClear(li + 1);
  154. if (hi < 0)
  155. hi = Size - 1;
  156. else
  157. {
  158. Assert(hi > 0);
  159. hi--;
  160. }
  161. // Convert to character codes
  162. uint l = base + li + baseOffset;
  163. uint h = base + hi + baseOffset;
  164. do
  165. {
  166. uint acth;
  167. C equivl[CaseInsensitive::EquivClassSize];
  168. CaseInsensitive::RangeToEquivClass(tblidx, l, h, acth, equivl);
  169. uint n = acth - l;
  170. for (int i = 0; i < CaseInsensitive::EquivClassSize; i++)
  171. {
  172. result.SetRange(allocator, equivl[i], Chars<C>::Shift(equivl[i], n));
  173. }
  174. // Go around again for rest of this range
  175. l = acth + 1;
  176. }
  177. while (l <= h);
  178. }
  179. }
  180. // ----------------------------------------------------------------------
  181. // CharSetNode
  182. // ----------------------------------------------------------------------
  183. CharSetNode* CharSetNode::For(ArenaAllocator* allocator, int level)
  184. {
  185. if (level == 0)
  186. return Anew(allocator, CharSetLeaf);
  187. else
  188. return Anew(allocator, CharSetInner);
  189. }
  190. // ----------------------------------------------------------------------
  191. // CharSetFull
  192. // ----------------------------------------------------------------------
  193. CharSetFull CharSetFull::Instance;
  194. CharSetFull* const CharSetFull::TheFullNode = &CharSetFull::Instance;
  195. CharSetFull::CharSetFull() {}
  196. void CharSetFull::FreeSelf(ArenaAllocator* allocator)
  197. {
  198. Assert(this == TheFullNode);
  199. // Never allocated
  200. }
  201. CharSetNode* CharSetFull::Clone(ArenaAllocator* allocator) const
  202. {
  203. // Always shared
  204. return (CharSetNode*)this;
  205. }
  206. CharSetNode* CharSetFull::Set(ArenaAllocator* allocator, uint level, uint l, uint h)
  207. {
  208. return this;
  209. }
  210. CharSetNode* CharSetFull::ClearRange(ArenaAllocator* allocator, uint level, uint l, uint h)
  211. {
  212. AssertMsg(h <= lim(level), "The range for clearing provided is invalid for this level.");
  213. AssertMsg(l <= h, "Can't clear where lower is bigger than the higher.");
  214. if (l == 0 && h == lim(level))
  215. {
  216. return nullptr;
  217. }
  218. CharSetNode* toReturn = For(allocator, level);
  219. if (l > 0)
  220. {
  221. AssertVerify(toReturn->Set(allocator, level, 0, l - 1) == toReturn);
  222. }
  223. if (h < lim(level))
  224. {
  225. AssertVerify(toReturn->Set(allocator, level, h + 1, lim(level)) == toReturn);
  226. }
  227. return toReturn;
  228. }
  229. CharSetNode* CharSetFull::UnionInPlace(ArenaAllocator* allocator, uint level, const CharSetNode* other)
  230. {
  231. return this;
  232. }
  233. bool CharSetFull::Get(uint level, uint k) const
  234. {
  235. return true;
  236. }
  237. void CharSetFull::ToComplement(ArenaAllocator* allocator, uint level, uint base, CharSet<Char>& result) const
  238. {
  239. // Empty, so add nothing
  240. }
  241. void CharSetFull::ToEquivClassW(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<char16>& result) const
  242. {
  243. this->ToEquivClass<char16>(allocator, level, base, tblidx, result);
  244. }
  245. void CharSetFull::ToEquivClassCP(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<codepoint_t>& result, codepoint_t baseOffset) const
  246. {
  247. this->ToEquivClass<codepoint_t>(allocator, level, base, tblidx, result, baseOffset);
  248. }
  249. template <typename C>
  250. void CharSetFull::ToEquivClass(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<C>& result, codepoint_t baseOffset) const
  251. {
  252. uint l = base + (CharSetNode::levels - 1 == level ? 0xff : 0) + baseOffset;
  253. uint h = base + lim(level) + baseOffset;
  254. do
  255. {
  256. uint acth;
  257. C equivl[CaseInsensitive::EquivClassSize];
  258. CaseInsensitive::RangeToEquivClass(tblidx, l, h, acth, equivl);
  259. uint n = acth - l;
  260. for (int i = 0; i < CaseInsensitive::EquivClassSize; i++)
  261. {
  262. result.SetRange(allocator, equivl[i], Chars<C>::Shift(equivl[i], n));
  263. }
  264. // Go around again for rest of this range
  265. l = acth + 1;
  266. }
  267. while (l <= h);
  268. }
  269. bool CharSetFull::IsSubsetOf(uint level, const CharSetNode* other) const
  270. {
  271. Assert(other != nullptr);
  272. return other == TheFullNode;
  273. }
  274. bool CharSetFull::IsEqualTo(uint level, const CharSetNode* other) const
  275. {
  276. Assert(other != nullptr);
  277. return other == TheFullNode;
  278. }
  279. uint CharSetFull::Count(uint level) const
  280. {
  281. return lim(level) + 1;
  282. }
  283. _Success_(return)
  284. bool CharSetFull::GetNextRange(uint level, Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) const
  285. {
  286. Assert(searchCharStart < this->Count(level));
  287. *outLowerChar = searchCharStart;
  288. *outHigherChar = (Char)this->Count(level) - 1;
  289. return true;
  290. }
  291. #if DBG
  292. bool CharSetFull::IsLeaf() const
  293. {
  294. return false;
  295. }
  296. #endif
  297. // ----------------------------------------------------------------------
  298. // CharSetInner
  299. // ----------------------------------------------------------------------
  300. CharSetInner::CharSetInner()
  301. {
  302. for (uint i = 0; i < branchingPerInnerLevel; i++)
  303. children[i] = 0;
  304. }
  305. void CharSetInner::FreeSelf(ArenaAllocator* allocator)
  306. {
  307. for (uint i = 0; i < branchingPerInnerLevel; i++)
  308. {
  309. if (children[i] != 0)
  310. {
  311. children[i]->FreeSelf(allocator);
  312. #if DBG
  313. children[i] = 0;
  314. #endif
  315. }
  316. }
  317. Adelete(allocator, this);
  318. }
  319. CharSetNode* CharSetInner::Clone(ArenaAllocator* allocator) const
  320. {
  321. CharSetInner* res = Anew(allocator, CharSetInner);
  322. for (uint i = 0; i < branchingPerInnerLevel; i++)
  323. {
  324. if (children[i] != 0)
  325. res->children[i] = children[i]->Clone(allocator);
  326. }
  327. return res;
  328. }
  329. CharSetNode* CharSetInner::ClearRange(ArenaAllocator* allocator, uint level, uint l, uint h)
  330. {
  331. Assert(level > 0);
  332. AssertMsg(h <= lim(level), "The range for clearing provided is invalid for this level.");
  333. AssertMsg(l <= h, "Can't clear where lower is bigger than the higher.");
  334. if (l == 0 && h == lim(level))
  335. {
  336. return nullptr;
  337. }
  338. uint lowerIndex = innerIdx(level, l);
  339. uint higherIndex = innerIdx(level--, h);
  340. l = l & lim(level);
  341. h = h & lim(level);
  342. if (lowerIndex == higherIndex)
  343. {
  344. if (children[lowerIndex] != nullptr)
  345. {
  346. children[lowerIndex] = children[lowerIndex]->ClearRange(allocator, level, l, h);
  347. }
  348. }
  349. else
  350. {
  351. if (children[lowerIndex] != nullptr)
  352. {
  353. children[lowerIndex] = children[lowerIndex]->ClearRange(allocator, level, l, lim(level));
  354. }
  355. for (uint i = lowerIndex + 1; i < higherIndex; i++)
  356. {
  357. if (children[i] != nullptr)
  358. {
  359. children[i]->FreeSelf(allocator);
  360. }
  361. children[i] = nullptr;
  362. }
  363. if (children[higherIndex] != nullptr)
  364. {
  365. children[higherIndex] = children[higherIndex]->ClearRange(allocator, level, 0, h);
  366. }
  367. }
  368. for (int i = 0; i < branchingPerInnerLevel; i++)
  369. {
  370. if (children[i] != nullptr)
  371. {
  372. return this;
  373. }
  374. }
  375. return nullptr;
  376. }
  377. CharSetNode* CharSetInner::Set(ArenaAllocator* allocator, uint level, uint l, uint h)
  378. {
  379. Assert(level > 0);
  380. uint li = innerIdx(level, l);
  381. uint hi = innerIdx(level--, h);
  382. bool couldBeFull = true;
  383. if (li == hi)
  384. {
  385. if (children[li] == nullptr)
  386. {
  387. if (remain(level, l) == 0 && remain(level, h + 1) == 0)
  388. children[li] = CharSetFull::TheFullNode;
  389. else
  390. {
  391. children[li] = For(allocator, level);
  392. children[li] = children[li]->Set(allocator, level, l, h);
  393. couldBeFull = false;
  394. }
  395. }
  396. else
  397. children[li] = children[li]->Set(allocator, level, l, h);
  398. }
  399. else
  400. {
  401. if (children[li] == nullptr)
  402. {
  403. if (remain(level, l) == 0)
  404. children[li] = CharSetFull::TheFullNode;
  405. else
  406. {
  407. children[li] = For(allocator, level);
  408. children[li] = children[li]->Set(allocator, level, l, lim(level));
  409. couldBeFull = false;
  410. }
  411. }
  412. else
  413. children[li] = children[li]->Set(allocator, level, l, lim(level));
  414. for (uint i = li + 1; i < hi; i++)
  415. {
  416. if (children[i] != nullptr)
  417. children[i]->FreeSelf(allocator);
  418. children[i] = CharSetFull::TheFullNode;
  419. }
  420. if (children[hi] == nullptr)
  421. {
  422. if (remain(level, h + 1) == 0)
  423. children[hi] = CharSetFull::TheFullNode;
  424. else
  425. {
  426. children[hi] = For(allocator, level);
  427. children[hi] = children[hi]->Set(allocator, level, 0, h);
  428. couldBeFull = false;
  429. }
  430. }
  431. else
  432. children[hi] = children[hi]->Set(allocator, level, 0, h);
  433. }
  434. if (couldBeFull)
  435. {
  436. for (uint i = 0; i < branchingPerInnerLevel; i++)
  437. {
  438. if (children[i] != CharSetFull::TheFullNode)
  439. return this;
  440. }
  441. FreeSelf(allocator);
  442. return CharSetFull::TheFullNode;
  443. }
  444. else
  445. return this;
  446. }
  447. CharSetNode* CharSetInner::UnionInPlace(ArenaAllocator* allocator, uint level, const CharSetNode* other)
  448. {
  449. Assert(level > 0);
  450. Assert(other != nullptr && other != CharSetFull::TheFullNode && !other->IsLeaf());
  451. CharSetInner* otherInner = (CharSetInner*)other;
  452. level--;
  453. bool isFull = true;
  454. for (uint i = 0; i < branchingPerInnerLevel; i++)
  455. {
  456. if (otherInner->children[i] != nullptr)
  457. {
  458. if (otherInner->children[i] == CharSetFull::TheFullNode)
  459. {
  460. if (children[i] != nullptr)
  461. children[i]->FreeSelf(allocator);
  462. children[i] = CharSetFull::TheFullNode;
  463. }
  464. else
  465. {
  466. if (children[i] == nullptr)
  467. children[i] = For(allocator, level);
  468. children[i] = children[i]->UnionInPlace(allocator, level, otherInner->children[i]);
  469. if (children[i] != CharSetFull::TheFullNode)
  470. isFull = false;
  471. }
  472. }
  473. else if (children[i] != CharSetFull::TheFullNode)
  474. isFull = false;
  475. }
  476. if (isFull)
  477. {
  478. FreeSelf(allocator);
  479. return CharSetFull::TheFullNode;
  480. }
  481. else
  482. return this;
  483. }
  484. bool CharSetInner::Get(uint level, uint k) const
  485. {
  486. Assert(level > 0);
  487. uint i = innerIdx(level--, k);
  488. if (children[i] == nullptr)
  489. return false;
  490. else
  491. return children[i]->Get(level, k);
  492. }
  493. void CharSetInner::ToComplement(ArenaAllocator* allocator, uint level, uint base, CharSet<Char>& result) const
  494. {
  495. Assert(level > 0);
  496. level--;
  497. uint delta = lim(level) + 1;
  498. for (uint i = 0; i < branchingPerInnerLevel; i++)
  499. {
  500. if (children[i] == nullptr)
  501. // Caution: Part of the range for this child may overlap with direct vector
  502. result.SetRange(allocator, UTC(max(base, directSize)), UTC(base + delta - 1));
  503. else
  504. children[i]->ToComplement(allocator, level, base, result);
  505. base += delta;
  506. }
  507. }
  508. void CharSetInner::ToEquivClassW(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<char16>& result) const
  509. {
  510. Assert(level > 0);
  511. level--;
  512. uint delta = lim(level) + 1;
  513. for (uint i = 0; i < branchingPerInnerLevel; i++)
  514. {
  515. if (children[i] != nullptr)
  516. {
  517. children[i]->ToEquivClassW(allocator, level, base, tblidx, result);
  518. }
  519. base += delta;
  520. }
  521. }
  522. void CharSetInner::ToEquivClassCP(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<codepoint_t>& result, codepoint_t baseOffset) const
  523. {
  524. Assert(level > 0);
  525. level--;
  526. uint delta = lim(level) + 1;
  527. for (uint i = 0; i < branchingPerInnerLevel; i++)
  528. {
  529. if (children[i] != nullptr)
  530. {
  531. children[i]->ToEquivClassCP(allocator, level, base, tblidx, result, baseOffset);
  532. }
  533. base += delta;
  534. }
  535. }
  536. bool CharSetInner::IsSubsetOf(uint level, const CharSetNode* other) const
  537. {
  538. Assert(level > 0);
  539. Assert(other != nullptr && !other->IsLeaf());
  540. if (other == CharSetFull::TheFullNode)
  541. return true;
  542. level--;
  543. const CharSetInner* otherInner = (CharSetInner*)other;
  544. for (uint i = 0; i < branchingPerInnerLevel; i++)
  545. {
  546. if (children[i] != nullptr)
  547. {
  548. if (otherInner->children[i] == nullptr)
  549. return false;
  550. if (children[i]->IsSubsetOf(level, otherInner->children[i]))
  551. return false;
  552. }
  553. }
  554. return true;
  555. }
  556. bool CharSetInner::IsEqualTo(uint level, const CharSetNode* other) const
  557. {
  558. Assert(level > 0);
  559. Assert(other != nullptr && !other->IsLeaf());
  560. if (other == CharSetFull::TheFullNode)
  561. return false;
  562. level--;
  563. const CharSetInner* otherInner = (CharSetInner*)other;
  564. for (uint i = 0; i < branchingPerInnerLevel; i++)
  565. {
  566. if (children[i] != 0)
  567. {
  568. if (otherInner->children[i] == nullptr)
  569. return false;
  570. if (children[i]->IsSubsetOf(level, otherInner->children[i]))
  571. return false;
  572. }
  573. }
  574. return true;
  575. }
  576. uint CharSetInner::Count(uint level) const
  577. {
  578. uint n = 0;
  579. Assert(level > 0);
  580. level--;
  581. for (uint i = 0; i < branchingPerInnerLevel; i++)
  582. {
  583. if (children[i] != nullptr)
  584. n += children[i]->Count(level);
  585. }
  586. return n;
  587. }
  588. _Success_(return)
  589. bool CharSetInner::GetNextRange(uint level, Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) const
  590. {
  591. Assert(searchCharStart < this->lim(level) + 1);
  592. uint innerIndex = innerIdx(level--, searchCharStart);
  593. Char currentLowChar = 0, currentHighChar = 0;
  594. for (; innerIndex < branchingPerInnerLevel; innerIndex++)
  595. {
  596. if (children[innerIndex] != nullptr && children[innerIndex]->GetNextRange(level, (Char)remain(level, searchCharStart), &currentLowChar, &currentHighChar))
  597. {
  598. break;
  599. }
  600. if (innerIndex < branchingPerInnerLevel - 1)
  601. {
  602. searchCharStart = (Char)indexToValue(level + 1, innerIndex + 1, 0);
  603. }
  604. }
  605. if (innerIndex == branchingPerInnerLevel)
  606. {
  607. return false;
  608. }
  609. currentLowChar = (Char)indexToValue(level + 1, innerIndex, currentLowChar);
  610. currentHighChar = (Char)indexToValue(level + 1, innerIndex, currentHighChar);
  611. innerIndex += 1;
  612. for (; remain(level, currentHighChar) == lim(level) && innerIndex < branchingPerInnerLevel; innerIndex++)
  613. {
  614. Char tempLower, tempHigher;
  615. if (children[innerIndex] == nullptr || !children[innerIndex]->GetNextRange(level, 0x0, &tempLower, &tempHigher) || remain(level, tempLower) != 0)
  616. {
  617. break;
  618. }
  619. currentHighChar = (Char)indexToValue(level + 1, innerIndex, tempHigher);
  620. }
  621. *outLowerChar = currentLowChar;
  622. *outHigherChar = currentHighChar;
  623. return true;
  624. }
  625. #if DBG
  626. bool CharSetInner::IsLeaf() const
  627. {
  628. return false;
  629. }
  630. #endif
  631. // ----------------------------------------------------------------------
  632. // CharSetLeaf
  633. // ----------------------------------------------------------------------
  634. CharSetLeaf::CharSetLeaf()
  635. {
  636. vec.Clear();
  637. }
  638. void CharSetLeaf::FreeSelf(ArenaAllocator* allocator)
  639. {
  640. Adelete(allocator, this);
  641. }
  642. CharSetNode* CharSetLeaf::Clone(ArenaAllocator* allocator) const
  643. {
  644. return Anew(allocator, CharSetLeaf, *this);
  645. }
  646. CharSetNode* CharSetLeaf::Set(ArenaAllocator* allocator, uint level, uint l, uint h)
  647. {
  648. Assert(level == 0);
  649. vec.SetRange(leafIdx(l), leafIdx(h));
  650. if (vec.IsFull())
  651. {
  652. FreeSelf(allocator);
  653. return CharSetFull::TheFullNode;
  654. }
  655. else
  656. return this;
  657. }
  658. CharSetNode* CharSetLeaf::ClearRange(ArenaAllocator* allocator, uint level, uint l, uint h)
  659. {
  660. Assert(level == 0);
  661. AssertMsg(h <= lim(level), "The range for clearing provided is invalid for this level.");
  662. AssertMsg(l <= h, "Can't clear where lower is bigger than the higher.");
  663. if (l == 0 && h == lim(level))
  664. {
  665. return nullptr;
  666. }
  667. vec.ClearRange(leafIdx(l), leafIdx(h));
  668. if (vec.IsEmpty())
  669. {
  670. FreeSelf(allocator);
  671. return nullptr;
  672. }
  673. return this;
  674. }
  675. CharSetNode* CharSetLeaf::UnionInPlace(ArenaAllocator* allocator, uint level, const CharSetNode* other)
  676. {
  677. Assert(level == 0);
  678. Assert(other != nullptr && other->IsLeaf());
  679. CharSetLeaf* otherLeaf = (CharSetLeaf*)other;
  680. if (vec.UnionInPlaceFullCheck(otherLeaf->vec))
  681. {
  682. FreeSelf(allocator);
  683. return CharSetFull::TheFullNode;
  684. }
  685. else
  686. return this;
  687. }
  688. bool CharSetLeaf::Get(uint level, uint k) const
  689. {
  690. Assert(level == 0);
  691. return vec.Get(leafIdx(k));
  692. }
  693. void CharSetLeaf::ToComplement(ArenaAllocator* allocator, uint level, uint base, CharSet<Char>& result) const
  694. {
  695. Assert(level == 0);
  696. vec.ToComplement<char16>(allocator, base, result);
  697. }
  698. void CharSetLeaf::ToEquivClassW(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<char16>& result) const
  699. {
  700. this->ToEquivClass<char16>(allocator, level, base, tblidx, result);
  701. }
  702. void CharSetLeaf::ToEquivClassCP(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<codepoint_t>& result, codepoint_t baseOffset) const
  703. {
  704. this->ToEquivClass<codepoint_t>(allocator, level, base, tblidx, result, baseOffset);
  705. }
  706. template <typename C>
  707. void CharSetLeaf::ToEquivClass(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<C>& result, codepoint_t baseOffset) const
  708. {
  709. Assert(level == 0);
  710. vec.ToEquivClass<C>(allocator, base, tblidx, result, baseOffset);
  711. }
  712. bool CharSetLeaf::IsSubsetOf(uint level, const CharSetNode* other) const
  713. {
  714. Assert(level == 0);
  715. Assert(other != nullptr);
  716. if (other == CharSetFull::TheFullNode)
  717. return true;
  718. Assert(other->IsLeaf());
  719. CharSetLeaf* otherLeaf = (CharSetLeaf*)other;
  720. return vec.IsSubsetOf(otherLeaf->vec);
  721. }
  722. bool CharSetLeaf::IsEqualTo(uint level, const CharSetNode* other) const
  723. {
  724. Assert(level == 0);
  725. Assert(other != nullptr);
  726. if (other == CharSetFull::TheFullNode)
  727. return false;
  728. Assert(other->IsLeaf());
  729. CharSetLeaf* otherLeaf = (CharSetLeaf*)other;
  730. return vec.IsSubsetOf(otherLeaf->vec);
  731. }
  732. uint CharSetLeaf::Count(uint level) const
  733. {
  734. Assert(level == 0);
  735. return vec.Count();
  736. }
  737. _Success_(return)
  738. bool CharSetLeaf::GetNextRange(uint level, Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) const
  739. {
  740. Assert(searchCharStart < lim(level) + 1);
  741. int nextSet = vec.NextSet(searchCharStart);
  742. if (nextSet == -1)
  743. {
  744. return false;
  745. }
  746. *outLowerChar = (char16)nextSet;
  747. int nextClear = vec.NextClear(nextSet);
  748. *outHigherChar = UTC(nextClear == -1 ? lim(level) : nextClear - 1);
  749. return true;
  750. }
  751. #if DBG
  752. bool CharSetLeaf::IsLeaf() const
  753. {
  754. return true;
  755. }
  756. #endif
  757. // ----------------------------------------------------------------------
  758. // CharSet<char16>
  759. // ----------------------------------------------------------------------
  760. void CharSet<char16>::SwitchRepresentations(ArenaAllocator* allocator)
  761. {
  762. Assert(IsCompact());
  763. uint existCount = this->GetCompactLength();
  764. __assume(existCount <= MaxCompact);
  765. if (existCount <= MaxCompact)
  766. {
  767. Char existCs[MaxCompact];
  768. for (uint i = 0; i < existCount; i++)
  769. {
  770. existCs[i] = GetCompactChar(i);
  771. }
  772. rep.full.root = nullptr;
  773. rep.full.direct.Clear();
  774. for (uint i = 0; i < existCount; i++)
  775. Set(allocator, existCs[i]);
  776. }
  777. }
  778. void CharSet<char16>::Sort()
  779. {
  780. Assert(IsCompact());
  781. __assume(this->GetCompactLength() <= MaxCompact);
  782. for (uint i = 1; i < this->GetCompactLength(); i++)
  783. {
  784. uint curr = GetCompactCharU(i);
  785. for (uint j = 0; j < i; j++)
  786. {
  787. if (GetCompactCharU(j) > curr)
  788. {
  789. for (int k = i; k > (int)j; k--)
  790. {
  791. this->ReplaceCompactCharU(k, this->GetCompactCharU(k - 1));
  792. }
  793. this->ReplaceCompactCharU(j, curr);
  794. break;
  795. }
  796. }
  797. }
  798. }
  799. CharSet<char16>::CharSet()
  800. {
  801. Assert(sizeof(Node*) == sizeof(size_t));
  802. Assert(sizeof(CompactRep) == sizeof(FullRep));
  803. rep.compact.countPlusOne = 1;
  804. for (int i = 0; i < MaxCompact; i++)
  805. rep.compact.cs[i] = emptySlot;
  806. }
  807. void CharSet<char16>::FreeBody(ArenaAllocator* allocator)
  808. {
  809. if (!IsCompact() && rep.full.root != nullptr)
  810. {
  811. rep.full.root->FreeSelf(allocator);
  812. #if DBG
  813. rep.full.root = nullptr;
  814. #endif
  815. }
  816. }
  817. void CharSet<char16>::Clear(ArenaAllocator* allocator)
  818. {
  819. if (!IsCompact() && rep.full.root != nullptr)
  820. rep.full.root->FreeSelf(allocator);
  821. rep.compact.countPlusOne = 1;
  822. for (int i = 0; i < MaxCompact; i++)
  823. rep.compact.cs[i] = emptySlot;
  824. }
  825. void CharSet<char16>::CloneFrom(ArenaAllocator* allocator, const CharSet<Char>& other)
  826. {
  827. Clear(allocator);
  828. Assert(IsCompact());
  829. if (other.IsCompact())
  830. {
  831. this->SetCompactLength(other.GetCompactLength());
  832. for (uint i = 0; i < other.GetCompactLength(); i++)
  833. {
  834. this->ReplaceCompactCharU(i, other.GetCompactCharU(i));
  835. }
  836. }
  837. else
  838. {
  839. rep.full.root = other.rep.full.root == nullptr ? nullptr : other.rep.full.root->Clone(allocator);
  840. rep.full.direct.CloneFrom(other.rep.full.direct);
  841. }
  842. }
  843. void CharSet<char16>::CloneNonSurrogateCodeUnitsTo(ArenaAllocator* allocator, CharSet<Char>& other)
  844. {
  845. if (this->IsCompact())
  846. {
  847. for (uint i = 0; i < this->GetCompactLength(); i++)
  848. {
  849. Char c = this->GetCompactChar(i);
  850. uint uChar = CTU(c);
  851. if (uChar < 0xD800 || uChar > 0xDFFF)
  852. {
  853. other.Set(allocator, c);
  854. }
  855. }
  856. }
  857. else
  858. {
  859. other.rep.full.direct.CloneFrom(rep.full.direct);
  860. if (rep.full.root == nullptr)
  861. {
  862. other.rep.full.root = nullptr;
  863. }
  864. else
  865. {
  866. other.rep.full.root = rep.full.root->Clone(allocator);
  867. other.rep.full.root->ClearRange(allocator, CharSetNode::levels - 1, 0xD800, 0XDFFF);
  868. }
  869. }
  870. }
  871. void CharSet<char16>::CloneSurrogateCodeUnitsTo(ArenaAllocator* allocator, CharSet<Char>& other)
  872. {
  873. if (this->IsCompact())
  874. {
  875. for (uint i = 0; i < this->GetCompactLength(); i++)
  876. {
  877. Char c = this->GetCompactChar(i);
  878. uint uChar = CTU(c);
  879. if (0xD800 <= uChar && uChar <= 0xDFFF)
  880. {
  881. other.Set(allocator, c);
  882. }
  883. }
  884. }
  885. else
  886. {
  887. other.rep.full.direct.CloneFrom(rep.full.direct);
  888. if (rep.full.root == nullptr)
  889. {
  890. other.rep.full.root = nullptr;
  891. }
  892. else
  893. {
  894. other.rep.full.root = rep.full.root->Clone(allocator);
  895. other.rep.full.root->ClearRange(allocator, CharSetNode::levels - 1, 0, 0xD7FF);
  896. }
  897. }
  898. }
  899. void CharSet<char16>::SubtractRange(ArenaAllocator* allocator, Char lowerChar, Char higherChar)
  900. {
  901. uint lowerValue = CTU(lowerChar);
  902. uint higherValue = CTU(higherChar);
  903. if (higherValue < lowerValue)
  904. return;
  905. if (IsCompact())
  906. {
  907. for (uint i = 0; i < this->GetCompactLength(); )
  908. {
  909. uint value = this->GetCompactCharU(i);
  910. if (value >= lowerValue && value <= higherValue)
  911. {
  912. this->RemoveCompactChar(i);
  913. }
  914. else
  915. {
  916. i++;
  917. }
  918. }
  919. }
  920. else if(lowerValue == 0 && higherValue == MaxUChar)
  921. {
  922. this->Clear(allocator);
  923. }
  924. else
  925. {
  926. if (lowerValue < CharSetNode::directSize)
  927. {
  928. uint maxDirectValue = min(higherValue, CharSetNode::directSize - 1);
  929. rep.full.direct.ClearRange(lowerValue, maxDirectValue);
  930. }
  931. if (rep.full.root != nullptr)
  932. {
  933. rep.full.root = rep.full.root->ClearRange(allocator, CharSetNode::levels - 1, lowerValue, higherValue);
  934. }
  935. }
  936. }
  937. void CharSet<char16>::SetRange(ArenaAllocator* allocator, Char lc, Char hc)
  938. {
  939. uint l = CTU(lc);
  940. uint h = CTU(hc);
  941. if (h < l)
  942. return;
  943. if (IsCompact())
  944. {
  945. if (h - l < MaxCompact)
  946. {
  947. do
  948. {
  949. uint i;
  950. for (i = 0; i < this->GetCompactLength(); i++)
  951. {
  952. __assume(l <= MaxUChar);
  953. if (l <= MaxUChar && i < MaxCompact)
  954. {
  955. if (this->GetCompactCharU(i) == l)
  956. break;
  957. }
  958. }
  959. if (i == this->GetCompactLength())
  960. {
  961. // Character not already in compact set
  962. if (i < MaxCompact)
  963. {
  964. this->AddCompactCharU(l);
  965. }
  966. else
  967. // Must switch representations
  968. break;
  969. }
  970. l++;
  971. }
  972. while (l <= h);
  973. if (h < l)
  974. // All chars are now in compact set
  975. return;
  976. // else: fall-through to general case for remaining chars
  977. }
  978. // else: no use even trying
  979. SwitchRepresentations(allocator);
  980. }
  981. Assert(!IsCompact());
  982. if (l == 0 && h == MaxUChar)
  983. {
  984. rep.full.direct.SetRange(0, CharSetNode::directSize - 1);
  985. if (rep.full.root != nullptr)
  986. rep.full.root->FreeSelf(allocator);
  987. rep.full.root = CharSetFull::TheFullNode;
  988. }
  989. else
  990. {
  991. if (l < CharSetNode::directSize)
  992. {
  993. if (h < CharSetNode::directSize)
  994. {
  995. rep.full.direct.SetRange(l, h);
  996. return;
  997. }
  998. rep.full.direct.SetRange(l, CharSetNode::directSize - 1);
  999. l = CharSetNode::directSize;
  1000. }
  1001. if (rep.full.root == nullptr)
  1002. rep.full.root = Anew(allocator, CharSetInner);
  1003. rep.full.root = rep.full.root->Set(allocator, CharSetNode::levels - 1, l, h);
  1004. }
  1005. }
  1006. void CharSet<char16>::SetRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs)
  1007. {
  1008. for (int i = 0; i < numSortedPairs * 2; i += 2)
  1009. {
  1010. Assert(i == 0 || sortedPairs[i-1] < sortedPairs[i]);
  1011. Assert(sortedPairs[i] <= sortedPairs[i+1]);
  1012. SetRange(allocator, sortedPairs[i], sortedPairs[i+1]);
  1013. }
  1014. }
  1015. void CharSet<char16>::SetNotRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs)
  1016. {
  1017. if (numSortedPairs == 0)
  1018. SetRange(allocator, MinChar, MaxChar);
  1019. else
  1020. {
  1021. if (sortedPairs[0] != MinChar)
  1022. SetRange(allocator, MinChar, sortedPairs[0] - 1);
  1023. for (int i = 1; i < numSortedPairs * 2 - 1; i += 2)
  1024. SetRange(allocator, sortedPairs[i] + 1, sortedPairs[i+1] - 1);
  1025. if (sortedPairs[numSortedPairs * 2 - 1] != MaxChar)
  1026. SetRange(allocator, sortedPairs[numSortedPairs * 2 - 1] + 1, MaxChar);
  1027. }
  1028. }
  1029. void CharSet<char16>::UnionInPlace(ArenaAllocator* allocator, const CharSet<Char>& other)
  1030. {
  1031. if (other.IsCompact())
  1032. {
  1033. for (uint i = 0; i < other.GetCompactLength(); i++)
  1034. {
  1035. Set(allocator, other.GetCompactChar(i));
  1036. }
  1037. return;
  1038. }
  1039. if (IsCompact())
  1040. SwitchRepresentations(allocator);
  1041. Assert(!IsCompact() && !other.IsCompact());
  1042. rep.full.direct.UnionInPlace(other.rep.full.direct);
  1043. if (other.rep.full.root != nullptr)
  1044. {
  1045. if (other.rep.full.root == CharSetFull::TheFullNode)
  1046. {
  1047. if (rep.full.root != nullptr)
  1048. rep.full.root->FreeSelf(allocator);
  1049. rep.full.root = CharSetFull::TheFullNode;
  1050. }
  1051. else
  1052. {
  1053. if (rep.full.root == nullptr)
  1054. rep.full.root = Anew(allocator, CharSetInner);
  1055. rep.full.root = rep.full.root->UnionInPlace(allocator, CharSetNode::levels - 1, other.rep.full.root);
  1056. }
  1057. }
  1058. }
  1059. _Success_(return)
  1060. bool CharSet<char16>::GetNextRange(Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar)
  1061. {
  1062. int count = this->Count();
  1063. if (count == 0)
  1064. {
  1065. return false;
  1066. }
  1067. else if (count == 1)
  1068. {
  1069. Char singleton = this->Singleton();
  1070. if (singleton < searchCharStart)
  1071. {
  1072. return false;
  1073. }
  1074. *outLowerChar = *outHigherChar = singleton;
  1075. return true;
  1076. }
  1077. if (IsCompact())
  1078. {
  1079. this->Sort();
  1080. uint i = 0;
  1081. size_t compactLength = this->GetCompactLength();
  1082. for (; i < compactLength; i++)
  1083. {
  1084. Char nextChar = this->GetCompactChar(i);
  1085. if (nextChar >= searchCharStart)
  1086. {
  1087. *outLowerChar = *outHigherChar = nextChar;
  1088. break;
  1089. }
  1090. }
  1091. if (i == compactLength)
  1092. {
  1093. return false;
  1094. }
  1095. i++;
  1096. for (; i < compactLength; i++)
  1097. {
  1098. Char nextChar = this->GetCompactChar(i);
  1099. if (nextChar != *outHigherChar + 1)
  1100. {
  1101. return true;
  1102. }
  1103. *outHigherChar += 1;
  1104. }
  1105. return true;
  1106. }
  1107. else
  1108. {
  1109. bool found = false;
  1110. if (CTU(searchCharStart) < CharSetNode::directSize)
  1111. {
  1112. int nextSet = rep.full.direct.NextSet(searchCharStart);
  1113. if (nextSet != -1)
  1114. {
  1115. found = true;
  1116. *outLowerChar = (char16)nextSet;
  1117. int nextClear = rep.full.direct.NextClear(nextSet);
  1118. if (nextClear != -1)
  1119. {
  1120. *outHigherChar = UTC(nextClear - 1);
  1121. return true;
  1122. }
  1123. *outHigherChar = CharSetNode::directSize - 1;
  1124. }
  1125. }
  1126. if (rep.full.root == nullptr)
  1127. {
  1128. return found;
  1129. }
  1130. Char tempLowChar = 0, tempHighChar = 0;
  1131. if (found)
  1132. {
  1133. searchCharStart = *outHigherChar + 1;
  1134. }
  1135. else
  1136. {
  1137. searchCharStart = searchCharStart > CharSetNode::directSize ? searchCharStart : CharSetNode::directSize;
  1138. }
  1139. if (rep.full.root->GetNextRange(CharSetNode::levels - 1, searchCharStart, &tempLowChar, &tempHighChar) && (!found || tempLowChar == *outHigherChar + 1))
  1140. {
  1141. if (!found)
  1142. {
  1143. *outLowerChar = tempLowChar;
  1144. }
  1145. *outHigherChar = tempHighChar;
  1146. return true;
  1147. }
  1148. return found;
  1149. }
  1150. }
  1151. bool CharSet<char16>::Get_helper(uint k) const
  1152. {
  1153. Assert(!IsCompact());
  1154. CharSetNode* curr = rep.full.root;
  1155. for (int level = CharSetNode::levels - 1; level > 0; level--)
  1156. {
  1157. if (curr == CharSetFull::TheFullNode)
  1158. return true;
  1159. CharSetInner* inner = (CharSetInner*)curr;
  1160. uint i = CharSetNode::innerIdx(level, k);
  1161. if (inner->children[i] == 0)
  1162. return false;
  1163. else
  1164. curr = inner->children[i];
  1165. }
  1166. if (curr == CharSetFull::TheFullNode)
  1167. return true;
  1168. CharSetLeaf* leaf = (CharSetLeaf*)curr;
  1169. return leaf->vec.Get(CharSetNode::leafIdx(k));
  1170. }
  1171. void CharSet<char16>::ToComplement(ArenaAllocator* allocator, CharSet<Char>& result)
  1172. {
  1173. if (IsCompact())
  1174. {
  1175. Sort();
  1176. if (this->GetCompactLength() > 0)
  1177. {
  1178. if (this->GetCompactCharU(0) > 0)
  1179. result.SetRange(allocator, UTC(0), UTC(this->GetCompactCharU(0) - 1));
  1180. for (uint i = 0; i < this->GetCompactLength() - 1; i++)
  1181. {
  1182. result.SetRange(allocator, UTC(this->GetCompactCharU(i) + 1), UTC(this->GetCompactCharU(i + 1) - 1));
  1183. }
  1184. if (this->GetCompactCharU(this->GetCompactLength() - 1) < MaxUChar)
  1185. {
  1186. result.SetRange(allocator, UTC(this->GetCompactCharU(this->GetCompactLength() - 1) + 1), UTC(MaxUChar));
  1187. }
  1188. }
  1189. else if (this->GetCompactLength() == 0)
  1190. {
  1191. result.SetRange(allocator, UTC(0), UTC(MaxUChar));
  1192. }
  1193. }
  1194. else
  1195. {
  1196. rep.full.direct.ToComplement<char16>(allocator, 0, result);
  1197. if (rep.full.root == nullptr)
  1198. result.SetRange(allocator, UTC(CharSetNode::directSize), MaxChar);
  1199. else
  1200. rep.full.root->ToComplement(allocator, CharSetNode::levels - 1, 0, result);
  1201. }
  1202. }
  1203. void CharSet<char16>::ToEquivClass(ArenaAllocator* allocator, CharSet<Char>& result)
  1204. {
  1205. uint tblidx = 0;
  1206. if (IsCompact())
  1207. {
  1208. Sort();
  1209. for (uint i = 0; i < this->GetCompactLength(); i++)
  1210. {
  1211. uint acth;
  1212. Char equivs[CaseInsensitive::EquivClassSize];
  1213. if (CaseInsensitive::RangeToEquivClass(tblidx, this->GetCompactCharU(i), this->GetCompactCharU(i), acth, equivs))
  1214. {
  1215. for (int j = 0; j < CaseInsensitive::EquivClassSize; j++)
  1216. {
  1217. result.Set(allocator, equivs[j]);
  1218. }
  1219. }
  1220. else
  1221. {
  1222. result.Set(allocator, this->GetCompactChar(i));
  1223. }
  1224. }
  1225. }
  1226. else
  1227. {
  1228. rep.full.direct.ToEquivClass<char16>(allocator, 0, tblidx, result);
  1229. if (rep.full.root != nullptr)
  1230. {
  1231. rep.full.root->ToEquivClassW(allocator, CharSetNode::levels - 1, 0, tblidx, result);
  1232. }
  1233. }
  1234. }
  1235. void CharSet<char16>::ToEquivClassCP(ArenaAllocator* allocator, CharSet<codepoint_t>& result, codepoint_t baseOffset)
  1236. {
  1237. uint tblidx = 0;
  1238. if (IsCompact())
  1239. {
  1240. Sort();
  1241. for (uint i = 0; i < this->GetCompactLength(); i++)
  1242. {
  1243. uint acth;
  1244. codepoint_t equivs[CaseInsensitive::EquivClassSize];
  1245. if (CaseInsensitive::RangeToEquivClass(tblidx, this->GetCompactCharU(i) + baseOffset, this->GetCompactCharU(i) + baseOffset, acth, equivs))
  1246. {
  1247. for (int j = 0; j < CaseInsensitive::EquivClassSize; j++)
  1248. {
  1249. result.Set(allocator, equivs[j]);
  1250. }
  1251. }
  1252. else
  1253. {
  1254. result.Set(allocator, this->GetCompactChar(i) + baseOffset);
  1255. }
  1256. }
  1257. }
  1258. else
  1259. {
  1260. rep.full.direct.ToEquivClass<codepoint_t>(allocator, 0, tblidx, result, baseOffset);
  1261. if (rep.full.root != nullptr)
  1262. {
  1263. rep.full.root->ToEquivClassCP(allocator, CharSetNode::levels - 1, 0, tblidx, result, baseOffset);
  1264. }
  1265. }
  1266. }
  1267. int CharSet<char16>::GetCompactEntries(uint max, __out_ecount(max) Char* entries) const
  1268. {
  1269. Assert(max <= MaxCompact);
  1270. if (!IsCompact())
  1271. return -1;
  1272. uint count = min(max, (uint)(this->GetCompactLength()));
  1273. __analysis_assume(count <= max);
  1274. for (uint i = 0; i < count; i++)
  1275. {
  1276. // Bug in oacr. it can't figure out count is less than or equal to max
  1277. #pragma warning(suppress: 22102)
  1278. entries[i] = this->GetCompactChar(i);
  1279. }
  1280. return static_cast<int>(rep.compact.countPlusOne - 1);
  1281. }
  1282. bool CharSet<char16>::IsSubsetOf(const CharSet<Char>& other) const
  1283. {
  1284. if (IsCompact())
  1285. {
  1286. for (uint i = 0; i < this->GetCompactLength(); i++)
  1287. {
  1288. if (!other.Get(this->GetCompactChar(i)))
  1289. return false;
  1290. }
  1291. return true;
  1292. }
  1293. else
  1294. {
  1295. if (other.IsCompact())
  1296. return false;
  1297. if (!rep.full.direct.IsSubsetOf(other.rep.full.direct))
  1298. return false;
  1299. if (rep.full.root == nullptr)
  1300. return true;
  1301. if (other.rep.full.root == nullptr)
  1302. return false;
  1303. return rep.full.root->IsSubsetOf(CharSetNode::levels - 1, other.rep.full.root);
  1304. }
  1305. }
  1306. bool CharSet<char16>::IsEqualTo(const CharSet<Char>& other) const
  1307. {
  1308. if (IsCompact())
  1309. {
  1310. if (!other.IsCompact())
  1311. return false;
  1312. if (rep.compact.countPlusOne != other.rep.compact.countPlusOne)
  1313. return false;
  1314. for (uint i = 0; i < this->GetCompactLength(); i++)
  1315. {
  1316. if (!other.Get(this->GetCompactChar(i)))
  1317. return false;
  1318. }
  1319. return true;
  1320. }
  1321. else
  1322. {
  1323. if (other.IsCompact())
  1324. return false;
  1325. if (!rep.full.direct.IsEqualTo(other.rep.full.direct))
  1326. return false;
  1327. if ((rep.full.root == nullptr) != (other.rep.full.root == nullptr))
  1328. return false;
  1329. if (rep.full.root == nullptr)
  1330. return true;
  1331. return rep.full.root->IsEqualTo(CharSetNode::levels - 1, other.rep.full.root);
  1332. }
  1333. }
  1334. #if ENABLE_REGEX_CONFIG_OPTIONS
  1335. // CAUTION: This method is very slow.
  1336. void CharSet<char16>::Print(DebugWriter* w) const
  1337. {
  1338. w->Print(_u("["));
  1339. int start = -1;
  1340. for (uint i = 0; i < NumChars; i++)
  1341. {
  1342. if (Get(UTC(i)))
  1343. {
  1344. if (start < 0)
  1345. {
  1346. start = i;
  1347. w->PrintEscapedChar(UTC(i));
  1348. }
  1349. }
  1350. else
  1351. {
  1352. if (start >= 0)
  1353. {
  1354. if (i > (uint)(start + 1))
  1355. {
  1356. if (i > (uint)(start + 2))
  1357. w->Print(_u("-"));
  1358. w->PrintEscapedChar(UTC(i - 1));
  1359. }
  1360. start = -1;
  1361. }
  1362. }
  1363. }
  1364. if (start >= 0)
  1365. {
  1366. if ((uint)start < MaxUChar - 1)
  1367. w->Print(_u("-"));
  1368. w->PrintEscapedChar(MaxChar);
  1369. }
  1370. w->Print(_u("]"));
  1371. }
  1372. #endif
  1373. // ----------------------------------------------------------------------
  1374. // CharSet<codepoint_t>
  1375. // ----------------------------------------------------------------------
  1376. CharSet<codepoint_t>::CharSet()
  1377. {
  1378. #if DBG
  1379. for (int i = 0; i < NumberOfPlanes; i++)
  1380. {
  1381. this->characterPlanes[i].IsEmpty();
  1382. }
  1383. #endif
  1384. }
  1385. void CharSet<codepoint_t>::FreeBody(ArenaAllocator* allocator)
  1386. {
  1387. for (int i = 0; i < NumberOfPlanes; i++)
  1388. {
  1389. this->characterPlanes[i].FreeBody(allocator);
  1390. }
  1391. }
  1392. void CharSet<codepoint_t>::Clear(ArenaAllocator* allocator)
  1393. {
  1394. for (int i = 0; i < NumberOfPlanes; i++)
  1395. {
  1396. this->characterPlanes[i].Clear(allocator);
  1397. }
  1398. }
  1399. void CharSet<codepoint_t>::CloneFrom(ArenaAllocator* allocator, const CharSet<Char>& other)
  1400. {
  1401. for (int i = 0; i < NumberOfPlanes; i++)
  1402. {
  1403. this->characterPlanes[i].Clear(allocator);
  1404. this->characterPlanes[i].CloneFrom(allocator, other.characterPlanes[i]);
  1405. }
  1406. }
  1407. void CharSet<codepoint_t>::CloneSimpleCharsTo(ArenaAllocator* allocator, CharSet<char16>& other) const
  1408. {
  1409. other.CloneFrom(allocator, this->characterPlanes[0]);
  1410. }
  1411. void CharSet<codepoint_t>::SetRange(ArenaAllocator* allocator, Char lc, Char hc)
  1412. {
  1413. Assert(lc <= hc);
  1414. int lowerIndex = this->CharToIndex(lc);
  1415. int upperIndex = this->CharToIndex(hc);
  1416. if (lowerIndex == upperIndex)
  1417. {
  1418. this->characterPlanes[lowerIndex].SetRange(allocator, this->RemoveOffset(lc), this->RemoveOffset(hc));
  1419. }
  1420. else
  1421. {
  1422. // Do the partial ranges
  1423. char16 partialLower = this->RemoveOffset(lc);
  1424. char16 partialHigher = this->RemoveOffset(hc);
  1425. if (partialLower != 0)
  1426. {
  1427. this->characterPlanes[lowerIndex].SetRange(allocator, partialLower, Chars<char16>::MaxUChar);
  1428. lowerIndex++;
  1429. }
  1430. for(; lowerIndex < upperIndex; lowerIndex++)
  1431. {
  1432. this->characterPlanes[lowerIndex].SetRange(allocator, 0, Chars<char16>::MaxUChar);
  1433. }
  1434. this->characterPlanes[upperIndex].SetRange(allocator, 0, partialHigher);
  1435. }
  1436. }
  1437. void CharSet<codepoint_t>::SetRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs)
  1438. {
  1439. for (int i = 0; i < numSortedPairs * 2; i += 2)
  1440. {
  1441. Assert(i == 0 || sortedPairs[i-1] < sortedPairs[i]);
  1442. Assert(sortedPairs[i] <= sortedPairs[i+1]);
  1443. SetRange(allocator, sortedPairs[i], sortedPairs[i+1]);
  1444. }
  1445. }
  1446. void CharSet<codepoint_t>::SetNotRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs)
  1447. {
  1448. if (numSortedPairs == 0)
  1449. {
  1450. for (int i = 0; i < NumberOfPlanes; i++)
  1451. {
  1452. this->characterPlanes[i].SetRange(allocator, 0, Chars<char16>::MaxUChar);
  1453. }
  1454. }
  1455. else
  1456. {
  1457. if (sortedPairs[0] != MinChar)
  1458. {
  1459. SetRange(allocator, MinChar, sortedPairs[0] - 1);
  1460. }
  1461. for (int i = 1; i < numSortedPairs * 2 - 1; i += 2)
  1462. {
  1463. SetRange(allocator, sortedPairs[i] + 1, sortedPairs[i+1] - 1);
  1464. }
  1465. if (sortedPairs[numSortedPairs * 2 - 1] != MaxChar)
  1466. {
  1467. SetRange(allocator, sortedPairs[numSortedPairs * 2 - 1] + 1, MaxChar);
  1468. }
  1469. }
  1470. }
  1471. void CharSet<codepoint_t>::UnionInPlace(ArenaAllocator* allocator, const CharSet<Char>& other)
  1472. {
  1473. for (int i = 0; i < NumberOfPlanes; i++)
  1474. {
  1475. this->characterPlanes[i].UnionInPlace(allocator, other.characterPlanes[i]);
  1476. }
  1477. }
  1478. void CharSet<codepoint_t>::UnionInPlace(ArenaAllocator* allocator, const CharSet<char16>& other)
  1479. {
  1480. this->characterPlanes[0].UnionInPlace(allocator, other);
  1481. }
  1482. _Success_(return)
  1483. bool CharSet<codepoint_t>::GetNextRange(Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar)
  1484. {
  1485. Assert(outLowerChar != nullptr);
  1486. Assert(outHigherChar != nullptr);
  1487. if (searchCharStart >= 0x110000)
  1488. {
  1489. return false;
  1490. }
  1491. char16 currentLowChar = 1, currentHighChar = 0;
  1492. int index = this->CharToIndex(searchCharStart);
  1493. char16 offsetLessSearchCharStart = this->RemoveOffset(searchCharStart);
  1494. for (; index < NumberOfPlanes; index++)
  1495. {
  1496. if (this->characterPlanes[index].GetNextRange(offsetLessSearchCharStart, &currentLowChar, &currentHighChar))
  1497. {
  1498. break;
  1499. }
  1500. offsetLessSearchCharStart = 0x0;
  1501. }
  1502. if (index == NumberOfPlanes)
  1503. {
  1504. return false;
  1505. }
  1506. Assert(currentHighChar >= currentLowChar);
  1507. // else found range
  1508. *outLowerChar = this->AddOffset(currentLowChar, index);
  1509. *outHigherChar = this->AddOffset(currentHighChar, index);
  1510. // Check if range crosses plane boundaries
  1511. index ++;
  1512. for (; index < NumberOfPlanes; index++)
  1513. {
  1514. if (!this->characterPlanes[index].GetNextRange(0x0, &currentLowChar, &currentHighChar) || *outHigherChar + 1 != this->AddOffset(currentLowChar, index))
  1515. {
  1516. break;
  1517. }
  1518. Assert(this->AddOffset(currentHighChar, index) > *outHigherChar);
  1519. *outHigherChar = this->AddOffset(currentHighChar, index);
  1520. }
  1521. return true;
  1522. }
  1523. void CharSet<codepoint_t>::ToComplement(ArenaAllocator* allocator, CharSet<Char>& result)
  1524. {
  1525. for (int i = 0; i < NumberOfPlanes; i++)
  1526. {
  1527. this->characterPlanes[i].ToComplement(allocator, result.characterPlanes[i]);
  1528. }
  1529. }
  1530. void CharSet<codepoint_t>::ToSimpleComplement(ArenaAllocator* allocator, CharSet<Char>& result)
  1531. {
  1532. this->characterPlanes[0].ToComplement(allocator, result.characterPlanes[0]);
  1533. }
  1534. void CharSet<codepoint_t>::ToSimpleComplement(ArenaAllocator* allocator, CharSet<char16>& result)
  1535. {
  1536. this->characterPlanes[0].ToComplement(allocator, result);
  1537. }
  1538. void CharSet<codepoint_t>::ToEquivClass(ArenaAllocator* allocator, CharSet<Char>& result)
  1539. {
  1540. for (int i = 0; i < NumberOfPlanes; i++)
  1541. {
  1542. this->characterPlanes[i].ToEquivClassCP(allocator, result, AddOffset(0, i));
  1543. }
  1544. }
  1545. void CharSet<codepoint_t>::ToSurrogateEquivClass(ArenaAllocator* allocator, CharSet<Char>& result)
  1546. {
  1547. this->CloneSimpleCharsTo(allocator, result.characterPlanes[0]);
  1548. for (int i = 1; i < NumberOfPlanes; i++)
  1549. {
  1550. this->characterPlanes[i].ToEquivClassCP(allocator, result, AddOffset(0, i));
  1551. }
  1552. }
  1553. #if ENABLE_REGEX_CONFIG_OPTIONS
  1554. void CharSet<codepoint_t>::Print(DebugWriter* w) const
  1555. {
  1556. w->Print(_u("Characters 0 - 65535"));
  1557. for (int i = 0; i < NumberOfPlanes; i++)
  1558. {
  1559. int base = (i + 1) * 0x10000;
  1560. w->Print(_u("Characters %d - %d"), base, base + 0xFFFF);
  1561. this->characterPlanes[i].Print(w);
  1562. }
  1563. }
  1564. #endif
  1565. // ----------------------------------------------------------------------
  1566. // RuntimeCharSet<char16>
  1567. // ----------------------------------------------------------------------
  1568. RuntimeCharSet<char16>::RuntimeCharSet()
  1569. {
  1570. root = nullptr;
  1571. direct.Clear();
  1572. }
  1573. void RuntimeCharSet<char16>::FreeBody(ArenaAllocator* allocator)
  1574. {
  1575. if (root != nullptr)
  1576. {
  1577. root->FreeSelf(allocator);
  1578. #if DBG
  1579. root = nullptr;
  1580. #endif
  1581. }
  1582. }
  1583. void RuntimeCharSet<char16>::CloneFrom(ArenaAllocator* allocator, const CharSet<Char>& other)
  1584. {
  1585. Assert(root == nullptr);
  1586. Assert(direct.Count() == 0);
  1587. if (other.IsCompact())
  1588. {
  1589. for (uint i = 0; i < other.GetCompactLength(); i++)
  1590. {
  1591. uint k = other.GetCompactCharU(i);
  1592. if (k < CharSetNode::directSize)
  1593. direct.Set(k);
  1594. else
  1595. {
  1596. if (root == nullptr)
  1597. root = Anew(allocator, CharSetInner);
  1598. #if DBG
  1599. CharSetNode* newRoot =
  1600. #endif
  1601. root->Set(allocator, CharSetNode::levels - 1, k, k);
  1602. #if DBG
  1603. // NOTE: Since we can add at most MaxCompact characters, we can never fill a leaf or inner node,
  1604. // thus we will never need to reallocated nodes
  1605. Assert(newRoot == root);
  1606. #endif
  1607. }
  1608. }
  1609. }
  1610. else
  1611. {
  1612. root = other.rep.full.root == nullptr ? nullptr : other.rep.full.root->Clone(allocator);
  1613. direct.CloneFrom(other.rep.full.direct);
  1614. }
  1615. }
  1616. bool RuntimeCharSet<char16>::Get_helper(uint k) const
  1617. {
  1618. CharSetNode* curr = root;
  1619. for (int level = CharSetNode::levels - 1; level > 0; level--)
  1620. {
  1621. if (curr == CharSetFull::TheFullNode)
  1622. return true;
  1623. CharSetInner* inner = (CharSetInner*)curr;
  1624. uint i = CharSetNode::innerIdx(level, k);
  1625. if (inner->children[i] == 0)
  1626. return false;
  1627. else
  1628. curr = inner->children[i];
  1629. }
  1630. if (curr == CharSetFull::TheFullNode)
  1631. return true;
  1632. CharSetLeaf* leaf = (CharSetLeaf*)curr;
  1633. return leaf->vec.Get(CharSetNode::leafIdx(k));
  1634. }
  1635. #if ENABLE_REGEX_CONFIG_OPTIONS
  1636. // CAUTION: This method is very slow.
  1637. void RuntimeCharSet<char16>::Print(DebugWriter* w) const
  1638. {
  1639. w->Print(_u("["));
  1640. int start = -1;
  1641. for (uint i = 0; i < NumChars; i++)
  1642. {
  1643. if (Get(UTC(i)))
  1644. {
  1645. if (start < 0)
  1646. {
  1647. start = i;
  1648. w->PrintEscapedChar(UTC(i));
  1649. }
  1650. }
  1651. else
  1652. {
  1653. if (start >= 0)
  1654. {
  1655. if (i > (uint)(start + 1))
  1656. {
  1657. if (i > (uint)(start + 2))
  1658. w->Print(_u("-"));
  1659. w->PrintEscapedChar(UTC(i - 1));
  1660. }
  1661. start = -1;
  1662. }
  1663. }
  1664. }
  1665. if (start >= 0)
  1666. {
  1667. if ((uint)start < MaxUChar - 1)
  1668. w->Print(_u("-"));
  1669. w->PrintEscapedChar(MaxChar);
  1670. }
  1671. w->Print(_u("]"));
  1672. }
  1673. #endif
  1674. // The VS2013 linker treats this as a redefinition of an already
  1675. // defined constant and complains. So skip the declaration if we're compiling
  1676. // with VS2013 or below.
  1677. #if !defined(_MSC_VER) || _MSC_VER >= 1900
  1678. const int CharSetNode::directBits;
  1679. const uint CharSetNode::directSize;
  1680. const uint CharSetNode::innerMask;
  1681. const int CharSetNode::bitsPerLeafLevel;
  1682. const int CharSetNode::branchingPerLeafLevel;
  1683. const uint CharSetNode::leafMask;
  1684. const uint CharSetNode::levels;
  1685. #endif
  1686. }