CharMap.h 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. namespace UnifiedRegex
  7. {
  8. enum CharMapScheme
  9. {
  10. CharMapScheme_Linear,
  11. CharMapScheme_Full
  12. };
  13. template <typename C, typename V, CharMapScheme scheme = CharMapScheme_Full>
  14. class CharMap {};
  15. template <typename V, CharMapScheme scheme>
  16. class CharMap<char, V, scheme> : private Chars<char>
  17. {
  18. private:
  19. V map[NumChars];
  20. public:
  21. CharMap(V defv)
  22. {
  23. for (int i = 0; i < NumChars; i++)
  24. map[i] = defv;
  25. }
  26. void FreeBody(ArenaAllocator* allocator)
  27. {
  28. }
  29. inline void Set(ArenaAllocator* allocator, Char k, V v)
  30. {
  31. map[CTU(k)] = v;
  32. }
  33. inline V Get(Char k) const
  34. {
  35. return map[CTU(k)];
  36. }
  37. };
  38. static const uint MaxCharMapLinearChars = 4;
  39. template <typename V>
  40. class CharMap<char16, V, CharMapScheme_Linear> : private Chars<char16>
  41. {
  42. template <typename C>
  43. friend class TextbookBoyerMooreWithLinearMap;
  44. using typename Chars<char16>::Char;
  45. using Chars<char16>::CTU;
  46. private:
  47. V defv;
  48. uint map[MaxCharMapLinearChars];
  49. V lastOcc[MaxCharMapLinearChars];
  50. public:
  51. CharMap(V defv) : defv(defv)
  52. {
  53. for (uint i = 0; i < MaxCharMapLinearChars; i++)
  54. {
  55. map[i] = 0;
  56. lastOcc[i] = defv;
  57. }
  58. }
  59. inline void Set(uint numLinearChars, Char const * map, V const * lastOcc)
  60. {
  61. Assert(numLinearChars <= MaxCharMapLinearChars);
  62. for (uint i = 0; i < numLinearChars; i++)
  63. {
  64. this->map[i] = CTU(map[i]);
  65. this->lastOcc[i] = lastOcc[i];
  66. }
  67. }
  68. uint GetChar(uint index) const
  69. {
  70. Assert(index < MaxCharMapLinearChars);
  71. __analysis_assume(index < MaxCharMapLinearChars);
  72. return map[index];
  73. }
  74. V GetLastOcc(uint index) const
  75. {
  76. Assert(index < MaxCharMapLinearChars);
  77. __analysis_assume(index < MaxCharMapLinearChars);
  78. return lastOcc[index];
  79. }
  80. inline V Get(uint inputChar) const
  81. {
  82. if (map[0] == inputChar)
  83. return lastOcc[0];
  84. if (map[1] == inputChar)
  85. return lastOcc[1];
  86. if (map[2] == inputChar)
  87. return lastOcc[2];
  88. if (map[3] == inputChar)
  89. return lastOcc[3];
  90. return defv;
  91. }
  92. inline V Get(Char k) const
  93. {
  94. return Get(CTU(k));
  95. }
  96. };
  97. template <typename V, CharMapScheme scheme>
  98. class CharMap<char16, V, scheme> : private Chars<char16>
  99. {
  100. // public:
  101. using Chars<char16>::Char;
  102. using Chars<char16>::CharWidth;
  103. using Chars<char16>::CTU;
  104. private:
  105. static const int directBits = Chars<char>::CharWidth;
  106. static const int directSize = Chars<char>::NumChars;
  107. static const int bitsPerLevel = 4;
  108. static const int branchingPerLevel = 1 << bitsPerLevel;
  109. static const uint mask = branchingPerLevel - 1;
  110. static const int levels = CharWidth / bitsPerLevel;
  111. inline static uint innerIdx(int level, uint v)
  112. {
  113. return (v >> (level * bitsPerLevel)) & mask;
  114. }
  115. inline static uint leafIdx(uint v)
  116. {
  117. return v & mask;
  118. }
  119. struct Node
  120. {
  121. virtual void FreeSelf(ArenaAllocator* allocator) = 0;
  122. virtual void Set(ArenaAllocator* allocator, V defv, int level, uint k, V v) = 0;
  123. virtual V Get(V defv, int level, uint k) const = 0;
  124. static inline Node* For(ArenaAllocator* allocator, int level, V defv)
  125. {
  126. if (level == 0)
  127. return Anew(allocator, Leaf, defv);
  128. else
  129. return Anew(allocator, Inner);
  130. }
  131. };
  132. struct Inner : Node
  133. {
  134. Node* children[branchingPerLevel];
  135. Inner()
  136. {
  137. for (int i = 0; i < branchingPerLevel; i++)
  138. children[i] = 0;
  139. }
  140. void FreeSelf(ArenaAllocator* allocator) override
  141. {
  142. for (int i = 0; i < branchingPerLevel; i++)
  143. {
  144. if (children[i] != 0)
  145. {
  146. children[i]->FreeSelf(allocator);
  147. #if DBG
  148. children[i] = 0;
  149. #endif
  150. }
  151. }
  152. Adelete(allocator, this);
  153. }
  154. void Set(ArenaAllocator* allocator, V defv, int level, uint k, V v) override
  155. {
  156. Assert(level > 0);
  157. uint i = innerIdx(level--, k);
  158. if (children[i] == 0)
  159. {
  160. if (v == defv)
  161. return;
  162. children[i] = Node::For(allocator, level, defv);
  163. }
  164. children[i]->Set(allocator, defv, level, k, v);
  165. }
  166. V Get(V defv, int level, uint k) const override
  167. {
  168. Assert(level > 0);
  169. uint i = innerIdx(level--, k);
  170. if (children[i] == 0)
  171. return defv;
  172. else
  173. return children[i]->Get(defv, level, k);
  174. }
  175. };
  176. struct Leaf : Node
  177. {
  178. V values[branchingPerLevel];
  179. Leaf(V defv)
  180. {
  181. for (int i = 0; i < branchingPerLevel; i++)
  182. values[i] = defv;
  183. }
  184. void FreeSelf(ArenaAllocator* allocator) override
  185. {
  186. Adelete(allocator, this);
  187. }
  188. void Set(ArenaAllocator* allocator, V defv, int level, uint k, V v) override
  189. {
  190. Assert(level == 0);
  191. values[leafIdx(k)] = v;
  192. }
  193. V Get(V defv, int level, uint k) const override
  194. {
  195. Assert(level == 0);
  196. return values[leafIdx(k)];
  197. }
  198. };
  199. Field(BVStatic<directSize>) isInMap;
  200. Field(V) defv;
  201. Field(V) directMap[directSize];
  202. FieldNoBarrier(Node*) root;
  203. public:
  204. CharMap(V defv)
  205. : defv(defv)
  206. , root(0)
  207. {
  208. for (int i = 0; i < directSize; i++)
  209. directMap[i] = defv;
  210. }
  211. void FreeBody(ArenaAllocator* allocator)
  212. {
  213. if (root != 0)
  214. {
  215. root->FreeSelf(allocator);
  216. #if DBG
  217. root = 0;
  218. #endif
  219. }
  220. }
  221. void Set(ArenaAllocator* allocator, Char kc, V v)
  222. {
  223. uint k = CTU(kc);
  224. if (k < directSize)
  225. {
  226. isInMap.Set(k);
  227. directMap[k] = v;
  228. }
  229. else
  230. {
  231. if (root == 0)
  232. {
  233. if (v == defv)
  234. return;
  235. root = Anew(allocator, Inner);
  236. }
  237. root->Set(allocator, defv, levels - 1, k, v);
  238. }
  239. }
  240. bool GetNonDirect(uint k, V& lastOcc) const
  241. {
  242. Assert(k >= directSize);
  243. if (root == nullptr)
  244. {
  245. return false;
  246. }
  247. Node* curr = root;
  248. for (int level = levels - 1; level > 0; level--)
  249. {
  250. Inner* inner = (Inner*)curr;
  251. uint i = innerIdx(level, k);
  252. if (inner->children[i] == 0)
  253. return false;
  254. else
  255. curr = inner->children[i];
  256. }
  257. Leaf* leaf = (Leaf*)curr;
  258. lastOcc = leaf->values[leafIdx(k)];
  259. return true;
  260. }
  261. uint GetDirectMapSize() const { return directSize; }
  262. BOOL IsInDirectMap(uint c) const { Assert(c < directSize); return isInMap.Test(c); }
  263. V GetDirectMap(uint c) const
  264. {
  265. Assert(c < directSize);
  266. __analysis_assume(c < directSize);
  267. return directMap[c];
  268. }
  269. inline V Get(Char kc) const
  270. {
  271. if (CTU(kc) < GetDirectMapSize())
  272. {
  273. if (!IsInDirectMap(CTU(kc)))
  274. {
  275. return defv;
  276. }
  277. return GetDirectMap(CTU(kc));
  278. }
  279. else
  280. {
  281. V lastOcc;
  282. if (!GetNonDirect(CTU(kc), lastOcc))
  283. {
  284. return defv;
  285. }
  286. return lastOcc;
  287. }
  288. }
  289. };
  290. }