CharMap.h 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. namespace UnifiedRegex
  7. {
  8. enum CharMapScheme
  9. {
  10. CharMapScheme_Linear,
  11. CharMapScheme_Full
  12. };
  13. template <typename C, typename V, CharMapScheme scheme = CharMapScheme_Full>
  14. class CharMap {};
  15. template <typename V, CharMapScheme scheme>
  16. class CharMap<char, V, scheme> : private Chars<char>
  17. {
  18. private:
  19. V map[NumChars];
  20. public:
  21. CharMap(V defv)
  22. {
  23. for (int i = 0; i < NumChars; i++)
  24. map[i] = defv;
  25. }
  26. void FreeBody(ArenaAllocator* allocator)
  27. {
  28. }
  29. inline void Set(ArenaAllocator* allocator, Char k, V v)
  30. {
  31. map[CTU(k)] = v;
  32. }
  33. inline V Get(Char k) const
  34. {
  35. return map[CTU(k)];
  36. }
  37. };
  38. static const uint MaxCharMapLinearChars = 4;
  39. template <typename V>
  40. class CharMap<char16, V, CharMapScheme_Linear> : private Chars<char16>
  41. {
  42. template <typename C>
  43. friend class TextbookBoyerMooreWithLinearMap;
  44. private:
  45. V defv;
  46. uint map[MaxCharMapLinearChars];
  47. V lastOcc[MaxCharMapLinearChars];
  48. public:
  49. CharMap(V defv) : defv(defv)
  50. {
  51. for (uint i = 0; i < MaxCharMapLinearChars; i++)
  52. {
  53. map[i] = 0;
  54. lastOcc[i] = defv;
  55. }
  56. }
  57. inline void Set(uint numLinearChars, Char const * map, V const * lastOcc)
  58. {
  59. Assert(numLinearChars <= MaxCharMapLinearChars);
  60. for (uint i = 0; i < numLinearChars; i++)
  61. {
  62. this->map[i] = CTU(map[i]);
  63. this->lastOcc[i] = lastOcc[i];
  64. }
  65. }
  66. uint GetChar(uint index) const
  67. {
  68. Assert(index < MaxCharMapLinearChars);
  69. __analysis_assume(index < MaxCharMapLinearChars);
  70. return map[index];
  71. }
  72. V GetLastOcc(uint index) const
  73. {
  74. Assert(index < MaxCharMapLinearChars);
  75. __analysis_assume(index < MaxCharMapLinearChars);
  76. return lastOcc[index];
  77. }
  78. inline V Get(uint inputChar) const
  79. {
  80. if (map[0] == inputChar)
  81. return lastOcc[0];
  82. if (map[1] == inputChar)
  83. return lastOcc[1];
  84. if (map[2] == inputChar)
  85. return lastOcc[2];
  86. if (map[3] == inputChar)
  87. return lastOcc[3];
  88. return defv;
  89. }
  90. inline V Get(Char k) const
  91. {
  92. return Get(CTU(k));
  93. }
  94. };
  95. template <typename V, CharMapScheme scheme>
  96. class CharMap<char16, V, scheme> : private Chars<char16>
  97. {
  98. private:
  99. static const int directBits = Chars<char>::CharWidth;
  100. static const int directSize = Chars<char>::NumChars;
  101. static const int bitsPerLevel = 4;
  102. static const int branchingPerLevel = 1 << bitsPerLevel;
  103. static const uint mask = branchingPerLevel - 1;
  104. static const int levels = CharWidth / bitsPerLevel;
  105. inline static uint innerIdx(int level, uint v)
  106. {
  107. return (v >> (level * bitsPerLevel)) & mask;
  108. }
  109. inline static uint leafIdx(uint v)
  110. {
  111. return v & mask;
  112. }
  113. struct Node
  114. {
  115. virtual void FreeSelf(ArenaAllocator* allocator) = 0;
  116. virtual void Set(ArenaAllocator* allocator, V defv, int level, uint k, V v) = 0;
  117. virtual V Get(V defv, int level, uint k) const = 0;
  118. static inline Node* For(ArenaAllocator* allocator, int level, V defv)
  119. {
  120. if (level == 0)
  121. return Anew(allocator, Leaf, defv);
  122. else
  123. return Anew(allocator, Inner);
  124. }
  125. };
  126. struct Inner : Node
  127. {
  128. Node* children[branchingPerLevel];
  129. Inner()
  130. {
  131. for (int i = 0; i < branchingPerLevel; i++)
  132. children[i] = 0;
  133. }
  134. void FreeSelf(ArenaAllocator* allocator) override
  135. {
  136. for (int i = 0; i < branchingPerLevel; i++)
  137. {
  138. if (children[i] != 0)
  139. {
  140. children[i]->FreeSelf(allocator);
  141. #if DBG
  142. children[i] = 0;
  143. #endif
  144. }
  145. }
  146. Adelete(allocator, this);
  147. }
  148. void Set(ArenaAllocator* allocator, V defv, int level, uint k, V v) override
  149. {
  150. Assert(level > 0);
  151. uint i = innerIdx(level--, k);
  152. if (children[i] == 0)
  153. {
  154. if (v == defv)
  155. return;
  156. children[i] = For(allocator, level, defv);
  157. }
  158. children[i]->Set(allocator, defv, level, k, v);
  159. }
  160. V Get(V defv, int level, uint k) const override
  161. {
  162. Assert(level > 0);
  163. uint i = innerIdx(level--, k);
  164. if (children[i] == 0)
  165. return defv;
  166. else
  167. return children[i]->Get(defv, level, k);
  168. }
  169. };
  170. struct Leaf : Node
  171. {
  172. V values[branchingPerLevel];
  173. Leaf(V defv)
  174. {
  175. for (int i = 0; i < branchingPerLevel; i++)
  176. values[i] = defv;
  177. }
  178. void FreeSelf(ArenaAllocator* allocator) override
  179. {
  180. Adelete(allocator, this);
  181. }
  182. void Set(ArenaAllocator* allocator, V defv, int level, uint k, V v) override
  183. {
  184. Assert(level == 0);
  185. values[leafIdx(k)] = v;
  186. }
  187. V Get(V defv, int level, uint k) const override
  188. {
  189. Assert(level == 0);
  190. return values[leafIdx(k)];
  191. }
  192. };
  193. BVStatic<directSize> isInMap;
  194. V defv;
  195. V directMap[directSize];
  196. Node* root;
  197. public:
  198. CharMap(V defv)
  199. : defv(defv)
  200. , root(0)
  201. {
  202. for (int i = 0; i < directSize; i++)
  203. directMap[i] = defv;
  204. }
  205. void FreeBody(ArenaAllocator* allocator)
  206. {
  207. if (root != 0)
  208. {
  209. root->FreeSelf(allocator);
  210. #if DBG
  211. root = 0;
  212. #endif
  213. }
  214. }
  215. void Set(ArenaAllocator* allocator, Char kc, V v)
  216. {
  217. uint k = CTU(kc);
  218. if (k < directSize)
  219. {
  220. isInMap.Set(k);
  221. directMap[k] = v;
  222. }
  223. else
  224. {
  225. if (root == 0)
  226. {
  227. if (v == defv)
  228. return;
  229. root = Anew(allocator, Inner);
  230. }
  231. root->Set(allocator, defv, levels - 1, k, v);
  232. }
  233. }
  234. bool GetNonDirect(uint k, V& lastOcc) const
  235. {
  236. Assert(k >= directSize);
  237. if (root == nullptr)
  238. {
  239. return false;
  240. }
  241. Node* curr = root;
  242. for (int level = levels - 1; level > 0; level--)
  243. {
  244. Inner* inner = (Inner*)curr;
  245. uint i = innerIdx(level, k);
  246. if (inner->children[i] == 0)
  247. return false;
  248. else
  249. curr = inner->children[i];
  250. }
  251. Leaf* leaf = (Leaf*)curr;
  252. lastOcc = leaf->values[leafIdx(k)];
  253. return true;
  254. }
  255. uint GetDirectMapSize() const { return directSize; }
  256. BOOL IsInDirectMap(uint c) const { Assert(c < directSize); return isInMap.Test(c); }
  257. V GetDirectMap(uint c) const
  258. {
  259. Assert(c < directSize);
  260. __analysis_assume(c < directSize);
  261. return directMap[c];
  262. }
  263. __inline V Get(Char kc) const
  264. {
  265. if (CTU(kc) < GetDirectMapSize())
  266. {
  267. if (!IsInDirectMap(CTU(kc)))
  268. {
  269. return defv;
  270. }
  271. return GetDirectMap(CTU(kc));
  272. }
  273. else
  274. {
  275. V lastOcc;
  276. if (!GetNonDirect(CTU(kc), lastOcc))
  277. {
  278. return defv;
  279. }
  280. return lastOcc;
  281. }
  282. }
  283. };
  284. }