CharTrie.cpp 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. namespace UnifiedRegex
  7. {
  8. // The VS2013 linker treats this as a redefinition of an already
  9. // defined constant and complains. So skip the declaration if we're compiling
  10. // with VS2013 or below.
  11. #if !defined(_MSC_VER) || _MSC_VER >= 1900
  12. const int CharTrie::initCapacity;
  13. #endif
  14. // ----------------------------------------------------------------------
  15. // CharTrie
  16. // ----------------------------------------------------------------------
  17. inline bool CharTrie::Find(Char c, int& outi)
  18. {
  19. if (count == 0)
  20. {
  21. outi = 0;
  22. return false;
  23. }
  24. int l = 0;
  25. int h = count - 1;
  26. while (true)
  27. {
  28. int m = (l + h) / 2;
  29. if (children[m].c == c)
  30. {
  31. outi = m;
  32. return true;
  33. }
  34. else if (CTU(children[m].c) < CTU(c))
  35. {
  36. l = m + 1;
  37. if (l > h)
  38. {
  39. outi = l;
  40. return false;
  41. }
  42. }
  43. else
  44. {
  45. h = m - 1;
  46. if (h < l)
  47. {
  48. outi = m;
  49. return false;
  50. }
  51. }
  52. }
  53. return false;
  54. }
  55. void CharTrie::FreeBody(ArenaAllocator* allocator)
  56. {
  57. for (int i = 0; i < count; i++)
  58. children[i].node.FreeBody(allocator);
  59. if (capacity > 0)
  60. AdeleteArray(allocator, capacity, children);
  61. #if DBG
  62. count = 0;
  63. capacity = 0;
  64. children = 0;
  65. #endif
  66. }
  67. CharTrie* CharTrie::Add(ArenaAllocator* allocator, Char c)
  68. {
  69. int i;
  70. if (!Find(c, i))
  71. {
  72. if (capacity <= count)
  73. {
  74. int newCapacity = max(capacity * 2, initCapacity);
  75. children = (CharTrieEntry*)allocator->Realloc(children, capacity * sizeof(CharTrieEntry), newCapacity * sizeof(CharTrieEntry));
  76. capacity = newCapacity;
  77. }
  78. for (int j = count; j > i; j--)
  79. {
  80. children[j].c = children[j - 1].c;
  81. children[j].node = children[j - 1].node;
  82. }
  83. children[i].c = c;
  84. children[i].node.Reset();
  85. count++;
  86. }
  87. return &children[i].node;
  88. }
  89. bool CharTrie::IsDepthZero() const
  90. {
  91. return isAccepting && count == 0;
  92. }
  93. bool CharTrie::IsDepthOne() const
  94. {
  95. if (isAccepting)
  96. return 0;
  97. for (int i = 0; i < count; i++)
  98. {
  99. if (!children[i].node.IsDepthZero())
  100. return false;
  101. }
  102. return true;
  103. }
  104. #if ENABLE_REGEX_CONFIG_OPTIONS
  105. void CharTrie::Print(DebugWriter* w) const
  106. {
  107. w->Indent();
  108. if (isAccepting)
  109. w->PrintEOL(_u("<accept>"));
  110. for (int i = 0; i < count; i++)
  111. {
  112. w->PrintQuotedChar(children[i].c);
  113. w->EOL();
  114. children[i].node.Print(w);
  115. }
  116. w->Unindent();
  117. }
  118. #endif
  119. // ----------------------------------------------------------------------
  120. // RuntimeCharTrie
  121. // ----------------------------------------------------------------------
  122. bool RuntimeCharTrie::Match
  123. (const Char* const input
  124. , const CharCount inputLength
  125. , CharCount& inputOffset
  126. #if ENABLE_REGEX_CONFIG_OPTIONS
  127. , RegexStats* stats
  128. #endif
  129. ) const
  130. {
  131. const RuntimeCharTrie* curr = this;
  132. while (true)
  133. {
  134. if (curr->count == 0)
  135. return true;
  136. if (inputOffset >= inputLength)
  137. return false;
  138. #if ENABLE_REGEX_CONFIG_OPTIONS
  139. if (stats != 0)
  140. stats->numCompares++;
  141. #endif
  142. #if 0
  143. int l = 0;
  144. int h = curr->count - 1;
  145. while (true)
  146. {
  147. if (l > h)
  148. return false;
  149. int m = (l + h) / 2;
  150. if (curr->children[m].c == input[inputOffset])
  151. {
  152. inputOffset++;
  153. curr = &curr->children[m].node;
  154. break;
  155. }
  156. else if (CTU(curr->children[m].c) < CTU(input[inputOffset]))
  157. l = m + 1;
  158. else
  159. h = m - 1;
  160. }
  161. #else
  162. int i = 0;
  163. while (true)
  164. {
  165. if (curr->children[i].c == input[inputOffset])
  166. {
  167. inputOffset++;
  168. curr = &curr->children[i].node;
  169. break;
  170. }
  171. else if (curr->children[i].c > input[inputOffset])
  172. return false;
  173. else if (++i >= curr->count)
  174. return false;
  175. }
  176. #endif
  177. }
  178. }
  179. void RuntimeCharTrie::FreeBody(ArenaAllocator* allocator)
  180. {
  181. for (int i = 0; i < count; i++)
  182. children[i].node.FreeBody(allocator);
  183. if (count > 0)
  184. AdeleteArray(allocator, count, children);
  185. #if DBG
  186. count = 0;
  187. children = 0;
  188. #endif
  189. }
  190. void RuntimeCharTrie::CloneFrom(ArenaAllocator* allocator, const CharTrie& other)
  191. {
  192. count = other.count;
  193. if (count > 0)
  194. {
  195. children = AnewArray(allocator, RuntimeCharTrieEntry, count);
  196. for (int i = 0; i < count; i++)
  197. {
  198. children[i].c = other.children[i].c;
  199. children[i].node.CloneFrom(allocator, other.children[i].node);
  200. }
  201. }
  202. else
  203. children = 0;
  204. }
  205. #if ENABLE_REGEX_CONFIG_OPTIONS
  206. void RuntimeCharTrie::Print(DebugWriter* w) const
  207. {
  208. w->Indent();
  209. for (int i = 0; i < count; i++)
  210. {
  211. w->PrintQuotedChar(children[i].c);
  212. w->EOL();
  213. children[i].node.Print(w);
  214. }
  215. w->Unindent();
  216. }
  217. #endif
  218. }