CharTrie.cpp 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. namespace UnifiedRegex
  7. {
  8. // ----------------------------------------------------------------------
  9. // CharTrie
  10. // ----------------------------------------------------------------------
  11. __inline bool CharTrie::Find(Char c, int& outi)
  12. {
  13. if (count == 0)
  14. {
  15. outi = 0;
  16. return false;
  17. }
  18. int l = 0;
  19. int h = count - 1;
  20. while (true)
  21. {
  22. int m = (l + h) / 2;
  23. if (children[m].c == c)
  24. {
  25. outi = m;
  26. return true;
  27. }
  28. else if (CTU(children[m].c) < CTU(c))
  29. {
  30. l = m + 1;
  31. if (l > h)
  32. {
  33. outi = l;
  34. return false;
  35. }
  36. }
  37. else
  38. {
  39. h = m - 1;
  40. if (h < l)
  41. {
  42. outi = m;
  43. return false;
  44. }
  45. }
  46. }
  47. return false;
  48. }
  49. void CharTrie::FreeBody(ArenaAllocator* allocator)
  50. {
  51. for (int i = 0; i < count; i++)
  52. children[i].node.FreeBody(allocator);
  53. if (capacity > 0)
  54. AdeleteArray(allocator, capacity, children);
  55. #if DBG
  56. count = 0;
  57. capacity = 0;
  58. children = 0;
  59. #endif
  60. }
  61. CharTrie* CharTrie::Add(ArenaAllocator* allocator, Char c)
  62. {
  63. int i;
  64. if (!Find(c, i))
  65. {
  66. if (capacity <= count)
  67. {
  68. int newCapacity = max(capacity * 2, initCapacity);
  69. children = (CharTrieEntry*)allocator->Realloc(children, capacity * sizeof(CharTrieEntry), newCapacity * sizeof(CharTrieEntry));
  70. capacity = newCapacity;
  71. }
  72. for (int j = count; j > i; j--)
  73. {
  74. children[j].c = children[j - 1].c;
  75. children[j].node = children[j - 1].node;
  76. }
  77. children[i].c = c;
  78. children[i].node.Reset();
  79. count++;
  80. }
  81. return &children[i].node;
  82. }
  83. bool CharTrie::IsDepthZero() const
  84. {
  85. return isAccepting && count == 0;
  86. }
  87. bool CharTrie::IsDepthOne() const
  88. {
  89. if (isAccepting)
  90. return 0;
  91. for (int i = 0; i < count; i++)
  92. {
  93. if (!children[i].node.IsDepthZero())
  94. return false;
  95. }
  96. return true;
  97. }
  98. #if ENABLE_REGEX_CONFIG_OPTIONS
  99. void CharTrie::Print(DebugWriter* w) const
  100. {
  101. w->Indent();
  102. if (isAccepting)
  103. w->PrintEOL(L"<accept>");
  104. for (int i = 0; i < count; i++)
  105. {
  106. w->PrintQuotedChar(children[i].c);
  107. w->EOL();
  108. children[i].node.Print(w);
  109. }
  110. w->Unindent();
  111. }
  112. #endif
  113. // ----------------------------------------------------------------------
  114. // RuntimeCharTrie
  115. // ----------------------------------------------------------------------
  116. bool RuntimeCharTrie::Match
  117. (const Char* const input
  118. , const CharCount inputLength
  119. , CharCount& inputOffset
  120. #if ENABLE_REGEX_CONFIG_OPTIONS
  121. , RegexStats* stats
  122. #endif
  123. ) const
  124. {
  125. const RuntimeCharTrie* curr = this;
  126. while (true)
  127. {
  128. if (curr->count == 0)
  129. return true;
  130. if (inputOffset >= inputLength)
  131. return false;
  132. #if ENABLE_REGEX_CONFIG_OPTIONS
  133. if (stats != 0)
  134. stats->numCompares++;
  135. #endif
  136. #if 0
  137. int l = 0;
  138. int h = curr->count - 1;
  139. while (true)
  140. {
  141. if (l > h)
  142. return false;
  143. int m = (l + h) / 2;
  144. if (curr->children[m].c == input[inputOffset])
  145. {
  146. inputOffset++;
  147. curr = &curr->children[m].node;
  148. break;
  149. }
  150. else if (CTU(curr->children[m].c) < CTU(input[inputOffset]))
  151. l = m + 1;
  152. else
  153. h = m - 1;
  154. }
  155. #else
  156. int i = 0;
  157. while (true)
  158. {
  159. if (curr->children[i].c == input[inputOffset])
  160. {
  161. inputOffset++;
  162. curr = &curr->children[i].node;
  163. break;
  164. }
  165. else if (curr->children[i].c > input[inputOffset])
  166. return false;
  167. else if (++i >= curr->count)
  168. return false;
  169. }
  170. #endif
  171. }
  172. }
  173. void RuntimeCharTrie::FreeBody(ArenaAllocator* allocator)
  174. {
  175. for (int i = 0; i < count; i++)
  176. children[i].node.FreeBody(allocator);
  177. if (count > 0)
  178. AdeleteArray(allocator, count, children);
  179. #if DBG
  180. count = 0;
  181. children = 0;
  182. #endif
  183. }
  184. void RuntimeCharTrie::CloneFrom(ArenaAllocator* allocator, const CharTrie& other)
  185. {
  186. count = other.count;
  187. if (count > 0)
  188. {
  189. children = AnewArray(allocator, RuntimeCharTrieEntry, count);
  190. for (int i = 0; i < count; i++)
  191. {
  192. children[i].c = other.children[i].c;
  193. children[i].node.CloneFrom(allocator, other.children[i].node);
  194. }
  195. }
  196. else
  197. children = 0;
  198. }
  199. #if ENABLE_REGEX_CONFIG_OPTIONS
  200. void RuntimeCharTrie::Print(DebugWriter* w) const
  201. {
  202. w->Indent();
  203. for (int i = 0; i < count; i++)
  204. {
  205. w->PrintQuotedChar(children[i].c);
  206. w->EOL();
  207. children[i].node.Print(w);
  208. }
  209. w->Unindent();
  210. }
  211. #endif
  212. }