RegexPattern.cpp 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. namespace UnifiedRegex
  7. {
  8. RegexPattern::RegexPattern(Js::JavascriptLibrary *const library, Program* program, bool isLiteral)
  9. : library(library), isLiteral(isLiteral), isShallowClone(false), testCache(nullptr)
  10. {
  11. rep.unified.program = program;
  12. rep.unified.matcher = nullptr;
  13. rep.unified.trigramInfo = nullptr;
  14. }
  15. RegexPattern *RegexPattern::New(Js::ScriptContext *scriptContext, Program* program, bool isLiteral)
  16. {
  17. return
  18. RecyclerNewFinalized(
  19. scriptContext->GetRecycler(),
  20. RegexPattern,
  21. scriptContext->GetLibrary(),
  22. program,
  23. isLiteral);
  24. }
  25. void RegexPattern::Finalize(bool isShutdown)
  26. {
  27. if (isShutdown)
  28. {
  29. return;
  30. }
  31. const auto scriptContext = GetScriptContext();
  32. if (!scriptContext)
  33. {
  34. return;
  35. }
  36. #if DBG
  37. // In JSRT or ChakraEngine, we might not have a chance to close at finalize time
  38. if (!isLiteral && !scriptContext->IsClosed() &&
  39. !scriptContext->GetThreadContext()->IsJSRT() &&
  40. !scriptContext->GetLibrary()->IsChakraEngine())
  41. {
  42. const auto source = GetSource();
  43. RegexPattern *p = nullptr;
  44. bool hasRegexPatternForSourceKey = GetScriptContext()->GetDynamicRegexMap()->TryGetValue(
  45. RegexKey(source.GetBuffer(), source.GetLength(), GetFlags()), &p);
  46. bool isSourceLengthZero = source.GetLength() == 0;
  47. bool isUniquePattern = p != this;
  48. Assert(!hasRegexPatternForSourceKey || isSourceLengthZero || isUniquePattern);
  49. }
  50. #endif
  51. if (isShallowClone)
  52. {
  53. return;
  54. }
  55. rep.unified.program->FreeBody(scriptContext->RegexAllocator());
  56. }
  57. void RegexPattern::Dispose(bool isShutdown)
  58. {
  59. }
  60. Js::ScriptContext *RegexPattern::GetScriptContext() const
  61. {
  62. return library->GetScriptContext();
  63. }
  64. Js::InternalString RegexPattern::GetSource() const
  65. {
  66. return Js::InternalString(rep.unified.program->source, rep.unified.program->sourceLen);
  67. }
  68. RegexFlags RegexPattern::GetFlags() const
  69. {
  70. return rep.unified.program->flags;
  71. }
  72. uint16 RegexPattern::NumGroups() const
  73. {
  74. return rep.unified.program->numGroups;
  75. }
  76. bool RegexPattern::IsIgnoreCase() const
  77. {
  78. return (rep.unified.program->flags & IgnoreCaseRegexFlag) != 0;
  79. }
  80. bool RegexPattern::IsGlobal() const
  81. {
  82. return (rep.unified.program->flags & GlobalRegexFlag) != 0;
  83. }
  84. bool RegexPattern::IsMultiline() const
  85. {
  86. return (rep.unified.program->flags & MultilineRegexFlag) != 0;
  87. }
  88. bool RegexPattern::IsUnicode() const
  89. {
  90. return GetScriptContext()->GetConfig()->IsES6UnicodeExtensionsEnabled() && (rep.unified.program->flags & UnicodeRegexFlag) != 0;
  91. }
  92. bool RegexPattern::IsSticky() const
  93. {
  94. return GetScriptContext()->GetConfig()->IsES6RegExStickyEnabled() && (rep.unified.program->flags & StickyRegexFlag) != 0;
  95. }
  96. bool RegexPattern::WasLastMatchSuccessful() const
  97. {
  98. return rep.unified.matcher != 0 && rep.unified.matcher->WasLastMatchSuccessful();
  99. }
  100. GroupInfo RegexPattern::GetGroup(int groupId) const
  101. {
  102. Assert(groupId == 0 || WasLastMatchSuccessful());
  103. Assert(groupId >= 0 && groupId < NumGroups());
  104. return rep.unified.matcher->GetGroup(groupId);
  105. }
  106. RegexPattern *RegexPattern::CopyToScriptContext(Js::ScriptContext *scriptContext)
  107. {
  108. // This routine assumes that this instance will outlive the copy, which is the case for copy-on-write,
  109. // and therefore doesn't copy the immutable parts of the pattern. This should not be confused with a
  110. // would be CloneToScriptContext which will would clone the immutable parts as well because the lifetime
  111. // of a clone might be longer than the original.
  112. RegexPattern *result = UnifiedRegex::RegexPattern::New(scriptContext, rep.unified.program, isLiteral);
  113. Matcher *matcherClone = rep.unified.matcher ? rep.unified.matcher->CloneToScriptContext(scriptContext, result) : nullptr;
  114. result->rep.unified.matcher = matcherClone;
  115. result->isShallowClone = true;
  116. return result;
  117. }
  118. Field(RegExpTestCache*) RegexPattern::EnsureTestCache()
  119. {
  120. if (this->testCache == nullptr)
  121. {
  122. this->testCache = RecyclerNewPlusZ(this->library->GetRecycler(), TestCacheSize * sizeof(void*), RegExpTestCache);
  123. }
  124. return this->testCache;
  125. }
  126. /* static */
  127. uint RegexPattern::GetTestCacheIndex(Js::JavascriptString* str)
  128. {
  129. return (uint)(((uintptr_t)str) >> PolymorphicInlineCacheShift) & (TestCacheSize - 1);
  130. }
  131. #if ENABLE_REGEX_CONFIG_OPTIONS
  132. void RegexPattern::Print(DebugWriter* w)
  133. {
  134. w->Print(_u("/"));
  135. Js::InternalString str = GetSource();
  136. if (str.GetLength() == 0)
  137. w->Print(_u("(?:)"));
  138. else
  139. {
  140. for (charcount_t i = 0; i < str.GetLength(); ++i)
  141. {
  142. const char16 c = str.GetBuffer()[i];
  143. switch(c)
  144. {
  145. case _u('/'):
  146. w->Print(_u("\\%lc"), c);
  147. break;
  148. case _u('\n'):
  149. case _u('\r'):
  150. case _u('\x2028'):
  151. case _u('\x2029'):
  152. w->PrintEscapedChar(c);
  153. break;
  154. case _u('\\'):
  155. Assert(i + 1 < str.GetLength()); // cannot end in a '\'
  156. w->Print(_u("\\%lc"), str.GetBuffer()[++i]);
  157. break;
  158. default:
  159. w->PrintEscapedChar(c);
  160. break;
  161. }
  162. }
  163. }
  164. w->Print(_u("/"));
  165. if (IsIgnoreCase())
  166. w->Print(_u("i"));
  167. if (IsGlobal())
  168. w->Print(_u("g"));
  169. if (IsMultiline())
  170. w->Print(_u("m"));
  171. if (IsUnicode())
  172. w->Print(_u("u"));
  173. if (IsSticky())
  174. w->Print(_u("y"));
  175. w->Print(_u(" /* "));
  176. w->Print(_u(", "));
  177. w->Print(isLiteral ? _u("literal") : _u("dynamic"));
  178. w->Print(_u(" */"));
  179. }
  180. /* static */
  181. void RegexPattern::TraceTestCache(bool cacheHit, Js::JavascriptString* input, Js::JavascriptString* cachedValue, bool disabled)
  182. {
  183. if (REGEX_CONFIG_FLAG(RegexTracing))
  184. {
  185. if (disabled)
  186. {
  187. Output::Print(_u("Regexp Test Cache Disabled.\n"));
  188. }
  189. else if (cacheHit)
  190. {
  191. Output::Print(_u("Regexp Test Cache Hit.\n"));
  192. }
  193. else
  194. {
  195. Output::Print(_u("Regexp Test Cache Miss. "));
  196. if (cachedValue != nullptr)
  197. {
  198. Output::Print(_u("Input: (%p); Cached String: (%p) '%s'\n"), input, cachedValue, cachedValue->GetString());
  199. }
  200. else
  201. {
  202. Output::Print(_u("Cache was empty\n"));
  203. }
  204. }
  205. }
  206. }
  207. #endif
  208. }