// RegexHelper.h
//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. namespace Js
  7. {
  8. struct RegexMatchState;
  9. class RegexHelper
  10. {
  11. static const int MinTrigramInputLength=250000;
  12. //
  13. // Dynamic compilation
  14. //
  15. static bool GetFlags(ScriptContext* scriptContext, __in_ecount(strLen) const char16* str, CharCount strLen, UnifiedRegex::RegexFlags &flags);
  16. public:
  17. static UnifiedRegex::RegexPattern* CompileDynamic(ScriptContext *scriptContext, const char16* psz, CharCount csz, const char16* pszOpts, CharCount cszOpts, bool isLiteralSource);
  18. static UnifiedRegex::RegexPattern* CompileDynamic(ScriptContext *scriptContext, const char16* psz, CharCount csz, UnifiedRegex::RegexFlags flags, bool isLiteralSource);
  19. private:
  20. static UnifiedRegex::RegexPattern* PrimCompileDynamic(ScriptContext *scriptContext, const char16* psz, CharCount csz, const char16* pszOpts, CharCount cszOpts, bool isLiteralSource);
  21. //
  22. // Primitives
  23. //
  24. public:
  25. static UnifiedRegex::GroupInfo SimpleMatch(ScriptContext * scriptContext, UnifiedRegex::RegexPattern * pattern, const char16 * inputStr, CharCount inputLength, CharCount offset);
  26. static Var NonMatchValue(ScriptContext* scriptContext, bool isGlobalCtor);
  27. static Var GetString(ScriptContext* scriptContext, JavascriptString* input, Var nonMatchValue, UnifiedRegex::GroupInfo group);
  28. static Var GetGroup(ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, JavascriptString* input, Var nonMatchValue, int groupId);
  29. private:
  30. static void PropagateLastMatch
  31. ( ScriptContext* scriptContext
  32. , bool isGlobal
  33. , bool isSticky
  34. , JavascriptRegExp* regularExpression
  35. , JavascriptString* lastInput
  36. , UnifiedRegex::GroupInfo lastSuccessfulMatch
  37. , UnifiedRegex::GroupInfo lastActualMatch
  38. , bool updateRegex
  39. , bool updateCtor
  40. , bool useSplitPattern = false );
  41. static void PropagateLastMatchToRegex
  42. ( ScriptContext* scriptContext
  43. , bool isGlobal
  44. , bool isSticky
  45. , JavascriptRegExp* regularExpression
  46. , UnifiedRegex::GroupInfo lastSuccessfulMatch
  47. , UnifiedRegex::GroupInfo lastActualMatch );
  48. static void PropagateLastMatchToCtor
  49. ( ScriptContext* scriptContext
  50. , JavascriptRegExp* regularExpression
  51. , JavascriptString* lastInput
  52. , UnifiedRegex::GroupInfo lastSuccessfulMatch
  53. , bool useSplitPattern );
  54. static void InvalidateLastMatchOnCtor(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* lastInput, bool useSplitPattern = false);
  55. static bool GetInitialOffset(bool isGlobal, bool isSticky, JavascriptRegExp* regularExpression, CharCount inputLength, CharCount& offset);
  56. static JavascriptArray* CreateMatchResult(void *const stackAllocationPointer, ScriptContext* scriptContext, bool isGlobal, int numGroups, JavascriptString* input);
  57. static void FinalizeMatchResult(ScriptContext* scriptContext, bool isGlobal, JavascriptArray* arr, UnifiedRegex::GroupInfo match);
  58. static JavascriptArray* CreateExecResult(void *const stackAllocationPointer, ScriptContext* scriptContext, int numGroups, JavascriptString* input, UnifiedRegex::GroupInfo match);
  59. template<typename T> static T CheckCrossContextAndMarshalResult(T value, ScriptContext* targetContext);
  60. //
  61. // Regex entry points
  62. //
  63. public:
  64. static Var RegexMatchResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input);
  65. static Var RegexMatchResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input);
  66. static Var RegexMatchResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input);
  67. static Var RegexMatch(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, bool noResult, void *const stackAllocationPointer = nullptr);
  68. static Var RegexMatchNoHistory(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, bool noResult);
  69. static Var RegexExecResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input);
  70. static Var RegexExecResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input);
  71. static Var RegexExecResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input);
  72. static Var RegexExec(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, bool noResult, void *const stackAllocationPointer = nullptr);
  73. static Var RegexTest(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input);
  74. template<bool mustMatchEntireInput> static BOOL RegexTest_NonScript(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, const char16 *const input, const CharCount inputLength);
  75. private:
  76. static void PrimBeginMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, const char16* input, CharCount inputLength, bool alwaysNeedAlloc);
  77. static UnifiedRegex::GroupInfo PrimMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, CharCount inputLength, CharCount offset);
  78. static void PrimEndMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern);
  79. template<typename GroupFn>
  80. static void ReplaceFormatString
  81. ( ScriptContext* scriptContext
  82. , int numGroups
  83. , GroupFn getGroup
  84. , JavascriptString* input
  85. , const char16* matchedString
  86. , UnifiedRegex::GroupInfo match
  87. , JavascriptString* replace
  88. , int substitutions
  89. , __in_ecount(substitutions) CharCount* substitutionOffsets
  90. , CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)>& concatenated );
  91. public:
  92. static Var RegexReplaceResultUsed(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace);
  93. static Var RegexReplaceResultNotUsed(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace);
  94. static Var RegexReplace(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult);
  95. static Var RegexReplaceFunction(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptFunction* replacefn);
  96. static Var StringReplace(JavascriptString* regularExpression, JavascriptString* input, JavascriptString* replace);
  97. static Var StringReplace(ScriptContext* scriptContext, JavascriptString* regularExpression, JavascriptString* input, JavascriptFunction* replacefn);
  98. static Var RegexSplitResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit);
  99. static Var RegexSplitResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit);
  100. static Var RegexSplitResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit);
  101. static Var RegexSplit(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer = nullptr);
  102. static Var RegexSearch(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input);
  103. static Var StringSplit(JavascriptString* regularExpression, JavascriptString* input, CharCount limit);
  104. static bool IsResultNotUsed(CallFlags flags);
  105. private:
  106. static void AppendSubString(ScriptContext* scriptContext, JavascriptArray* ary, JavascriptString* input, CharCount startInclusive, CharCount endExclusive);
  107. template <bool updateHistory>
  108. static Var RegexMatchImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, bool noResult, void *const stackAllocationPointer = nullptr);
  109. static bool IsRegexSymbolMatchObservable(RecyclableObject* instance, ScriptContext* scriptContext);
  110. static Var RegexEs6MatchImpl(ScriptContext* scriptContext, RecyclableObject *thisObj, JavascriptString *input, bool noResult, void *const stackAllocationPointer);
  111. template <bool updateHistory>
  112. static Var RegexEs5MatchImpl(ScriptContext* scriptContext, JavascriptRegExp *regularExpression, JavascriptString *input, bool noResult, void *const stackAllocationPointer = nullptr);
  113. static Var RegexExecImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, bool noResult, void *const stackAllocationPointer = nullptr);
  114. static Var RegexEs5Replace(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace, bool noResult);
  115. static Var RegexReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult);
  116. static bool IsRegexSymbolReplaceObservable(RecyclableObject* instance, ScriptContext* scriptContext);
  117. static Var RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult);
  118. static Var RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptFunction* replaceFn);
  119. template<typename ReplacementFn>
  120. static Var RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, ReplacementFn appendReplacement, bool noResult);
  121. static Var RegexEs5ReplaceImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace, bool noResult);
  122. static Var RegexReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptFunction* replacefn);
  123. static Var RegexEs5ReplaceImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptFunction* replacefn);
  124. static Var RegexSearchImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input);
  125. inline static UnifiedRegex::RegexPattern *GetSplitPattern(ScriptContext* scriptContext, JavascriptRegExp *regularExpression);
  126. static bool IsRegexSymbolSplitObservable(RecyclableObject* instance, ScriptContext* scriptContext);
  127. static Var RegexSplitImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer = nullptr);
  128. static Var RegexEs6SplitImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer = nullptr);
  129. static JavascriptString* AppendStickyToFlagsIfNeeded(JavascriptString* flags, ScriptContext* scriptContext);
  130. static Var RegexEs5SplitImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer = nullptr);
  131. static bool IsRegexTestObservable(RecyclableObject* instance, ScriptContext* scriptContext);
  132. static Var RegexEs6TestImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input);
  133. static Var RegexEs5TestImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input);
  134. static int GetReplaceSubstitutions(const char16 * const replaceStr, CharCount const replaceLength, ArenaAllocator * const tempAllocator, CharCount** const substitutionOffsetsOut);
  135. static RecyclableObject* ExecResultToRecyclableObject(Var result);
  136. static JavascriptString* GetMatchStrFromResult(RecyclableObject* result, ScriptContext* scriptContext);
  137. static void AdvanceLastIndex(RecyclableObject* instance, JavascriptString* input, JavascriptString* matchStr, bool unicode, ScriptContext* scriptContext);
  138. static charcount_t AdvanceStringIndex(JavascriptString* string, charcount_t index, bool isUnicode);
  139. };
  140. }