RegexHelper.cpp 102 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "RuntimeLibraryPch.h"
  6. // Parser Includes
  7. #include "DebugWriter.h"
  8. #include "RegexStats.h"
  9. #include "OctoquadIdentifier.h"
  10. #include "RegexCompileTime.h"
  11. #include "RegexParser.h"
  12. #include "RegexPattern.h"
  13. namespace Js
  14. {
  15. // ----------------------------------------------------------------------
  16. // Dynamic compilation
  17. // ----------------------------------------------------------------------
  18. // See also:
  19. // UnifiedRegex::Parser::Options(...)
  20. bool RegexHelper::GetFlags(Js::ScriptContext* scriptContext, __in_ecount(strLen) const char16* str, CharCount strLen, UnifiedRegex::RegexFlags &flags)
  21. {
  22. for (CharCount i = 0; i < strLen; i++)
  23. {
  24. switch (str[i])
  25. {
  26. case 'i':
  27. if ((flags & UnifiedRegex::IgnoreCaseRegexFlag) != 0)
  28. return false;
  29. flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::IgnoreCaseRegexFlag);
  30. break;
  31. case 'g':
  32. if ((flags & UnifiedRegex::GlobalRegexFlag) != 0)
  33. return false;
  34. flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::GlobalRegexFlag);
  35. break;
  36. case 'm':
  37. if ((flags & UnifiedRegex::MultilineRegexFlag) != 0)
  38. return false;
  39. flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::MultilineRegexFlag);
  40. break;
  41. case 'u':
  42. if (scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled())
  43. {
  44. if((flags & UnifiedRegex::UnicodeRegexFlag) != 0)
  45. return false;
  46. flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::UnicodeRegexFlag);
  47. break;
  48. }
  49. return false;
  50. case 'y':
  51. if (scriptContext->GetConfig()->IsES6RegExStickyEnabled())
  52. {
  53. if ((flags & UnifiedRegex::StickyRegexFlag) != 0)
  54. return false;
  55. flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::StickyRegexFlag);
  56. break;
  57. }
  58. return false;
  59. default:
  60. return false;
  61. }
  62. }
  63. return true;
  64. }
  65. UnifiedRegex::RegexPattern* RegexHelper::CompileDynamic(ScriptContext *scriptContext, const char16* psz, CharCount csz, const char16* pszOpts, CharCount cszOpts, bool isLiteralSource)
  66. {
  67. Assert(psz != 0 && psz[csz] == 0);
  68. Assert(pszOpts != 0 || cszOpts == 0);
  69. Assert(pszOpts == 0 || pszOpts[cszOpts] == 0);
  70. UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
  71. if (pszOpts != NULL)
  72. {
  73. if (!GetFlags(scriptContext, pszOpts, cszOpts, flags))
  74. {
  75. // Compile in order to throw appropriate error for ill-formed flags
  76. PrimCompileDynamic(scriptContext, psz, csz, pszOpts, cszOpts, isLiteralSource);
  77. Assert(false);
  78. }
  79. }
  80. if(isLiteralSource)
  81. {
  82. // The source is from a literal regex, so we're cloning a literal regex. Don't use the dynamic regex MRU map since
  83. // these literal regex patterns' lifetimes are tied with the function body.
  84. return PrimCompileDynamic(scriptContext, psz, csz, pszOpts, cszOpts, isLiteralSource);
  85. }
  86. UnifiedRegex::RegexKey lookupKey(psz, csz, flags);
  87. UnifiedRegex::RegexPattern* pattern = nullptr;
  88. RegexPatternMruMap* dynamicRegexMap = scriptContext->GetDynamicRegexMap();
  89. if (!dynamicRegexMap->TryGetValue(lookupKey, &pattern))
  90. {
  91. pattern = PrimCompileDynamic(scriptContext, psz, csz, pszOpts, cszOpts, isLiteralSource);
  92. // WARNING: Must calculate key again so that dictionary has copy of source associated with the pattern
  93. const auto source = pattern->GetSource();
  94. UnifiedRegex::RegexKey finalKey(source.GetBuffer(), source.GetLength(), flags);
  95. dynamicRegexMap->Add(finalKey, pattern);
  96. }
  97. return pattern;
  98. }
  99. UnifiedRegex::RegexPattern* RegexHelper::CompileDynamic(
  100. ScriptContext *scriptContext, const char16* psz, CharCount csz, UnifiedRegex::RegexFlags flags, bool isLiteralSource)
  101. {
  102. //
  103. // Regex compilations are mostly string parsing based. To avoid duplicating validation rules,
  104. // generate a trivial options string right here on the stack and delegate to the string parsing
  105. // based implementation.
  106. //
  107. const CharCount OPT_BUF_SIZE = 6;
  108. char16 opts[OPT_BUF_SIZE];
  109. CharCount i = 0;
  110. if (flags & UnifiedRegex::IgnoreCaseRegexFlag)
  111. {
  112. opts[i++] = _u('i');
  113. }
  114. if (flags & UnifiedRegex::GlobalRegexFlag)
  115. {
  116. opts[i++] = _u('g');
  117. }
  118. if (flags & UnifiedRegex::MultilineRegexFlag)
  119. {
  120. opts[i++] = _u('m');
  121. }
  122. if (flags & UnifiedRegex::UnicodeRegexFlag)
  123. {
  124. Assert(scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled());
  125. opts[i++] = _u('u');
  126. }
  127. if (flags & UnifiedRegex::StickyRegexFlag)
  128. {
  129. Assert(scriptContext->GetConfig()->IsES6RegExStickyEnabled());
  130. opts[i++] = _u('y');
  131. }
  132. Assert(i < OPT_BUF_SIZE);
  133. opts[i] = NULL;
  134. return CompileDynamic(scriptContext, psz, csz, opts, i, isLiteralSource);
  135. }
  136. UnifiedRegex::RegexPattern* RegexHelper::PrimCompileDynamic(ScriptContext *scriptContext, const char16* psz, CharCount csz, const char16* pszOpts, CharCount cszOpts, bool isLiteralSource)
  137. {
  138. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  139. // SEE ALSO: Scanner<EncodingPolicy>::ScanRegExpConstant()
  140. #ifdef PROFILE_EXEC
  141. scriptContext->ProfileBegin(Js::RegexCompilePhase);
  142. #endif
  143. ArenaAllocator* rtAllocator = scriptContext->RegexAllocator();
  144. #if ENABLE_REGEX_CONFIG_OPTIONS
  145. UnifiedRegex::DebugWriter *dw = 0;
  146. if (REGEX_CONFIG_FLAG(RegexDebug))
  147. dw = scriptContext->GetRegexDebugWriter();
  148. UnifiedRegex::RegexStats* stats = 0;
  149. #endif
  150. UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
  151. if(csz == 0 && cszOpts == 0)
  152. {
  153. // Fast path for compiling the empty regex with empty flags, for the RegExp constructor object and other cases.
  154. // These empty regexes are dynamic regexes and so this fast path only exists for dynamic regex compilation. The
  155. // standard chars in particular, do not need to be initialized to compile this regex.
  156. UnifiedRegex::Program* program = UnifiedRegex::Program::New(scriptContext->GetRecycler(), flags);
  157. UnifiedRegex::Parser<NullTerminatedUnicodeEncodingPolicy, false>::CaptureEmptySourceAndNoGroups(program);
  158. UnifiedRegex::RegexPattern* pattern = UnifiedRegex::RegexPattern::New(scriptContext, program, false);
  159. UnifiedRegex::Compiler::CompileEmptyRegex
  160. ( program
  161. , pattern
  162. #if ENABLE_REGEX_CONFIG_OPTIONS
  163. , dw
  164. , stats
  165. #endif
  166. );
  167. #ifdef PROFILE_EXEC
  168. scriptContext->ProfileEnd(Js::RegexCompilePhase);
  169. #endif
  170. return pattern;
  171. }
  172. #if ENABLE_REGEX_CONFIG_OPTIONS
  173. if (REGEX_CONFIG_FLAG(RegexProfile))
  174. scriptContext->GetRegexStatsDatabase()->BeginProfile();
  175. #endif
  176. BEGIN_TEMP_ALLOCATOR(ctAllocator, scriptContext, _u("UnifiedRegexParseAndCompile"));
  177. UnifiedRegex::StandardChars<char16>* standardChars = scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
  178. UnifiedRegex::Node* root = 0;
  179. UnifiedRegex::Parser<NullTerminatedUnicodeEncodingPolicy, false> parser
  180. ( scriptContext
  181. , ctAllocator
  182. , standardChars
  183. , standardChars
  184. , false
  185. #if ENABLE_REGEX_CONFIG_OPTIONS
  186. , dw
  187. #endif
  188. );
  189. try
  190. {
  191. root = parser.ParseDynamic(psz, psz + csz, pszOpts, pszOpts + cszOpts, flags);
  192. }
  193. catch (UnifiedRegex::ParseError e)
  194. {
  195. END_TEMP_ALLOCATOR(ctAllocator, scriptContext);
  196. #ifdef PROFILE_EXEC
  197. scriptContext->ProfileEnd(Js::RegexCompilePhase);
  198. #endif
  199. Js::JavascriptError::ThrowSyntaxError(scriptContext, e.error);
  200. // never reached
  201. }
  202. const auto recycler = scriptContext->GetRecycler();
  203. UnifiedRegex::Program* program = UnifiedRegex::Program::New(recycler, flags);
  204. parser.CaptureSourceAndGroups(recycler, program, psz, csz, csz);
  205. UnifiedRegex::RegexPattern* pattern = UnifiedRegex::RegexPattern::New(scriptContext, program, isLiteralSource);
  206. #if ENABLE_REGEX_CONFIG_OPTIONS
  207. if (REGEX_CONFIG_FLAG(RegexProfile))
  208. {
  209. stats = scriptContext->GetRegexStatsDatabase()->GetRegexStats(pattern);
  210. scriptContext->GetRegexStatsDatabase()->EndProfile(stats, UnifiedRegex::RegexStats::Parse);
  211. }
  212. if (REGEX_CONFIG_FLAG(RegexTracing))
  213. {
  214. UnifiedRegex::DebugWriter* tw = scriptContext->GetRegexDebugWriter();
  215. tw->Print(_u("// REGEX COMPILE "));
  216. pattern->Print(tw);
  217. tw->EOL();
  218. }
  219. if (REGEX_CONFIG_FLAG(RegexProfile))
  220. scriptContext->GetRegexStatsDatabase()->BeginProfile();
  221. #endif
  222. UnifiedRegex::Compiler::Compile
  223. ( scriptContext
  224. , ctAllocator
  225. , rtAllocator
  226. , standardChars
  227. , program
  228. , root
  229. , parser.GetLitbuf()
  230. , pattern
  231. #if ENABLE_REGEX_CONFIG_OPTIONS
  232. , dw
  233. , stats
  234. #endif
  235. );
  236. #if ENABLE_REGEX_CONFIG_OPTIONS
  237. if (REGEX_CONFIG_FLAG(RegexProfile))
  238. scriptContext->GetRegexStatsDatabase()->EndProfile(stats, UnifiedRegex::RegexStats::Compile);
  239. #endif
  240. END_TEMP_ALLOCATOR(ctAllocator, scriptContext);
  241. #ifdef PROFILE_EXEC
  242. scriptContext->ProfileEnd(Js::RegexCompilePhase);
  243. #endif
  244. return pattern;
  245. }
  246. // ----------------------------------------------------------------------
  247. // Primitives
  248. // ----------------------------------------------------------------------
  249. #if ENABLE_REGEX_CONFIG_OPTIONS
  250. static void RegexHelperTrace(
  251. ScriptContext* scriptContext,
  252. UnifiedRegex::RegexStats::Use use,
  253. JavascriptRegExp* regExp,
  254. const char16 *const input,
  255. const CharCount inputLength,
  256. const char16 *const replace = 0,
  257. const CharCount replaceLength = 0)
  258. {
  259. Assert(regExp);
  260. Assert(input);
  261. if (REGEX_CONFIG_FLAG(RegexProfile))
  262. {
  263. UnifiedRegex::RegexStats* stats =
  264. scriptContext->GetRegexStatsDatabase()->GetRegexStats(regExp->GetPattern());
  265. stats->useCounts[use]++;
  266. stats->inputLength += inputLength;
  267. }
  268. if (REGEX_CONFIG_FLAG(RegexTracing))
  269. {
  270. UnifiedRegex::DebugWriter* w = scriptContext->GetRegexDebugWriter();
  271. w->Print(_u("%s("), UnifiedRegex::RegexStats::UseNames[use]);
  272. regExp->GetPattern()->Print(w);
  273. w->Print(_u(", "));
  274. if (!CONFIG_FLAG(Verbose) && inputLength > 1024)
  275. w->Print(_u("\"<string too large>\""));
  276. else
  277. w->PrintQuotedString(input, inputLength);
  278. if (replace != 0)
  279. {
  280. Assert(use == UnifiedRegex::RegexStats::Replace);
  281. w->Print(_u(", "));
  282. if (!CONFIG_FLAG(Verbose) && replaceLength > 1024)
  283. w->Print(_u("\"<string too large>\""));
  284. else
  285. w->PrintQuotedString(replace, replaceLength);
  286. }
  287. w->PrintEOL(_u(");"));
  288. w->Flush();
  289. }
  290. }
  291. static void RegexHelperTrace(ScriptContext* scriptContext, UnifiedRegex::RegexStats::Use use, JavascriptRegExp* regExp, JavascriptString* input)
  292. {
  293. Assert(regExp);
  294. Assert(input);
  295. RegexHelperTrace(scriptContext, use, regExp, input->GetString(), input->GetLength());
  296. }
  297. static void RegexHelperTrace(ScriptContext* scriptContext, UnifiedRegex::RegexStats::Use use, JavascriptRegExp* regExp, JavascriptString* input, JavascriptString* replace)
  298. {
  299. Assert(regExp);
  300. Assert(input);
  301. Assert(replace);
  302. RegexHelperTrace(scriptContext, use, regExp, input->GetString(), input->GetLength(), replace->GetString(), replace->GetLength());
  303. }
  304. #endif
  305. // ----------------------------------------------------------------------
  306. // Regex entry points
  307. // ----------------------------------------------------------------------
  // State handed to the PrimBeginMatch / PrimMatch / PrimEndMatch calls that
  // RegexEs5MatchImpl makes around its matching loop.
  // NOTE(review): the per-field notes below are inferred from names and types only; the
  // Prim* helpers that read and write these fields are not visible here - confirm there.
  struct RegexMatchState
  {
      // Presumably the input characters being matched.
      const char16* input;
      // Presumably a temporary arena allocator object held while matching.
      TempArenaAllocatorObject* tempAllocatorObj;
      // Presumably the matcher used to run the pattern.
      UnifiedRegex::Matcher* matcher;
  };
  314. template <bool updateHistory>
  315. Var RegexHelper::RegexMatchImpl(ScriptContext* scriptContext, RecyclableObject *thisObj, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
  316. {
  317. ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
  318. // Normally, this check would be done in JavascriptRegExp::EntrySymbolMatch. However,
  319. // since the lowerer inlines String.prototype.match and directly calls the helper,
  320. // the check then would be bypassed. That's the reason we do the check here.
  321. if (scriptConfig->IsES6RegExSymbolsEnabled()
  322. && IsRegexSymbolMatchObservable(thisObj, scriptContext))
  323. {
  324. // We don't need to pass "updateHistory" here since the call to "exec" will handle it.
  325. return RegexEs6MatchImpl(scriptContext, thisObj, input, noResult, stackAllocationPointer);
  326. }
  327. else
  328. {
  329. PCWSTR varName = scriptConfig->IsES6RegExSymbolsEnabled()
  330. ? _u("RegExp.prototype[Symbol.match]")
  331. : _u("String.prototype.match");
  332. JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
  333. return RegexEs5MatchImpl<updateHistory>(scriptContext, regularExpression, input, noResult, stackAllocationPointer);
  334. }
  335. }
  336. bool RegexHelper::IsRegexSymbolMatchObservable(RecyclableObject* instance, ScriptContext* scriptContext)
  337. {
  338. DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
  339. return !JavascriptRegExp::HasOriginalRegExType(instance)
  340. || JavascriptRegExp::HasObservableExec(regexPrototype)
  341. || JavascriptRegExp::HasObservableGlobalFlag(regexPrototype)
  342. || JavascriptRegExp::HasObservableUnicodeFlag(regexPrototype);
  343. }
  344. Var RegexHelper::RegexEs6MatchImpl(ScriptContext* scriptContext, RecyclableObject *thisObj, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
  345. {
  346. PCWSTR const varName = _u("RegExp.prototype[Symbol.match]");
  347. if (!JavascriptRegExp::GetGlobalProperty(thisObj, scriptContext))
  348. {
  349. return JavascriptRegExp::CallExec(thisObj, input, varName, scriptContext);
  350. }
  351. else
  352. {
  353. bool unicode = JavascriptRegExp::GetUnicodeProperty(thisObj, scriptContext);
  354. JavascriptRegExp::SetLastIndexProperty(thisObj, TaggedInt::ToVarUnchecked(0), scriptContext);
  355. JavascriptArray* arrayResult = nullptr;
  356. do
  357. {
  358. Var result = JavascriptRegExp::CallExec(thisObj, input, varName, scriptContext);
  359. if (JavascriptOperators::IsNull(result))
  360. {
  361. break;
  362. }
  363. RecyclableObject* resultObj = ExecResultToRecyclableObject(result);
  364. JavascriptString* matchStr = GetMatchStrFromResult(resultObj, scriptContext);
  365. if (arrayResult == nullptr)
  366. {
  367. arrayResult = scriptContext->GetLibrary()->CreateArray();
  368. }
  369. arrayResult->DirectAppendItem(matchStr);
  370. AdvanceLastIndex(thisObj, input, matchStr, unicode, scriptContext);
  371. }
  372. while (true);
  373. return arrayResult != nullptr
  374. ? arrayResult
  375. : scriptContext->GetLibrary()->GetNull();
  376. }
  377. }
  // String.prototype.match (ES5 15.5.4.10)
  //
  // Runs regularExpression's pattern over input and builds the match result array.
  //   updateHistory          - when true, PropagateLastMatch is called with the last
  //                            successful and the last attempted match.
  //   noResult               - when true, no result array is created in the main loop, so
  //                            null is returned even if the pattern matched.
  //   stackAllocationPointer - forwarded unchanged to CreateMatchResult.
  // Returns null when there is no result array (nothing matched, or noResult was set);
  // otherwise the array. For a global pattern the array holds one substring per match; for
  // a non-global pattern it holds the overall match followed by the group bindings.
  template <bool updateHistory>
  Var RegexHelper::RegexEs5MatchImpl(ScriptContext* scriptContext, JavascriptRegExp *regularExpression, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
  {
      UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
      const char16* inputStr = input->GetString();
      CharCount inputLength = input->GetLength();
#if ENABLE_REGEX_CONFIG_OPTIONS
      RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Match, regularExpression, input);
#endif
      UnifiedRegex::GroupInfo lastSuccessfulMatch; // initially undefined
      UnifiedRegex::GroupInfo lastActualMatch; // initially undefined
#ifdef REGEX_TRIGRAMS
      // Trigram fast path: taken only when the script context has a trigram alphabet, the
      // input is at least MinTrigramInputLength long and the pattern carries trigram info.
      UnifiedRegex::TrigramAlphabet* trigramAlphabet = scriptContext->GetTrigramAlphabet();
      UnifiedRegex::TrigramInfo* trigramInfo = pattern->rep.unified.trigramInfo;
      if (trigramAlphabet != NULL && inputLength >= MinTrigramInputLength && trigramInfo != NULL)
      {
          // MegaMatch is run only while the alphabet has no input recorded yet.
          if (trigramAlphabet->input == NULL)
          {
              trigramAlphabet->MegaMatch((char16*)inputStr, inputLength);
          }
          if (trigramInfo->isTrigramPattern)
          {
              if (trigramInfo->resultCount > 0)
              {
                  // The last recorded offset becomes the last successful match.
                  lastSuccessfulMatch.offset = trigramInfo->offsets[trigramInfo->resultCount - 1];
                  lastSuccessfulMatch.length = UnifiedRegex::TrigramInfo::PatternLength;
              }
              // else: leave lastMatch undefined
              // Make sure a matcher is allocated and holds valid last match in case the RegExp constructor
              // needs to fill-in details from the last match via JavascriptRegExpConstructor::EnsureValues
              Assert(pattern->rep.unified.program != 0);
              if (pattern->rep.unified.matcher == 0)
                  pattern->rep.unified.matcher = UnifiedRegex::Matcher::New(scriptContext, pattern);
              *pattern->rep.unified.matcher->GroupIdToGroupInfo(0) = lastSuccessfulMatch;
              Assert(pattern->IsGlobal());
              JavascriptArray* arrayResult = CreateMatchResult(stackAllocationPointer, scriptContext, /* isGlobal */ true, pattern->NumGroups(), input);
              FinalizeMatchResult(scriptContext, /* isGlobal */ true, arrayResult, lastSuccessfulMatch);
              if (trigramInfo->resultCount > 0)
              {
                  if (trigramInfo->hasCachedResultString)
                  {
                      // Reuse the result strings cached on the trigram info.
                      for (int k = 0; k < trigramInfo->resultCount; k++)
                      {
                          arrayResult->DirectSetItemAt(k,
                              static_cast<Js::JavascriptString*>(trigramInfo->cachedResult[k]));
                      }
                  }
                  else
                  {
                      // Build each result substring, store it in the array and cache it on
                      // the trigram info.
                      for (int k = 0; k < trigramInfo->resultCount; k++)
                      {
                          JavascriptString * str = SubString::New(input, trigramInfo->offsets[k], UnifiedRegex::TrigramInfo::PatternLength);
                          trigramInfo->cachedResult[k] = str;
                          arrayResult->DirectSetItemAt(k, str);
                      }
                      trigramInfo->hasCachedResultString = true;
                  }
              } // otherwise, there are no results and null will be returned
              if (updateHistory)
              {
                  PropagateLastMatch(scriptContext, /* isGlobal */ true, pattern->IsSticky(), regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
              }
              return lastSuccessfulMatch.IsUndefined() ? scriptContext->GetLibrary()->GetNull() : arrayResult;
          }
      }
#endif
      // If global regex, result array holds substrings for each match, and group bindings are ignored
      // If non-global regex, result array holds overall substring and each group binding substring
      const bool isGlobal = pattern->IsGlobal();
      const bool isSticky = pattern->IsSticky();
      JavascriptArray* arrayResult = 0; // created lazily on the first match (unless noResult)
      RegexMatchState state;
      // If global = false and sticky = true, set offset = lastIndex, else set offset = 0
      CharCount offset = 0;
      if (!isGlobal && isSticky)
      {
          offset = regularExpression->GetLastIndex();
      }
      uint32 globalIndex = 0; // index of the next element to write in arrayResult
      PrimBeginMatch(state, scriptContext, pattern, inputStr, inputLength, false);
      // The body runs once for a non-global pattern and until a match fails (or the offset
      // passes the end of the input) for a global one.
      do
      {
          if (offset > inputLength)
          {
              lastActualMatch.Reset();
              break;
          }
          lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
          if (lastActualMatch.IsUndefined())
              break;
          lastSuccessfulMatch = lastActualMatch;
          if (!noResult)
          {
              if (arrayResult == 0)
                  arrayResult = CreateMatchResult(stackAllocationPointer, scriptContext, isGlobal, pattern->NumGroups(), input);
              JavascriptString *const matchedString = SubString::New(input, lastActualMatch.offset, lastActualMatch.length);
              if (isGlobal)
                  arrayResult->DirectSetItemAt(globalIndex, matchedString);
              else
              {
                  // The array's head segment up to length - 1 may not be filled. Write to the head segment element directly
                  // instead of calling a helper that expects the segment to be pre-filled.
                  Assert(globalIndex < arrayResult->GetHead()->length);
                  static_cast<SparseArraySegment<Var> *>(arrayResult->GetHead())->elements[globalIndex] = matchedString;
              }
              globalIndex++;
          }
          // Advance by at least one character so that an empty match cannot make the loop
          // retry the same offset.
          offset = lastActualMatch.offset + max(lastActualMatch.length, static_cast<CharCountOrFlag>(1));
      } while (isGlobal);
      PrimEndMatch(state, scriptContext, pattern);
      if (updateHistory)
      {
          PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
      }
      // No array means either nothing matched or noResult was requested.
      if (arrayResult == 0)
      {
          return scriptContext->GetLibrary()->GetNull();
      }
      const int numGroups = pattern->NumGroups();
      if (!isGlobal)
      {
          if (numGroups > 1)
          {
              // Overall match already captured in index 0 by above, so just grab the groups
              Var nonMatchValue = NonMatchValue(scriptContext, false);
              Field(Var) *elements = ((SparseArraySegment<Var>*)arrayResult->GetHead())->elements;
              for (uint groupId = 1; groupId < (uint)numGroups; groupId++)
              {
                  Assert(groupId < arrayResult->GetHead()->left + arrayResult->GetHead()->length);
                  elements[groupId] = GetGroup(scriptContext, pattern, input, nonMatchValue, groupId);
              }
          }
          FinalizeMatchResult(scriptContext, /* isGlobal */ false, arrayResult, lastSuccessfulMatch);
      }
      else
      {
          FinalizeMatchResult(scriptContext, /* isGlobal */ true, arrayResult, lastSuccessfulMatch);
      }
      return arrayResult;
  }
  519. // RegExp.prototype.exec (ES5 15.10.6.2)
  520. Var RegexHelper::RegexExecImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, bool noResult, void *const stackAllocationPointer)
  521. {
  522. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  523. #if ENABLE_REGEX_CONFIG_OPTIONS
  524. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Exec, regularExpression, input);
  525. #endif
  526. const bool isGlobal = pattern->IsGlobal();
  527. const bool isSticky = pattern->IsSticky();
  528. CharCount offset;
  529. CharCount inputLength = input->GetLength();
  530. if (!GetInitialOffset(isGlobal, isSticky, regularExpression, inputLength, offset))
  531. {
  532. return scriptContext->GetLibrary()->GetNull();
  533. }
  534. UnifiedRegex::GroupInfo match; // initially undefined
  535. if (offset <= inputLength)
  536. {
  537. const char16* inputStr = input->GetString();
  538. match = SimpleMatch(scriptContext, pattern, inputStr, inputLength, offset);
  539. }
  540. // else: match remains undefined
  541. PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, match, match, true, true);
  542. if (noResult || match.IsUndefined())
  543. {
  544. return scriptContext->GetLibrary()->GetNull();
  545. }
  546. const int numGroups = pattern->NumGroups();
  547. Assert(numGroups >= 0);
  548. JavascriptArray* result = CreateExecResult(stackAllocationPointer, scriptContext, numGroups, input, match);
  549. Var nonMatchValue = NonMatchValue(scriptContext, false);
  550. Field(Var) *elements = ((SparseArraySegment<Var>*)result->GetHead())->elements;
  551. for (uint groupId = 0; groupId < (uint)numGroups; groupId++)
  552. {
  553. Assert(groupId < result->GetHead()->left + result->GetHead()->length);
  554. elements[groupId] = GetGroup(scriptContext, pattern, input, nonMatchValue, groupId);
  555. }
  556. return result;
  557. }
  558. Var RegexHelper::RegexTest(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString *input)
  559. {
  560. if (scriptContext->GetConfig()->IsES6RegExSymbolsEnabled()
  561. && IsRegexTestObservable(thisObj, scriptContext))
  562. {
  563. return RegexEs6TestImpl(scriptContext, thisObj, input);
  564. }
  565. else
  566. {
  567. JavascriptRegExp* regularExpression =
  568. JavascriptRegExp::ToRegExp(thisObj, _u("RegExp.prototype.test"), scriptContext);
  569. return RegexEs5TestImpl(scriptContext, regularExpression, input);
  570. }
  571. }
  572. bool RegexHelper::IsRegexTestObservable(RecyclableObject* instance, ScriptContext* scriptContext)
  573. {
  574. DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
  575. return !JavascriptRegExp::HasOriginalRegExType(instance)
  576. || JavascriptRegExp::HasObservableExec(regexPrototype);
  577. }
  578. Var RegexHelper::RegexEs6TestImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString *input)
  579. {
  580. Var match = JavascriptRegExp::CallExec(thisObj, input, _u("RegExp.prototype.test"), scriptContext);
  581. return JavascriptBoolean::ToVar(!JavascriptOperators::IsNull(match), scriptContext);
  582. }
  583. // RegExp.prototype.test (ES5 15.10.6.3)
  584. Var RegexHelper::RegexEs5TestImpl(ScriptContext* scriptContext, JavascriptRegExp *regularExpression, JavascriptString *input)
  585. {
  586. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  587. const char16* inputStr = input->GetString();
  588. CharCount inputLength = input->GetLength();
  589. UnifiedRegex::GroupInfo match; // initially undefined
  590. const bool isGlobal = pattern->IsGlobal();
  591. const bool isSticky = pattern->IsSticky();
  592. const bool useCache = !isGlobal && !isSticky;
  593. UnifiedRegex::RegExpTestCache* cache = nullptr;
  594. JavascriptString * cachedInput = nullptr;
  595. uint cacheIndex = 0;
  596. bool cacheHit = false;
  597. bool cachedResult = false;
  598. if (useCache)
  599. {
  600. cache = pattern->EnsureTestCache();
  601. cacheIndex = UnifiedRegex::RegexPattern::GetTestCacheIndex(input);
  602. cachedInput = cache->inputArray[cacheIndex] != nullptr ? cache->inputArray[cacheIndex]->Get() : nullptr;
  603. cacheHit = cachedInput == input;
  604. }
  605. #if ENABLE_REGEX_CONFIG_OPTIONS
  606. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Test, regularExpression, input);
  607. UnifiedRegex::RegexPattern::TraceTestCache(cacheHit, input, cachedInput, !useCache);
  608. #endif
  609. if (cacheHit)
  610. {
  611. Assert(useCache);
  612. cachedResult = (cache->resultBV.Test(cacheIndex) != 0);
  613. // If our cache says this test should produce a match (which we aren't going to compute),
  614. // notify the Ctor to invalidate the last match so it must be recomputed before access.
  615. if (cachedResult)
  616. {
  617. InvalidateLastMatchOnCtor(scriptContext, regularExpression, input);
  618. }
  619. // for debug builds, let's still do the real test so we can validate values in the cache
  620. #if !DBG
  621. return JavascriptBoolean::ToVar(cachedResult, scriptContext);
  622. #endif
  623. }
  624. CharCount offset;
  625. if (!GetInitialOffset(isGlobal, isSticky, regularExpression, inputLength, offset))
  626. {
  627. if (useCache)
  628. {
  629. Assert(offset == 0);
  630. Assert(!cacheHit || cachedInput == input);
  631. Assert(!cacheHit || cachedResult == false);
  632. cache->inputArray[cacheIndex] = regularExpression->GetRecycler()->CreateWeakReferenceHandle(input);
  633. cache->resultBV.Clear(cacheIndex);
  634. }
  635. return scriptContext->GetLibrary()->GetFalse();
  636. }
  637. if (offset <= inputLength)
  638. {
  639. match = SimpleMatch(scriptContext, pattern, inputStr, inputLength, offset);
  640. }
  641. // else: match remains undefined
  642. PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, match, match, true, true);
  643. bool wasFound = !match.IsUndefined();
  644. if (useCache)
  645. {
  646. Assert(offset == 0);
  647. Assert(!cacheHit || cachedInput == input);
  648. Assert(!cacheHit || cachedResult == wasFound);
  649. cache->inputArray[cacheIndex] = regularExpression->GetRecycler()->CreateWeakReferenceHandle(input);
  650. if (wasFound)
  651. {
  652. cache->resultBV.Set(cacheIndex);
  653. }
  654. else
  655. {
  656. cache->resultBV.Clear(cacheIndex);
  657. }
  658. }
  659. return JavascriptBoolean::ToVar(wasFound, scriptContext);
  660. }
  661. template<typename GroupFn>
  662. void RegexHelper::ReplaceFormatString
  663. ( ScriptContext* scriptContext
  664. , int numGroups
  665. , GroupFn getGroup
  666. , JavascriptString* input
  667. , const char16* matchedString
  668. , UnifiedRegex::GroupInfo match
  669. , JavascriptString* replace
  670. , int substitutions
  671. , __in_ecount(substitutions) CharCount* substitutionOffsets
  672. , CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)>& concatenated )
  673. {
  674. Var nonMatchValue = NonMatchValue(scriptContext, false);
  675. const CharCount inputLength = input->GetLength();
  676. const char16* replaceStr = replace->GetString();
  677. const CharCount replaceLength = replace->GetLength();
  678. CharCount offset = 0;
  679. for (int i = 0; i < substitutions; i++)
  680. {
  681. CharCount substitutionOffset = substitutionOffsets[i];
  682. concatenated.Append(replace, offset, substitutionOffset - offset);
  683. char16 currentChar = replaceStr[substitutionOffset + 1];
  684. if (currentChar >= _u('0') && currentChar <= _u('9'))
  685. {
  686. // We've found a substitution ref, like $32. In accordance with the standard (sec-getsubstitution),
  687. // we recognize at most two decimal digits after the dollar sign.
  688. // This should be unsigned, but this would cause lots of compiler warnings unless we also make
  689. // numGroups unsigned, because of a comparison below.
  690. int captureIndex = (int)(currentChar - _u('0'));
  691. Assert(0 <= captureIndex && captureIndex <= 9); // numeric value of single decimal digit
  692. offset = substitutionOffset + 2;
  693. if (offset < replaceLength)
  694. {
  695. currentChar = replaceStr[substitutionOffset + 2];
  696. if (currentChar >= _u('0') && currentChar <= _u('9'))
  697. {
  698. // Should also be unsigned; see captureIndex above.
  699. int tempCaptureIndex = (10 * captureIndex) + (int)(currentChar - _u('0'));
  700. Assert(0 <= tempCaptureIndex && tempCaptureIndex < 100); // numeric value of 2-digit positive decimal number
  701. if (tempCaptureIndex < numGroups)
  702. {
  703. captureIndex = tempCaptureIndex;
  704. offset = substitutionOffset + 3;
  705. }
  706. }
  707. }
  708. Assert(0 <= captureIndex && captureIndex < 100); // as above, value of 2-digit positive decimal number
  709. if (captureIndex < numGroups && (captureIndex != 0))
  710. {
  711. Var group = getGroup(captureIndex, nonMatchValue);
  712. if (JavascriptString::Is(group))
  713. concatenated.Append(JavascriptString::UnsafeFromVar(group));
  714. else if (group != nonMatchValue)
  715. concatenated.Append(replace, substitutionOffset, offset - substitutionOffset);
  716. }
  717. else
  718. concatenated.Append(replace, substitutionOffset, offset - substitutionOffset);
  719. }
  720. else
  721. {
  722. switch (currentChar)
  723. {
  724. case _u('$'): // literal '$' character
  725. concatenated.Append(_u('$'));
  726. offset = substitutionOffset + 2;
  727. break;
  728. case _u('&'): // matched string
  729. concatenated.Append(matchedString, match.length);
  730. offset = substitutionOffset + 2;
  731. break;
  732. case _u('`'): // left context
  733. concatenated.Append(input, 0, match.offset);
  734. offset = substitutionOffset + 2;
  735. break;
  736. case _u('\''): // right context
  737. if (match.EndOffset() < inputLength)
  738. {
  739. concatenated.Append(input, match.EndOffset(), inputLength - match.EndOffset());
  740. }
  741. offset = substitutionOffset + 2;
  742. break;
  743. default:
  744. concatenated.Append(_u('$'));
  745. offset = substitutionOffset + 1;
  746. break;
  747. }
  748. }
  749. }
  750. concatenated.Append(replace, offset, replaceLength - offset);
  751. }
  752. int RegexHelper::GetReplaceSubstitutions(const char16 * const replaceStr, CharCount const replaceLength,
  753. ArenaAllocator * const tempAllocator, CharCount** const substitutionOffsetsOut)
  754. {
  755. int substitutions = 0;
  756. for (CharCount i = 0; i < replaceLength; i++)
  757. {
  758. if (replaceStr[i] == _u('$'))
  759. {
  760. if (++i < replaceLength)
  761. {
  762. substitutions++;
  763. }
  764. }
  765. }
  766. if (substitutions > 0)
  767. {
  768. CharCount* substitutionOffsets = AnewArray(tempAllocator, CharCount, substitutions);
  769. substitutions = 0;
  770. for (CharCount i = 0; i < replaceLength; i++)
  771. {
  772. if (replaceStr[i] == _u('$'))
  773. {
  774. if (i < (replaceLength - 1))
  775. {
  776. #pragma prefast(suppress:26000, "index doesn't overflow the buffer")
  777. substitutionOffsets[substitutions] = i;
  778. i++;
  779. substitutions++;
  780. }
  781. }
  782. }
  783. *substitutionOffsetsOut = substitutionOffsets;
  784. }
  785. return substitutions;
  786. }
  787. Var RegexHelper::RegexReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult)
  788. {
  789. ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
  790. if (scriptConfig->IsES6RegExSymbolsEnabled() && IsRegexSymbolReplaceObservable(thisObj, scriptContext))
  791. {
  792. return RegexEs6ReplaceImpl(scriptContext, thisObj, input, replace, noResult);
  793. }
  794. else
  795. {
  796. PCWSTR varName = scriptConfig->IsES6RegExSymbolsEnabled()
  797. ? _u("RegExp.prototype[Symbol.replace]")
  798. : _u("String.prototype.replace");
  799. JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
  800. return RegexEs5ReplaceImpl(scriptContext, regularExpression, input, replace, noResult);
  801. }
  802. }
  803. bool RegexHelper::IsRegexSymbolReplaceObservable(RecyclableObject* instance, ScriptContext* scriptContext)
  804. {
  805. DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
  806. return !JavascriptRegExp::HasOriginalRegExType(instance)
  807. || JavascriptRegExp::HasObservableUnicodeFlag(regexPrototype)
  808. || JavascriptRegExp::HasObservableExec(regexPrototype)
  809. || JavascriptRegExp::HasObservableGlobalFlag(regexPrototype);
  810. }
  811. Var RegexHelper::RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult)
  812. {
  813. auto appendReplacement = [&](
  814. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)>& resultBuilder,
  815. ArenaAllocator* tempAlloc,
  816. JavascriptString* matchStr,
  817. int numberOfCaptures,
  818. Field(Var)* captures,
  819. CharCount position)
  820. {
  821. CharCount* substitutionOffsets = nullptr;
  822. int substitutions = GetReplaceSubstitutions(
  823. replace->GetString(),
  824. replace->GetLength(),
  825. tempAlloc,
  826. &substitutionOffsets);
  827. auto getGroup = [&](int captureIndex, Var nonMatchValue) {
  828. return captureIndex <= numberOfCaptures ? PointerValue(captures[captureIndex]) : nonMatchValue;
  829. };
  830. UnifiedRegex::GroupInfo match(position, matchStr->GetLength());
  831. int numGroups = numberOfCaptures + 1; // Take group 0 into account.
  832. ReplaceFormatString(
  833. scriptContext,
  834. numGroups,
  835. getGroup,
  836. input,
  837. matchStr->GetString(),
  838. match,
  839. replace,
  840. substitutions,
  841. substitutionOffsets,
  842. resultBuilder);
  843. };
  844. return RegexEs6ReplaceImpl(scriptContext, thisObj, input, appendReplacement, noResult);
  845. }
  846. Var RegexHelper::RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptFunction* replaceFn)
  847. {
  848. auto appendReplacement = [&](
  849. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)>& resultBuilder,
  850. ArenaAllocator* tempAlloc,
  851. JavascriptString* matchStr,
  852. int numberOfCaptures,
  853. Field(Var)* captures,
  854. CharCount position)
  855. {
  856. // replaceFn Arguments:
  857. //
  858. // 0: this
  859. // 1: matched
  860. // 2: capture1
  861. // ...
  862. // N + 1: capture N
  863. // N + 2: position
  864. // N + 3: input
  865. // Number of captures can be at most 99, so we won't overflow.
  866. ushort argCount = (ushort) numberOfCaptures + 4;
  867. PROBE_STACK_NO_DISPOSE(scriptContext, argCount * sizeof(Var));
  868. Var* args = (Var*) _alloca(argCount * sizeof(Var));
  869. args[0] = scriptContext->GetLibrary()->GetUndefined();
  870. #pragma prefast(suppress:6386, "The write is within the bounds")
  871. args[1] = matchStr;
  872. for (int i = 1; i <= numberOfCaptures; ++i)
  873. {
  874. args[i + 1] = captures[i];
  875. }
  876. args[numberOfCaptures + 2] = JavascriptNumber::ToVar(position, scriptContext);
  877. args[numberOfCaptures + 3] = input;
  878. Js::Var replaceFnResult = scriptContext->GetThreadContext()->ExecuteImplicitCall(replaceFn, Js::ImplicitCall_Accessor, [=]()->Js::Var
  879. {
  880. return replaceFn->CallFunction(Arguments(CallInfo(argCount), args));
  881. });
  882. JavascriptString* replace = JavascriptConversion::ToString(replaceFnResult, scriptContext);
  883. resultBuilder.Append(replace);
  884. };
  885. return RegexEs6ReplaceImpl(scriptContext, thisObj, input, appendReplacement, /* noResult */ false);
  886. }
  887. template<typename ReplacementFn>
  888. Var RegexHelper::RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, ReplacementFn appendReplacement, bool noResult)
  889. {
  890. bool global = JavascriptRegExp::GetGlobalProperty(thisObj, scriptContext);
  891. bool unicode = false; // Dummy value. It isn't used below unless "global" is "true".
  892. if (global)
  893. {
  894. unicode = JavascriptRegExp::GetUnicodeProperty(thisObj, scriptContext);
  895. JavascriptRegExp::SetLastIndexProperty(thisObj, TaggedInt::ToVarUnchecked(0), scriptContext);
  896. }
  897. JavascriptString* accumulatedResult = nullptr;
  898. Recycler* recycler = scriptContext->GetRecycler();
  899. JsUtil::List<RecyclableObject*>* results = RecyclerNew(recycler, JsUtil::List<RecyclableObject*>, recycler);
  900. while (true)
  901. {
  902. PCWSTR varName = _u("RegExp.prototype[Symbol.replace]");
  903. Var result = JavascriptRegExp::CallExec(thisObj, input, varName, scriptContext);
  904. if (JavascriptOperators::IsNull(result))
  905. {
  906. break;
  907. }
  908. RecyclableObject* resultObj = ExecResultToRecyclableObject(result);
  909. results->Add(resultObj);
  910. if (!global)
  911. {
  912. break;
  913. }
  914. JavascriptString* matchStr = GetMatchStrFromResult(resultObj, scriptContext);
  915. AdvanceLastIndex(thisObj, input, matchStr, unicode, scriptContext);
  916. }
  917. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> accumulatedResultBuilder(scriptContext);
  918. CharCount inputLength = input->GetLength();
  919. CharCount nextSourcePosition = 0;
  920. size_t previousNumberOfCapturesToKeep = 0;
  921. Field(Var)* captures = nullptr;
  922. BEGIN_TEMP_ALLOCATOR(tempAlloc, scriptContext, _u("RegexHelper"))
  923. {
  924. results->Map([&](int resultIndex, RecyclableObject* resultObj) {
  925. int64 length = JavascriptConversion::ToLength(
  926. JavascriptOperators::GetProperty(resultObj, PropertyIds::length, scriptContext),
  927. scriptContext);
  928. uint64 numberOfCaptures = (uint64) max(length - 1, (int64) 0);
  929. JavascriptString* matchStr = GetMatchStrFromResult(resultObj, scriptContext);
  930. int64 index = JavascriptConversion::ToLength(
  931. JavascriptOperators::GetProperty(resultObj, PropertyIds::index, scriptContext),
  932. scriptContext);
  933. CharCount position = max(
  934. min(JavascriptRegExp::GetIndexOrMax(index), inputLength),
  935. (CharCount) 0);
  936. // Capture groups can be referenced using at most two digits.
  937. const uint64 maxNumberOfCaptures = 99;
  938. size_t numberOfCapturesToKeep = (size_t) min(numberOfCaptures, maxNumberOfCaptures);
  939. if (captures == nullptr)
  940. {
  941. captures = RecyclerNewArray(recycler, Field(Var), numberOfCapturesToKeep + 1);
  942. }
  943. else if (numberOfCapturesToKeep != previousNumberOfCapturesToKeep)
  944. {
  945. size_t existingBytes = (previousNumberOfCapturesToKeep + 1) * sizeof(Var*);
  946. size_t requestedBytes = (numberOfCapturesToKeep + 1) * sizeof(Var*);
  947. captures = (Field(Var)*) recycler->Realloc(captures, existingBytes, requestedBytes);
  948. }
  949. previousNumberOfCapturesToKeep = numberOfCapturesToKeep;
  950. for (uint64 i = 1; i <= numberOfCaptures; ++i)
  951. {
  952. Var nextCapture = JavascriptOperators::GetItem(resultObj, i, scriptContext);
  953. if (!JavascriptOperators::IsUndefined(nextCapture))
  954. {
  955. nextCapture = JavascriptConversion::ToString(nextCapture, scriptContext);
  956. }
  957. if (i <= numberOfCapturesToKeep)
  958. {
  959. captures[i] = nextCapture;
  960. }
  961. }
  962. if (position >= nextSourcePosition)
  963. {
  964. CharCount substringLength = position - nextSourcePosition;
  965. accumulatedResultBuilder.Append(input, nextSourcePosition, substringLength);
  966. appendReplacement(accumulatedResultBuilder, tempAlloc, matchStr, (int) numberOfCapturesToKeep, captures, position);
  967. nextSourcePosition = JavascriptRegExp::AddIndex(position, matchStr->GetLength());
  968. }
  969. });
  970. }
  971. END_TEMP_ALLOCATOR(tempAlloc, scriptContext);
  972. if (nextSourcePosition < inputLength)
  973. {
  974. CharCount substringLength = inputLength - nextSourcePosition;
  975. accumulatedResultBuilder.Append(input, nextSourcePosition, substringLength);
  976. }
  977. accumulatedResult = accumulatedResultBuilder.ToString();
  978. Assert(accumulatedResult != nullptr);
  979. return accumulatedResult;
  980. }
  981. // String.prototype.replace, replace value has been converted to a string (ES5 15.5.4.11)
  982. Var RegexHelper::RegexEs5ReplaceImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace, bool noResult)
  983. {
  984. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  985. const char16* replaceStr = replace->GetString();
  986. CharCount replaceLength = replace->GetLength();
  987. const char16* inputStr = input->GetString();
  988. CharCount inputLength = input->GetLength();
  989. JavascriptString* newString = nullptr;
  990. #if ENABLE_REGEX_CONFIG_OPTIONS
  991. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Replace, regularExpression, input, replace);
  992. #endif
  993. RegexMatchState state;
  994. PrimBeginMatch(state, scriptContext, pattern, inputStr, inputLength, true);
  995. UnifiedRegex::GroupInfo lastActualMatch;
  996. UnifiedRegex::GroupInfo lastSuccessfulMatch;
  997. const bool isGlobal = pattern->IsGlobal();
  998. const bool isSticky = pattern->IsSticky();
  999. // If global = false and sticky = true, set offset = lastIndex, else set offset = 0
  1000. CharCount offset = 0;
  1001. if (!isGlobal && isSticky)
  1002. {
  1003. offset = regularExpression->GetLastIndex();
  1004. }
  1005. if (!noResult)
  1006. {
  1007. CharCount* substitutionOffsets = nullptr;
  1008. int substitutions = GetReplaceSubstitutions(replaceStr, replaceLength,
  1009. state.tempAllocatorObj->GetAllocator(), &substitutionOffsets);
  1010. // Use to see if we already have partial result populated in concatenated
  1011. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> concatenated(scriptContext);
  1012. // If lastIndex > 0, append input[0..offset] characters to the result
  1013. if (offset > 0)
  1014. {
  1015. concatenated.Append(input, 0, min(offset, inputLength));
  1016. }
  1017. do
  1018. {
  1019. if (offset > inputLength)
  1020. {
  1021. lastActualMatch.Reset();
  1022. break;
  1023. }
  1024. lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
  1025. if (lastActualMatch.IsUndefined())
  1026. break;
  1027. lastSuccessfulMatch = lastActualMatch;
  1028. concatenated.Append(input, offset, lastActualMatch.offset - offset);
  1029. if (substitutionOffsets != 0)
  1030. {
  1031. auto getGroup = [&](int captureIndex, Var nonMatchValue) {
  1032. return GetGroup(scriptContext, pattern, input, nonMatchValue, captureIndex);
  1033. };
  1034. const char16* matchedString = inputStr + lastActualMatch.offset;
  1035. ReplaceFormatString(scriptContext, pattern->NumGroups(), getGroup, input, matchedString, lastActualMatch, replace, substitutions, substitutionOffsets, concatenated);
  1036. }
  1037. else
  1038. {
  1039. concatenated.Append(replace);
  1040. }
  1041. if (lastActualMatch.length == 0)
  1042. {
  1043. if (lastActualMatch.offset < inputLength)
  1044. {
  1045. concatenated.Append(inputStr[lastActualMatch.offset]);
  1046. }
  1047. offset = lastActualMatch.offset + 1;
  1048. }
  1049. else
  1050. {
  1051. offset = lastActualMatch.EndOffset();
  1052. }
  1053. }
  1054. while (isGlobal);
  1055. if (offset == 0)
  1056. {
  1057. // There was no successful match so the result is the input string.
  1058. newString = input;
  1059. }
  1060. else
  1061. {
  1062. if (offset < inputLength)
  1063. {
  1064. concatenated.Append(input, offset, inputLength - offset);
  1065. }
  1066. newString = concatenated.ToString();
  1067. }
  1068. substitutionOffsets = 0;
  1069. }
  1070. else
  1071. {
  1072. do
  1073. {
  1074. if (offset > inputLength)
  1075. {
  1076. lastActualMatch.Reset();
  1077. break;
  1078. }
  1079. lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
  1080. if (lastActualMatch.IsUndefined())
  1081. break;
  1082. lastSuccessfulMatch = lastActualMatch;
  1083. offset = lastActualMatch.length == 0? lastActualMatch.offset + 1 : lastActualMatch.EndOffset();
  1084. }
  1085. while (isGlobal);
  1086. newString = scriptContext->GetLibrary()->GetEmptyString();
  1087. }
  1088. PrimEndMatch(state, scriptContext, pattern);
  1089. PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
  1090. return newString;
  1091. }
  1092. Var RegexHelper::RegexReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptFunction* replacefn)
  1093. {
  1094. ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
  1095. if (scriptConfig->IsES6RegExSymbolsEnabled() && IsRegexSymbolReplaceObservable(thisObj, scriptContext))
  1096. {
  1097. return RegexEs6ReplaceImpl(scriptContext, thisObj, input, replacefn);
  1098. }
  1099. else
  1100. {
  1101. PCWSTR varName = scriptConfig->IsES6RegExSymbolsEnabled()
  1102. ? _u("RegExp.prototype[Symbol.replace]")
  1103. : _u("String.prototype.replace");
  1104. JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
  1105. return RegexEs5ReplaceImpl(scriptContext, regularExpression, input, replacefn);
  1106. }
  1107. }
  1108. // String.prototype.replace, replace value is a function (ES5 15.5.4.11)
  1109. Var RegexHelper::RegexEs5ReplaceImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptFunction* replacefn)
  1110. {
  1111. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  1112. JavascriptString* newString = nullptr;
  1113. const char16* inputStr = input->GetString();
  1114. CharCount inputLength = input->GetLength();
  1115. const int rawNumGroups = pattern->NumGroups();
  1116. Var nonMatchValue = NonMatchValue(scriptContext, false);
  1117. UnifiedRegex::GroupInfo lastMatch; // initially undefined
  1118. AssertOrFailFast(0 < rawNumGroups && rawNumGroups <= INT16_MAX);
  1119. const uint16 numGroups = uint16(rawNumGroups);
  1120. #if ENABLE_REGEX_CONFIG_OPTIONS
  1121. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Replace, regularExpression, input, scriptContext->GetLibrary()->CreateStringFromCppLiteral(_u("<replace function>")));
  1122. #endif
  1123. RegexMatchState state;
  1124. PrimBeginMatch(state, scriptContext, pattern, inputStr, inputLength, false);
  1125. // NOTE: These must be kept out of the scope of the try below!
  1126. const bool isGlobal = pattern->IsGlobal();
  1127. const bool isSticky = pattern->IsSticky();
  1128. // If global = true, set lastIndex to 0 in case it is used in replacefn
  1129. if (isGlobal)
  1130. {
  1131. regularExpression->SetLastIndex(0);
  1132. }
  1133. // If global = false and sticky = true, set offset = lastIndex, else set offset = 0
  1134. CharCount offset = 0;
  1135. if (!isGlobal && isSticky)
  1136. {
  1137. offset = regularExpression->GetLastIndex();
  1138. }
  1139. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> concatenated(scriptContext);
  1140. UnifiedRegex::GroupInfo lastActualMatch;
  1141. UnifiedRegex::GroupInfo lastSuccessfulMatch;
  1142. // Replace function must be called with arguments (<function's this>, group0, ..., groupn, offset, input)
  1143. // The garbage collector must know about this array since it is being passed back into script land
  1144. Var* replaceArgs;
  1145. PROBE_STACK_NO_DISPOSE(scriptContext, (numGroups + 3) * sizeof(Var));
  1146. replaceArgs = (Var*)_alloca((numGroups + 3) * sizeof(Var));
  1147. replaceArgs[0] = scriptContext->GetLibrary()->GetUndefined();
  1148. replaceArgs[numGroups + 2] = input;
  1149. if (offset > 0)
  1150. {
  1151. concatenated.Append(input, 0, min(offset, inputLength));
  1152. }
  1153. do
  1154. {
  1155. if (offset > inputLength)
  1156. {
  1157. lastActualMatch.Reset();
  1158. break;
  1159. }
  1160. lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
  1161. if (lastActualMatch.IsUndefined())
  1162. break;
  1163. lastSuccessfulMatch = lastActualMatch;
  1164. for (int groupId = 0; groupId < numGroups; groupId++)
  1165. replaceArgs[groupId + 1] = GetGroup(scriptContext, pattern, input, nonMatchValue, groupId);
  1166. replaceArgs[numGroups + 1] = JavascriptNumber::ToVar(lastActualMatch.offset, scriptContext);
  1167. // The called function must see the global state updated by the current match
  1168. // (Should the function reach into a RegExp field, the pattern will still be valid, thus there's no
  1169. // danger of the primitive regex matcher being re-entered)
  1170. // WARNING: We go off into script land here, which way in turn invoke a regex operation, even on the
  1171. // same regex.
  1172. ThreadContext* threadContext = scriptContext->GetThreadContext();
  1173. Var replaceVar = threadContext->ExecuteImplicitCall(replacefn, ImplicitCall_Accessor, [=]()->Js::Var
  1174. {
  1175. return replacefn->CallFunction(Arguments(CallInfo(UInt16Math::Add(numGroups, 3)), replaceArgs));
  1176. });
  1177. JavascriptString* replace = JavascriptConversion::ToString(replaceVar, scriptContext);
  1178. concatenated.Append(input, offset, lastActualMatch.offset - offset);
  1179. concatenated.Append(replace);
  1180. if (lastActualMatch.length == 0)
  1181. {
  1182. if (lastActualMatch.offset < inputLength)
  1183. {
  1184. concatenated.Append(inputStr[lastActualMatch.offset]);
  1185. }
  1186. offset = lastActualMatch.offset + 1;
  1187. }
  1188. else
  1189. {
  1190. offset = lastActualMatch.EndOffset();
  1191. }
  1192. }
  1193. while (isGlobal);
  1194. PrimEndMatch(state, scriptContext, pattern);
  1195. if (offset == 0)
  1196. {
  1197. // There was no successful match so the result is the input string.
  1198. newString = input;
  1199. }
  1200. else
  1201. {
  1202. if (offset < inputLength)
  1203. {
  1204. concatenated.Append(input, offset, inputLength - offset);
  1205. }
  1206. newString = concatenated.ToString();
  1207. }
  1208. PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
  1209. return newString;
  1210. }
  1211. Var RegexHelper::StringReplace(JavascriptString* match, JavascriptString* input, JavascriptString* replace)
  1212. {
  1213. CharCount matchedIndex = JavascriptString::strstr(input, match, true);
  1214. if (matchedIndex == CharCountFlag)
  1215. {
  1216. return input;
  1217. }
  1218. const char16 *const replaceStr = replace->GetString();
  1219. // Unfortunately, due to the possibility of there being $ escapes, we can't just wmemcpy the replace string. Check if we
  1220. // have a small replace string that we can quickly scan for '$', to see if we can just wmemcpy.
  1221. bool definitelyNoEscapes = replace->GetLength() == 0;
  1222. if(!definitelyNoEscapes && replace->GetLength() <= 8)
  1223. {
  1224. CharCount i = 0;
  1225. for(; i < replace->GetLength() && replaceStr[i] != _u('$'); ++i);
  1226. definitelyNoEscapes = i >= replace->GetLength();
  1227. }
  1228. if(definitelyNoEscapes)
  1229. {
  1230. const char16* inputStr = input->GetString();
  1231. const char16* prefixStr = inputStr;
  1232. CharCount prefixLength = (CharCount)matchedIndex;
  1233. const char16* postfixStr = inputStr + prefixLength + match->GetLength();
  1234. CharCount postfixLength = input->GetLength() - prefixLength - match->GetLength();
  1235. CharCount newLength = prefixLength + postfixLength + replace->GetLength();
  1236. BufferStringBuilder bufferString(newLength, match->GetScriptContext());
  1237. bufferString.SetContent(prefixStr, prefixLength,
  1238. replaceStr, replace->GetLength(),
  1239. postfixStr, postfixLength);
  1240. return bufferString.ToString();
  1241. }
  1242. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> concatenated(input->GetScriptContext());
  1243. // Copy portion of input string that precedes the matched substring
  1244. concatenated.Append(input, 0, matchedIndex);
  1245. // Copy the replace string with substitutions
  1246. CharCount i = 0, j = 0;
  1247. for(; j < replace->GetLength(); ++j)
  1248. {
  1249. if(replaceStr[j] == _u('$') && j + 1 < replace->GetLength())
  1250. {
  1251. switch(replaceStr[j + 1])
  1252. {
  1253. case _u('$'): // literal '$'
  1254. ++j;
  1255. concatenated.Append(replace, i, j - i);
  1256. i = j + 1;
  1257. break;
  1258. case _u('&'): // matched substring
  1259. concatenated.Append(replace, i, j - i);
  1260. concatenated.Append(match);
  1261. ++j;
  1262. i = j + 1;
  1263. break;
  1264. case _u('`'): // portion of input string that precedes the matched substring
  1265. concatenated.Append(replace, i, j - i);
  1266. concatenated.Append(input, 0, matchedIndex);
  1267. ++j;
  1268. i = j + 1;
  1269. break;
  1270. case _u('\''): // portion of input string that follows the matched substring
  1271. concatenated.Append(replace, i, j - i);
  1272. concatenated.Append(
  1273. input,
  1274. matchedIndex + match->GetLength(),
  1275. input->GetLength() - matchedIndex - match->GetLength());
  1276. ++j;
  1277. i = j + 1;
  1278. break;
  1279. default: // take both the initial '$' and the following character literally
  1280. ++j;
  1281. }
  1282. }
  1283. }
  1284. Assert(i <= j);
  1285. concatenated.Append(replace, i, j - i);
  1286. // Copy portion of input string that follows the matched substring
  1287. concatenated.Append(input, matchedIndex + match->GetLength(), input->GetLength() - matchedIndex - match->GetLength());
  1288. return concatenated.ToString();
  1289. }
  1290. Var RegexHelper::StringReplace(ScriptContext* scriptContext, JavascriptString* match, JavascriptString* input, JavascriptFunction* replacefn)
  1291. {
  1292. CharCount indexMatched = JavascriptString::strstr(input, match, true);
  1293. Assert(match->GetScriptContext() == scriptContext);
  1294. Assert(input->GetScriptContext() == scriptContext);
  1295. if (indexMatched != CharCountFlag)
  1296. {
  1297. ThreadContext* threadContext = scriptContext->GetThreadContext();
  1298. Var replaceVar = threadContext->ExecuteImplicitCall(replacefn, ImplicitCall_Accessor, [=]()->Js::Var
  1299. {
  1300. Var pThis = scriptContext->GetLibrary()->GetUndefined();
  1301. return CALL_FUNCTION(threadContext, replacefn, CallInfo(4), pThis, match, JavascriptNumber::ToVar((int)indexMatched, scriptContext), input);
  1302. });
  1303. JavascriptString* replace = JavascriptConversion::ToString(replaceVar, scriptContext);
  1304. const char16* inputStr = input->GetString();
  1305. const char16* prefixStr = inputStr;
  1306. CharCount prefixLength = indexMatched;
  1307. const char16* postfixStr = inputStr + prefixLength + match->GetLength();
  1308. CharCount postfixLength = input->GetLength() - prefixLength - match->GetLength();
  1309. CharCount newLength = prefixLength + postfixLength + replace->GetLength();
  1310. BufferStringBuilder bufferString(newLength, match->GetScriptContext());
  1311. bufferString.SetContent(prefixStr, prefixLength,
  1312. replace->GetString(), replace->GetLength(),
  1313. postfixStr, postfixLength);
  1314. return bufferString.ToString();
  1315. }
  1316. return input;
  1317. }
  1318. void RegexHelper::AppendSubString(ScriptContext* scriptContext, JavascriptArray* ary, JavascriptString* input, CharCount startInclusive, CharCount endExclusive)
  1319. {
  1320. Assert(endExclusive >= startInclusive);
  1321. Assert(endExclusive <= input->GetLength());
  1322. CharCount length = endExclusive - startInclusive;
  1323. JavascriptString* subString;
  1324. if (length == 0)
  1325. {
  1326. subString = scriptContext->GetLibrary()->GetEmptyString();
  1327. }
  1328. else if (length == 1)
  1329. {
  1330. subString = scriptContext->GetLibrary()->GetCharStringCache().GetStringForChar(input->GetString()[startInclusive]);
  1331. }
  1332. else
  1333. {
  1334. subString = SubString::New(input, startInclusive, length);
  1335. }
  1336. ary->DirectAppendItem(subString);
  1337. }
  1338. inline UnifiedRegex::RegexPattern *RegexHelper::GetSplitPattern(ScriptContext* scriptContext, JavascriptRegExp *regularExpression)
  1339. {
  1340. UnifiedRegex::RegexPattern* splitPattern = regularExpression->GetSplitPattern();
  1341. if (!splitPattern)
  1342. {
  1343. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  1344. bool isSticky = (pattern->GetFlags() & UnifiedRegex::StickyRegexFlag) != 0;
  1345. if (!isSticky)
  1346. {
  1347. splitPattern = pattern;
  1348. }
  1349. else
  1350. {
  1351. // When the sticky flag is present, the pattern will match the input only at
  1352. // the beginning since "lastIndex" is set to 0 before the first iteration.
  1353. // However, for split(), we need to look for the pattern anywhere in the input.
  1354. //
  1355. // One way to handle this is to use the original pattern with the sticky flag and
  1356. // when it fails, move to the next character and retry.
  1357. //
  1358. // Another way, which is implemented here, is to create another pattern without the
  1359. // sticky flag and have it automatically look for itself anywhere in the input. This
  1360. // way, we can also take advantage of the optimizations for the global search (e.g.,
  1361. // the Boyer-Moore string search).
  1362. InternalString source = pattern->GetSource();
  1363. UnifiedRegex::RegexFlags nonStickyFlags =
  1364. static_cast<UnifiedRegex::RegexFlags>(pattern->GetFlags() & ~UnifiedRegex::StickyRegexFlag);
  1365. splitPattern = CompileDynamic(
  1366. scriptContext,
  1367. source.GetBuffer(),
  1368. source.GetLength(),
  1369. nonStickyFlags,
  1370. pattern->IsLiteral());
  1371. }
  1372. regularExpression->SetSplitPattern(splitPattern);
  1373. }
  1374. return splitPattern;
  1375. }
  1376. Var RegexHelper::RegexSplitImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
  1377. {
  1378. ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
  1379. if (scriptConfig->IsES6RegExSymbolsEnabled()
  1380. && IsRegexSymbolSplitObservable(thisObj, scriptContext))
  1381. {
  1382. return RegexEs6SplitImpl(scriptContext, thisObj, input, limit, noResult, stackAllocationPointer);
  1383. }
  1384. else
  1385. {
  1386. PCWSTR varName = scriptContext->GetConfig()->IsES6RegExSymbolsEnabled()
  1387. ? _u("RegExp.prototype[Symbol.split]")
  1388. : _u("String.prototype.split");
  1389. JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
  1390. return RegexEs5SplitImpl(scriptContext, regularExpression, input, limit, noResult, stackAllocationPointer);
  1391. }
  1392. }
  1393. bool RegexHelper::IsRegexSymbolSplitObservable(RecyclableObject* instance, ScriptContext* scriptContext)
  1394. {
  1395. DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
  1396. return !JavascriptRegExp::HasOriginalRegExType(instance)
  1397. || JavascriptRegExp::HasObservableConstructor(regexPrototype)
  1398. || JavascriptRegExp::HasObservableFlags(regexPrototype)
  1399. || JavascriptRegExp::HasObservableExec(regexPrototype);
  1400. }
  1401. Var RegexHelper::RegexEs6SplitImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
  1402. {
  1403. PCWSTR const varName = _u("RegExp.prototype[Symbol.split]");
  1404. JavascriptFunction* defaultConstructor = scriptContext->GetLibrary()->GetRegExpConstructor();
  1405. RecyclableObject* speciesConstructor = JavascriptOperators::SpeciesConstructor(
  1406. thisObj,
  1407. defaultConstructor,
  1408. scriptContext);
  1409. AssertOrFailFast(JavascriptOperators::IsConstructor(speciesConstructor));
  1410. JavascriptString* flags = JavascriptConversion::ToString(
  1411. JavascriptOperators::GetProperty(thisObj, PropertyIds::flags, scriptContext),
  1412. scriptContext);
  1413. bool unicode = wcsstr(flags->GetString(), _u("u")) != nullptr;
  1414. flags = AppendStickyToFlagsIfNeeded(flags, scriptContext);
  1415. Var regEx = JavascriptOperators::NewObjectCreationHelper_ReentrancySafe(speciesConstructor, defaultConstructor, scriptContext->GetThreadContext(), [=]()->Js::Var
  1416. {
  1417. Js::Var args[] = { speciesConstructor, thisObj, flags };
  1418. Js::CallInfo callInfo(Js::CallFlags_New, _countof(args));
  1419. return JavascriptOperators::NewScObject(
  1420. speciesConstructor,
  1421. Js::Arguments(callInfo, args),
  1422. scriptContext);
  1423. });
  1424. RecyclableObject* splitter = RecyclableObject::UnsafeFromVar(regEx);
  1425. JavascriptArray* arrayResult = scriptContext->GetLibrary()->CreateArray();
  1426. if (limit == 0)
  1427. {
  1428. return arrayResult;
  1429. }
  1430. CharCount inputLength = input->GetLength();
  1431. if (inputLength == 0)
  1432. {
  1433. Var result = JavascriptRegExp::CallExec(splitter, input, varName, scriptContext);
  1434. if (!JavascriptOperators::IsNull(result))
  1435. {
  1436. return arrayResult;
  1437. }
  1438. arrayResult->DirectAppendItem(input);
  1439. return arrayResult;
  1440. }
  1441. CharCount substringStartIndex = 0; // 'p' in spec
  1442. CharCount substringEndIndex = substringStartIndex; // 'q' in spec
  1443. do // inputLength > 0
  1444. {
  1445. JavascriptRegExp::SetLastIndexProperty(splitter, substringEndIndex, scriptContext);
  1446. Var result = JavascriptRegExp::CallExec(splitter, input, varName, scriptContext); // 'z' in spec
  1447. if (JavascriptOperators::IsNull(result))
  1448. {
  1449. substringEndIndex = AdvanceStringIndex(input, substringEndIndex, unicode);
  1450. }
  1451. else
  1452. {
  1453. CharCount endIndex = JavascriptRegExp::GetLastIndexProperty(splitter, scriptContext); // 'e' in spec
  1454. endIndex = min(endIndex, inputLength);
  1455. if (endIndex == substringStartIndex)
  1456. {
  1457. substringEndIndex = AdvanceStringIndex(input, substringEndIndex, unicode);
  1458. }
  1459. else
  1460. {
  1461. AppendSubString(scriptContext, arrayResult, input, substringStartIndex, substringEndIndex);
  1462. if (arrayResult->GetLength() == limit)
  1463. {
  1464. return arrayResult;
  1465. }
  1466. substringStartIndex = endIndex;
  1467. RecyclableObject* resultObject = ExecResultToRecyclableObject(result);
  1468. int64 length = JavascriptConversion::ToLength(
  1469. JavascriptOperators::GetProperty(resultObject, PropertyIds::length, scriptContext),
  1470. scriptContext);
  1471. uint64 numberOfCaptures = max(length - 1, (int64) 0);
  1472. for (uint64 i = 1; i <= numberOfCaptures; ++i)
  1473. {
  1474. Var nextCapture = JavascriptOperators::GetItem(resultObject, i, scriptContext);
  1475. arrayResult->DirectAppendItem(nextCapture);
  1476. if (arrayResult->GetLength() == limit)
  1477. {
  1478. return arrayResult;
  1479. }
  1480. }
  1481. substringEndIndex = substringStartIndex;
  1482. }
  1483. }
  1484. }
  1485. while (substringEndIndex < inputLength);
  1486. AppendSubString(scriptContext, arrayResult, input, substringStartIndex, substringEndIndex);
  1487. return arrayResult;
  1488. }
  1489. JavascriptString* RegexHelper::AppendStickyToFlagsIfNeeded(JavascriptString* flags, ScriptContext* scriptContext)
  1490. {
  1491. const char16* flagsString = flags->GetString();
  1492. if (wcsstr(flagsString, _u("y")) == nullptr)
  1493. {
  1494. BEGIN_TEMP_ALLOCATOR(tempAlloc, scriptContext, _u("RegexHelper"))
  1495. {
  1496. StringBuilder<ArenaAllocator> bs(tempAlloc, flags->GetLength() + 1);
  1497. bs.Append(flagsString, flags->GetLength());
  1498. bs.Append(_u('y'));
  1499. flags = Js::JavascriptString::NewCopyBuffer(bs.Detach(), bs.Count(), scriptContext);
  1500. }
  1501. END_TEMP_ALLOCATOR(tempAlloc, scriptContext);
  1502. }
  1503. return flags;
  1504. }
  1505. // String.prototype.split (ES5 15.5.4.14)
  1506. Var RegexHelper::RegexEs5SplitImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
  1507. {
  1508. if (noResult && scriptContext->GetConfig()->SkipSplitOnNoResult())
  1509. {
  1510. // TODO: Fix this so that the side effect for PropagateLastMatch is done
  1511. return scriptContext->GetLibrary()->GetNull();
  1512. }
  1513. #if ENABLE_REGEX_CONFIG_OPTIONS
  1514. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Split, regularExpression, input);
  1515. #endif
  1516. JavascriptArray* ary = scriptContext->GetLibrary()->CreateArrayOnStack(stackAllocationPointer);
  1517. if (limit == 0)
  1518. {
  1519. // SPECIAL CASE: Zero limit
  1520. return ary;
  1521. }
  1522. UnifiedRegex::RegexPattern *splitPattern = GetSplitPattern(scriptContext, regularExpression);
  1523. const char16* inputStr = input->GetString();
  1524. CharCount inputLength = input->GetLength(); // s in spec
  1525. const int numGroups = splitPattern->NumGroups();
  1526. Var nonMatchValue = NonMatchValue(scriptContext, false);
  1527. UnifiedRegex::GroupInfo lastSuccessfulMatch; // initially undefined
  1528. RegexMatchState state;
  1529. PrimBeginMatch(state, scriptContext, splitPattern, inputStr, inputLength, false);
  1530. if (inputLength == 0)
  1531. {
  1532. // SPECIAL CASE: Empty string
  1533. UnifiedRegex::GroupInfo match = PrimMatch(state, scriptContext, splitPattern, inputLength, 0);
  1534. if (match.IsUndefined())
  1535. ary->DirectAppendItem(input);
  1536. else
  1537. lastSuccessfulMatch = match;
  1538. }
  1539. else
  1540. {
  1541. CharCount copyOffset = 0; // p in spec
  1542. CharCount startOffset = 0; // q in spec
  1543. CharCount inputLimit = inputLength;
  1544. while (startOffset < inputLimit)
  1545. {
  1546. UnifiedRegex::GroupInfo match = PrimMatch(state, scriptContext, splitPattern, inputLength, startOffset);
  1547. if (match.IsUndefined())
  1548. break;
  1549. lastSuccessfulMatch = match;
  1550. if (match.offset >= inputLimit)
  1551. break;
  1552. startOffset = match.offset;
  1553. CharCount endOffset = match.EndOffset(); // e in spec
  1554. if (endOffset == copyOffset)
  1555. startOffset++;
  1556. else
  1557. {
  1558. AppendSubString(scriptContext, ary, input, copyOffset, startOffset);
  1559. if (ary->GetLength() >= limit)
  1560. break;
  1561. startOffset = copyOffset = endOffset;
  1562. for (int groupId = 1; groupId < numGroups; groupId++)
  1563. {
  1564. ary->DirectAppendItem(GetGroup(scriptContext, splitPattern, input, nonMatchValue, groupId));
  1565. if (ary->GetLength() >= limit)
  1566. break;
  1567. }
  1568. }
  1569. }
  1570. if (ary->GetLength() < limit)
  1571. AppendSubString(scriptContext, ary, input, copyOffset, inputLength);
  1572. }
  1573. PrimEndMatch(state, scriptContext, splitPattern);
  1574. Assert(!splitPattern->IsSticky());
  1575. PropagateLastMatch
  1576. ( scriptContext
  1577. , splitPattern->IsGlobal()
  1578. , /* isSticky */ false
  1579. , regularExpression
  1580. , input
  1581. , lastSuccessfulMatch
  1582. , UnifiedRegex::GroupInfo()
  1583. , /* updateRegex */ true
  1584. , /* updateCtor */ true
  1585. , /* useSplitPattern */ true );
  1586. return ary;
  1587. }
  1588. UnifiedRegex::GroupInfo
  1589. RegexHelper::SimpleMatch(ScriptContext * scriptContext, UnifiedRegex::RegexPattern * pattern, const char16 * input, CharCount inputLength, CharCount offset)
  1590. {
  1591. RegexMatchState state;
  1592. PrimBeginMatch(state, scriptContext, pattern, input, inputLength, false);
  1593. UnifiedRegex::GroupInfo match = PrimMatch(state, scriptContext, pattern, inputLength, offset);
  1594. PrimEndMatch(state, scriptContext, pattern);
  1595. return match;
  1596. }
  1597. // String.prototype.search (ES5 15.5.4.12)
  1598. Var RegexHelper::RegexSearchImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1599. {
  1600. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  1601. const char16* inputStr = input->GetString();
  1602. CharCount inputLength = input->GetLength();
  1603. #if ENABLE_REGEX_CONFIG_OPTIONS
  1604. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Search, regularExpression, input);
  1605. #endif
  1606. UnifiedRegex::GroupInfo match = RegexHelper::SimpleMatch(scriptContext, pattern, inputStr, inputLength, 0);
  1607. PropagateLastMatch(scriptContext, pattern->IsGlobal(), pattern->IsSticky(), regularExpression, input, match, match, false, true);
  1608. return JavascriptNumber::ToVar(match.IsUndefined() ? -1 : (int32)match.offset, scriptContext);
  1609. }
  1610. // String.prototype.split (ES5 15.5.4.14)
  1611. Var RegexHelper::StringSplit(JavascriptString* match, JavascriptString* input, CharCount limit)
  1612. {
  1613. ScriptContext* scriptContext = match->GetScriptContext();
  1614. JavascriptArray* ary;
  1615. CharCount matchLen = match->GetLength();
  1616. if (matchLen == 0)
  1617. {
  1618. CharCount count = min(input->GetLength(), limit);
  1619. ary = scriptContext->GetLibrary()->CreateArray(count);
  1620. const char16 * charString = input->GetString();
  1621. for (CharCount i = 0; i < count; i++)
  1622. {
  1623. ary->DirectSetItemAt(i, scriptContext->GetLibrary()->GetCharStringCache().GetStringForChar(charString[i]));
  1624. }
  1625. }
  1626. else
  1627. {
  1628. CharCount i = 0;
  1629. CharCount offset = 0;
  1630. ary = scriptContext->GetLibrary()->CreateArray(0);
  1631. while (i < limit)
  1632. {
  1633. CharCount prevOffset = offset;
  1634. offset = JavascriptString::strstr(input, match, false, prevOffset);
  1635. if (offset != CharCountFlag)
  1636. {
  1637. ary->DirectSetItemAt(i++, SubString::New(input, prevOffset, offset-prevOffset));
  1638. offset += max(matchLen, static_cast<CharCount>(1));
  1639. if (offset > input->GetLength())
  1640. break;
  1641. }
  1642. else
  1643. {
  1644. ary->DirectSetItemAt(i++, SubString::New(input, prevOffset, input->GetLength() - prevOffset));
  1645. break;
  1646. }
  1647. }
  1648. }
  1649. return ary;
  1650. }
  1651. bool RegexHelper::IsResultNotUsed(CallFlags flags)
  1652. {
  1653. return !PHASE_OFF1(Js::RegexResultNotUsedPhase) && ((flags & CallFlags_NotUsed) != 0);
  1654. }
  1655. // ----------------------------------------------------------------------
  1656. // Primitives
  1657. // ----------------------------------------------------------------------
  1658. void RegexHelper::PrimBeginMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, const char16* input, CharCount inputLength, bool alwaysNeedAlloc)
  1659. {
  1660. state.input = input;
  1661. if (pattern->rep.unified.matcher == 0)
  1662. pattern->rep.unified.matcher = UnifiedRegex::Matcher::New(scriptContext, pattern);
  1663. if (alwaysNeedAlloc)
  1664. state.tempAllocatorObj = scriptContext->GetTemporaryAllocator(_u("RegexUnifiedExecTemp"));
  1665. else
  1666. state.tempAllocatorObj = 0;
  1667. }
  1668. UnifiedRegex::GroupInfo
  1669. RegexHelper::PrimMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, CharCount inputLength, CharCount offset)
  1670. {
  1671. Assert(pattern->rep.unified.program != 0);
  1672. Assert(pattern->rep.unified.matcher != 0);
  1673. #if ENABLE_REGEX_CONFIG_OPTIONS
  1674. UnifiedRegex::RegexStats* stats = 0;
  1675. if (REGEX_CONFIG_FLAG(RegexProfile))
  1676. {
  1677. stats = scriptContext->GetRegexStatsDatabase()->GetRegexStats(pattern);
  1678. scriptContext->GetRegexStatsDatabase()->BeginProfile();
  1679. }
  1680. UnifiedRegex::DebugWriter* w = 0;
  1681. if (REGEX_CONFIG_FLAG(RegexTracing) && CONFIG_FLAG(Verbose))
  1682. w = scriptContext->GetRegexDebugWriter();
  1683. #endif
  1684. pattern->rep.unified.matcher->Match
  1685. (state.input
  1686. , inputLength
  1687. , offset
  1688. , scriptContext
  1689. #if ENABLE_REGEX_CONFIG_OPTIONS
  1690. , stats
  1691. , w
  1692. #endif
  1693. );
  1694. #if ENABLE_REGEX_CONFIG_OPTIONS
  1695. if (REGEX_CONFIG_FLAG(RegexProfile))
  1696. scriptContext->GetRegexStatsDatabase()->EndProfile(stats, UnifiedRegex::RegexStats::Execute);
  1697. #endif
  1698. return pattern->GetGroup(0);
  1699. }
  1700. void RegexHelper::PrimEndMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern)
  1701. {
  1702. if (state.tempAllocatorObj != 0)
  1703. scriptContext->ReleaseTemporaryAllocator(state.tempAllocatorObj);
  1704. }
  1705. Var RegexHelper::NonMatchValue(ScriptContext* scriptContext, bool isGlobalCtor)
  1706. {
  1707. // SPEC DEVIATION: The $n properties of the RegExp ctor use empty strings rather than undefined to represent
  1708. // the non-match value, even in ES5 mode.
  1709. if (isGlobalCtor)
  1710. return scriptContext->GetLibrary()->GetEmptyString();
  1711. else
  1712. return scriptContext->GetLibrary()->GetUndefined();
  1713. }
  1714. Var RegexHelper::GetString(ScriptContext* scriptContext, JavascriptString* input, Var nonMatchValue, UnifiedRegex::GroupInfo group)
  1715. {
  1716. if (group.IsUndefined())
  1717. return nonMatchValue;
  1718. switch (group.length)
  1719. {
  1720. case 0:
  1721. return scriptContext->GetLibrary()->GetEmptyString();
  1722. case 1:
  1723. {
  1724. const char16* inputStr = input->GetString();
  1725. return scriptContext->GetLibrary()->GetCharStringCache().GetStringForChar(inputStr[group.offset]);
  1726. }
  1727. case 2:
  1728. {
  1729. const char16* inputStr = input->GetString();
  1730. PropertyString* propString = scriptContext->GetPropertyString2(inputStr[group.offset], inputStr[group.offset + 1]);
  1731. if (propString != 0)
  1732. return propString;
  1733. // fall-through for default
  1734. }
  1735. default:
  1736. return SubString::New(input, group.offset, group.length);
  1737. }
  1738. }
  1739. Var RegexHelper::GetGroup(ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, JavascriptString* input, Var nonMatchValue, int groupId)
  1740. {
  1741. return GetString(scriptContext, input, nonMatchValue, pattern->GetGroup(groupId));
  1742. }
  1743. // ======================================================================
  1744. // Match results propagate into three places:
  1745. // - The match result array. Generally the array has string entries for the overall match substring,
  1746. // followed by final bindings for each group, plus the fields:
  1747. // - 'input': string used in match
  1748. // - 'index': index of first character of match in input
  1749. // - 'lastIndex' (IE extension): one plus index of last character of match in input
  1750. // However, for String.match with a global match, the result is an array of all match results
  1751. // (ignoring any group bindings). But in IE8 mode we also bind the above fields to that array,
  1752. // using the results of the last successful primitive match.
  1753. // - The regular expression object has writable field:
  1754. // - 'lastIndex': one plus index of last character of last match in last input
  //     - 'lastInput'
  1756. // - (Host extension) The RegExp constructor object has fields:
  1757. // - '$n': last match substrings, using "" for undefined in all modes
  1758. // - etc (see JavascriptRegExpConstructorType.cpp)
  1759. //
  1760. // There are also three influences on what gets propagated where and when:
  1761. // - Whether the regular expression is global
  1762. // - Whether the primitive operations runs the regular expression until failure (e.g. String.match) or
  1763. // just once (e.g. RegExp.exec), or use the underlying matching machinery implicitly (e.g. String.split).
  1764. //
  1765. // Here are the rules:
  1766. // - RegExp is updated for the last *successful* primitive match, except for String.replace.
  1767. // In particular, for String.match with a global regex, the final failing match *does not* reset RegExp.
  //  - Except for String.search in ES5 mode (which does not update 'lastIndex'), the regular expression's
  //    'lastIndex' is updated as follows:
  1770. // - ES5 mode, if a primitive match fails then the regular expression 'lastIndex' is set to 0. In particular,
  1771. // the final failing primitive match for String.match with a global regex forces 'lastIndex' to be reset.
  1772. // However, if a primitive match succeeds then the regular expression 'lastIndex' is updated only for
  1773. // a global regex.
  1774. // for success. However:
  1775. // - The last failing match in a String.match with a global regex does NOT reset 'lastIndex'.
  1776. // - If the regular expression matched empty, the last index is set assuming the pattern actually matched
  1777. // one input character. This applies even if the pattern matched empty one beyond the end of the string
  1778. // in a String.match with a global regex (!). For our own sanity, we isolate this particular case
  1779. // within JavascriptRegExp when setting the lastIndexVar value.
  1780. // - In all modes, 'lastIndex' determines the starting search index only for global regular expressions.
  1781. //
  1782. // ======================================================================
  1783. void RegexHelper::PropagateLastMatch
  1784. ( ScriptContext* scriptContext
  1785. , bool isGlobal
  1786. , bool isSticky
  1787. , JavascriptRegExp* regularExpression
  1788. , JavascriptString* lastInput
  1789. , UnifiedRegex::GroupInfo lastSuccessfulMatch
  1790. , UnifiedRegex::GroupInfo lastActualMatch
  1791. , bool updateRegex
  1792. , bool updateCtor
  1793. , bool useSplitPattern )
  1794. {
  1795. if (updateRegex)
  1796. {
  1797. PropagateLastMatchToRegex(scriptContext, isGlobal, isSticky, regularExpression, lastSuccessfulMatch, lastActualMatch);
  1798. }
  1799. if (updateCtor)
  1800. {
  1801. PropagateLastMatchToCtor(scriptContext, regularExpression, lastInput, lastSuccessfulMatch, useSplitPattern);
  1802. }
  1803. }
  1804. void RegexHelper::PropagateLastMatchToRegex
  1805. ( ScriptContext* scriptContext
  1806. , bool isGlobal
  1807. , bool isSticky
  1808. , JavascriptRegExp* regularExpression
  1809. , UnifiedRegex::GroupInfo lastSuccessfulMatch
  1810. , UnifiedRegex::GroupInfo lastActualMatch )
  1811. {
  1812. if (lastActualMatch.IsUndefined())
  1813. {
  1814. regularExpression->SetLastIndex(0);
  1815. }
  1816. else if (isGlobal || isSticky)
  1817. {
  1818. CharCount lastIndex = lastActualMatch.EndOffset();
  1819. Assert(lastIndex <= MaxCharCount);
  1820. regularExpression->SetLastIndex((int32)lastIndex);
  1821. }
  1822. }
  1823. void RegexHelper::PropagateLastMatchToCtor
  1824. ( ScriptContext* scriptContext
  1825. , JavascriptRegExp* regularExpression
  1826. , JavascriptString* lastInput
  1827. , UnifiedRegex::GroupInfo lastSuccessfulMatch
  1828. , bool useSplitPattern )
  1829. {
  1830. Assert(lastInput);
  1831. if (!lastSuccessfulMatch.IsUndefined())
  1832. {
  1833. // Notes:
  1834. // - SPEC DEVIATION: The RegExp ctor holds some details of the last successful match on any regular expression.
  1835. // - For updating regex ctor's stats we are using entry function's context, rather than regex context,
  1836. // the rational is: use same context of RegExp.prototype, on which the function was called.
  1837. // So, if you call the function with remoteContext.regexInstance.exec.call(localRegexInstance, "match string"),
  1838. // we will update stats in the context related to the exec function, i.e. remoteContext.
  1839. // This is consistent with other browsers
  1840. UnifiedRegex::RegexPattern* pattern = useSplitPattern
  1841. ? regularExpression->GetSplitPattern()
  1842. : regularExpression->GetPattern();
  1843. scriptContext->GetLibrary()->GetRegExpConstructor()->SetLastMatch(pattern, lastInput, lastSuccessfulMatch);
  1844. }
  1845. }
  1846. void RegexHelper::InvalidateLastMatchOnCtor(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* lastInput, bool useSplitPattern)
  1847. {
  1848. Assert(lastInput);
  1849. UnifiedRegex::RegexPattern* pattern = useSplitPattern
  1850. ? regularExpression->GetSplitPattern()
  1851. : regularExpression->GetPattern();
  1852. scriptContext->GetLibrary()->GetRegExpConstructor()->InvalidateLastMatch(pattern, lastInput);
  1853. }
  1854. bool RegexHelper::GetInitialOffset(bool isGlobal, bool isSticky, JavascriptRegExp* regularExpression, CharCount inputLength, CharCount& offset)
  1855. {
  1856. if (isGlobal || isSticky)
  1857. {
  1858. offset = regularExpression->GetLastIndex();
  1859. if (offset <= MaxCharCount)
  1860. return true;
  1861. else
  1862. {
  1863. regularExpression->SetLastIndex(0);
  1864. return false;
  1865. }
  1866. }
  1867. else
  1868. {
  1869. offset = 0;
  1870. return true;
  1871. }
  1872. }
  1873. JavascriptArray* RegexHelper::CreateMatchResult(void *const stackAllocationPointer, ScriptContext* scriptContext, bool isGlobal, int numGroups, JavascriptString* input)
  1874. {
  1875. if (isGlobal)
  1876. {
  1877. // Use an ordinary array, with default initial capacity
  1878. return scriptContext->GetLibrary()->CreateArrayOnStack(stackAllocationPointer);
  1879. }
  1880. else
  1881. return JavascriptRegularExpressionResult::Create(stackAllocationPointer, numGroups, input, scriptContext);
  1882. }
  1883. void RegexHelper::FinalizeMatchResult(ScriptContext* scriptContext, bool isGlobal, JavascriptArray* arr, UnifiedRegex::GroupInfo match)
  1884. {
  1885. if (!isGlobal)
  1886. JavascriptRegularExpressionResult::SetMatch(arr, match);
  1887. // else: arr is an ordinary array
  1888. }
  1889. JavascriptArray* RegexHelper::CreateExecResult(void *const stackAllocationPointer, ScriptContext* scriptContext, int numGroups, JavascriptString* input, UnifiedRegex::GroupInfo match)
  1890. {
  1891. JavascriptArray* res = JavascriptRegularExpressionResult::Create(stackAllocationPointer, numGroups, input, scriptContext);
  1892. JavascriptRegularExpressionResult::SetMatch(res, match);
  1893. return res;
  1894. }
  1895. template<bool mustMatchEntireInput>
  1896. BOOL RegexHelper::RegexTest_NonScript(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, const char16 *const input, const CharCount inputLength)
  1897. {
  1898. // This version of the function should only be used when testing the regex against a non-javascript string. That is,
  1899. // this call was not initiated by script code. Hence, the RegExp constructor is not updated with the last match. If
  1900. // 'mustMatchEntireInput' is true, this function also ignores the global/sticky flag and the lastIndex property, since it tests
  1901. // for a match on the entire input string; in that case, the lastIndex property is not modified.
  1902. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  1903. UnifiedRegex::GroupInfo match; // initially undefined
  1904. #if ENABLE_REGEX_CONFIG_OPTIONS
  1905. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Test, regularExpression, input, inputLength);
  1906. #endif
  1907. const bool isGlobal = pattern->IsGlobal();
  1908. const bool isSticky = pattern->IsSticky();
  1909. CharCount offset;
  1910. if (mustMatchEntireInput)
  1911. offset = 0; // needs to match the entire input, so ignore 'lastIndex' and always start from the beginning
  1912. else if (!GetInitialOffset(isGlobal, isSticky, regularExpression, inputLength, offset))
  1913. return false;
  1914. if (mustMatchEntireInput || offset <= inputLength)
  1915. {
  1916. match = RegexHelper::SimpleMatch(scriptContext, pattern, input, inputLength, offset);
  1917. }
  1918. // else: match remains undefined
  1919. if (!mustMatchEntireInput) // don't update 'lastIndex' when mustMatchEntireInput is true since the global flag is ignored
  1920. {
  1921. PropagateLastMatchToRegex(scriptContext, isGlobal, isSticky, regularExpression, match, match);
  1922. }
  1923. return mustMatchEntireInput ? match.offset == 0 && match.length == inputLength : !match.IsUndefined();
  1924. }
  1925. // explicit instantiation
  1926. template BOOL RegexHelper::RegexTest_NonScript<true>(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, const char16 *const input, const CharCount inputLength);
  1927. template BOOL RegexHelper::RegexTest_NonScript<false>(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, const char16 *const input, const CharCount inputLength);
  1928. // Asserts if the value needs to be marshaled to target context.
  1929. // Returns the resulting value.
  1930. // This is supposed to be called for result/return value of the RegexXXX functions.
  1931. // static
  1932. template<typename T>
  1933. T RegexHelper::CheckCrossContextAndMarshalResult(T value, ScriptContext* targetContext)
  1934. {
  1935. Assert(targetContext);
  1936. Assert(!CrossSite::NeedMarshalVar(value, targetContext));
  1937. return value;
  1938. }
  1939. Var RegexHelper::RegexMatchResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1940. {
  1941. return RegexHelper::RegexMatch(scriptContext, regularExpression, input, false);
  1942. }
  1943. Var RegexHelper::RegexMatchResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1944. {
  1945. return RegexHelper::RegexMatch(scriptContext, regularExpression, input, false, stackAllocationPointer);
  1946. }
  1947. Var RegexHelper::RegexMatchResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1948. {
  1949. if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
  1950. {
  1951. return RegexHelper::RegexMatch(scriptContext, regularExpression, input, true);
  1952. }
  1953. else
  1954. {
  1955. return RegexHelper::RegexMatch(scriptContext, regularExpression, input, false);
  1956. }
  1957. }
  1958. Var RegexHelper::RegexMatch(ScriptContext* entryFunctionContext, RecyclableObject *thisObj, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
  1959. {
  1960. Var result = RegexHelper::RegexMatchImpl<true>(entryFunctionContext, thisObj, input, noResult, stackAllocationPointer);
  1961. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  1962. }
  1963. Var RegexHelper::RegexMatchNoHistory(ScriptContext* entryFunctionContext, JavascriptRegExp *regularExpression, JavascriptString *input, bool noResult)
  1964. {
  1965. // RegexMatchNoHistory() is used only by Intl internally and there is no need for ES6
  1966. // observable RegExp actions. Therefore, we can directly use the ES5 logic.
  1967. Var result = RegexHelper::RegexEs5MatchImpl<false>(entryFunctionContext, regularExpression, input, noResult);
  1968. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  1969. }
  1970. Var RegexHelper::RegexExecResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1971. {
  1972. return RegexHelper::RegexExec(scriptContext, regularExpression, input, false);
  1973. }
  1974. Var RegexHelper::RegexExecResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1975. {
  1976. return RegexHelper::RegexExec(scriptContext, regularExpression, input, false, stackAllocationPointer);
  1977. }
  1978. Var RegexHelper::RegexExecResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1979. {
  1980. if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
  1981. {
  1982. return RegexHelper::RegexExec(scriptContext, regularExpression, input, true);
  1983. }
  1984. else
  1985. {
  1986. return RegexHelper::RegexExec(scriptContext, regularExpression, input, false);
  1987. }
  1988. }
  1989. Var RegexHelper::RegexExec(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, bool noResult, void *const stackAllocationPointer)
  1990. {
  1991. Var result = RegexHelper::RegexExecImpl(entryFunctionContext, regularExpression, input, noResult, stackAllocationPointer);
  1992. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  1993. }
  1994. Var RegexHelper::RegexReplaceResultUsed(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace)
  1995. {
  1996. return entryFunctionContext->GetConfig()->IsES6RegExSymbolsEnabled()
  1997. ? RegexHelper::RegexReplace(entryFunctionContext, regularExpression, input, replace, false)
  1998. : RegexHelper::RegexEs5Replace(entryFunctionContext, regularExpression, input, replace, false);
  1999. }
  2000. Var RegexHelper::RegexReplaceResultNotUsed(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace)
  2001. {
  2002. if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
  2003. {
  2004. return entryFunctionContext->GetConfig()->IsES6RegExSymbolsEnabled()
  2005. ? RegexHelper::RegexReplace(entryFunctionContext, regularExpression, input, replace, true)
  2006. : RegexHelper::RegexEs5Replace(entryFunctionContext, regularExpression, input, replace, true);
  2007. }
  2008. else
  2009. {
  2010. return entryFunctionContext->GetConfig()->IsES6RegExSymbolsEnabled()
  2011. ? RegexHelper::RegexReplace(entryFunctionContext, regularExpression, input, replace, false)
  2012. : RegexHelper::RegexEs5Replace(entryFunctionContext, regularExpression, input, replace, false);
  2013. }
  2014. }
  2015. Var RegexHelper::RegexReplace(ScriptContext* entryFunctionContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult)
  2016. {
  2017. Var result = RegexHelper::RegexReplaceImpl(entryFunctionContext, thisObj, input, replace, noResult);
  2018. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  2019. }
  2020. Var RegexHelper::RegexEs5Replace(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace, bool noResult)
  2021. {
  2022. // We can have RegexReplaceResult... functions defer their job to RegexReplace. However, their regularExpression argument
  2023. // would first be cast to RecyclableObject when the call is made, and then back to JavascriptRegExp in RegexReplaceImpl.
  2024. // The conversion back slows down the perf, so we use this ES5 version of RegexReplace in RegexReplaceResult... if we know
  2025. // that the ES6 logic isn't needed.
  2026. Var result = RegexHelper::RegexEs5ReplaceImpl(entryFunctionContext, regularExpression, input, replace, noResult);
  2027. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  2028. }
  2029. Var RegexHelper::RegexReplaceFunction(ScriptContext* entryFunctionContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptFunction* replacefn)
  2030. {
  2031. Var result = RegexHelper::RegexReplaceImpl(entryFunctionContext, thisObj, input, replacefn);
  2032. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  2033. }
  2034. Var RegexHelper::RegexSearch(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  2035. {
  2036. Var result = RegexHelper::RegexSearchImpl(entryFunctionContext, regularExpression, input);
  2037. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  2038. }
  2039. Var RegexHelper::RegexSplitResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit)
  2040. {
  2041. return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, false);
  2042. }
  2043. Var RegexHelper::RegexSplitResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit)
  2044. {
  2045. Assert(ThreadContext::IsOnStack(stackAllocationPointer));
  2046. return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, false, stackAllocationPointer);
  2047. }
  2048. Var RegexHelper::RegexSplitResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit)
  2049. {
  2050. if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
  2051. {
  2052. return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, true);
  2053. }
  2054. else
  2055. {
  2056. return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, false);
  2057. }
  2058. }
  2059. Var RegexHelper::RegexSplit(ScriptContext* entryFunctionContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
  2060. {
  2061. Var result = RegexHelper::RegexSplitImpl(entryFunctionContext, thisObj, input, limit, noResult, stackAllocationPointer);
  2062. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  2063. }
  2064. RecyclableObject* RegexHelper::ExecResultToRecyclableObject(Var result)
  2065. {
  2066. // "result" is the result of the "exec" call. "CallExec" makes sure that it is either
  2067. // an Object or Null. RegExp algorithms have special conditions for when the result is Null,
  2068. // so we can directly cast to RecyclableObject.
  2069. Assert(!JavascriptOperators::IsNull(result));
  2070. return RecyclableObject::UnsafeFromVar(result);
  2071. }
  2072. JavascriptString* RegexHelper::GetMatchStrFromResult(RecyclableObject* result, ScriptContext* scriptContext)
  2073. {
  2074. return JavascriptConversion::ToString(
  2075. JavascriptOperators::GetItem(result, (uint32)0, scriptContext),
  2076. scriptContext);
  2077. }
  2078. void RegexHelper::AdvanceLastIndex(
  2079. RecyclableObject* instance,
  2080. JavascriptString* input,
  2081. JavascriptString* matchStr,
  2082. bool unicode,
  2083. ScriptContext* scriptContext)
  2084. {
  2085. if (matchStr->GetLength() == 0)
  2086. {
  2087. CharCount lastIndex = JavascriptRegExp::GetLastIndexProperty(instance, scriptContext);
  2088. lastIndex = AdvanceStringIndex(input, lastIndex, unicode);
  2089. JavascriptRegExp::SetLastIndexProperty(instance, lastIndex, scriptContext);
  2090. }
  2091. }
  2092. CharCount RegexHelper::AdvanceStringIndex(JavascriptString* string, CharCount index, bool isUnicode)
  2093. {
  2094. // TODO: Change the increment to 2 depending on the "unicode" flag and
  2095. // the code point at "index". The increment is currently constant at 1
  2096. // in order to be compatible with the rest of the RegExp code.
  2097. return JavascriptRegExp::AddIndex(index, 1);
  2098. }
  2099. }