RegexHelper.cpp 103 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "RuntimeLibraryPch.h"
  6. // Parser Includes
  7. #include "DebugWriter.h"
  8. #include "RegexStats.h"
  9. #include "OctoquadIdentifier.h"
  10. #include "RegexCompileTime.h"
  11. #include "RegexParser.h"
  12. #include "RegexPattern.h"
  13. namespace Js
  14. {
  15. // ----------------------------------------------------------------------
  16. // Dynamic compilation
  17. // ----------------------------------------------------------------------
  18. // See also:
  19. // UnifiedRegex::Parser::Options(...)
  20. bool RegexHelper::GetFlags(Js::ScriptContext* scriptContext, __in_ecount(strLen) const char16* str, CharCount strLen, UnifiedRegex::RegexFlags &flags)
  21. {
  22. for (CharCount i = 0; i < strLen; i++)
  23. {
  24. switch (str[i])
  25. {
  26. case 'i':
  27. if ((flags & UnifiedRegex::IgnoreCaseRegexFlag) != 0)
  28. return false;
  29. flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::IgnoreCaseRegexFlag);
  30. break;
  31. case 'g':
  32. if ((flags & UnifiedRegex::GlobalRegexFlag) != 0)
  33. return false;
  34. flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::GlobalRegexFlag);
  35. break;
  36. case 'm':
  37. if ((flags & UnifiedRegex::MultilineRegexFlag) != 0)
  38. return false;
  39. flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::MultilineRegexFlag);
  40. break;
  41. case 's':
  42. if (scriptContext->GetConfig()->IsES2018RegExDotAllEnabled())
  43. {
  44. if ((flags & UnifiedRegex::DotAllRegexFlag) != 0)
  45. return false;
  46. flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::DotAllRegexFlag);
  47. break;
  48. }
  49. return false;
  50. case 'u':
  51. if (scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled())
  52. {
  53. if((flags & UnifiedRegex::UnicodeRegexFlag) != 0)
  54. return false;
  55. flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::UnicodeRegexFlag);
  56. break;
  57. }
  58. return false;
  59. case 'y':
  60. if (scriptContext->GetConfig()->IsES6RegExStickyEnabled())
  61. {
  62. if ((flags & UnifiedRegex::StickyRegexFlag) != 0)
  63. return false;
  64. flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::StickyRegexFlag);
  65. break;
  66. }
  67. return false;
  68. default:
  69. return false;
  70. }
  71. }
  72. return true;
  73. }
  74. UnifiedRegex::RegexPattern* RegexHelper::CompileDynamic(ScriptContext *scriptContext, const char16* psz, CharCount csz, const char16* pszOpts, CharCount cszOpts, bool isLiteralSource)
  75. {
  76. Assert(psz != 0 && psz[csz] == 0);
  77. Assert(pszOpts != 0 || cszOpts == 0);
  78. Assert(pszOpts == 0 || pszOpts[cszOpts] == 0);
  79. UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
  80. if (pszOpts != NULL)
  81. {
  82. if (!GetFlags(scriptContext, pszOpts, cszOpts, flags))
  83. {
  84. // Compile in order to throw appropriate error for ill-formed flags
  85. PrimCompileDynamic(scriptContext, psz, csz, pszOpts, cszOpts, isLiteralSource);
  86. Assert(false);
  87. }
  88. }
  89. if(isLiteralSource)
  90. {
  91. // The source is from a literal regex, so we're cloning a literal regex. Don't use the dynamic regex MRU map since
  92. // these literal regex patterns' lifetimes are tied with the function body.
  93. return PrimCompileDynamic(scriptContext, psz, csz, pszOpts, cszOpts, isLiteralSource);
  94. }
  95. UnifiedRegex::RegexKey lookupKey(psz, csz, flags);
  96. UnifiedRegex::RegexPattern* pattern = nullptr;
  97. RegexPatternMruMap* dynamicRegexMap = scriptContext->GetDynamicRegexMap();
  98. if (!dynamicRegexMap->TryGetValue(lookupKey, &pattern))
  99. {
  100. pattern = PrimCompileDynamic(scriptContext, psz, csz, pszOpts, cszOpts, isLiteralSource);
  101. // WARNING: Must calculate key again so that dictionary has copy of source associated with the pattern
  102. const auto source = pattern->GetSource();
  103. UnifiedRegex::RegexKey finalKey(source.GetBuffer(), source.GetLength(), flags);
  104. dynamicRegexMap->Add(finalKey, pattern);
  105. }
  106. return pattern;
  107. }
  108. UnifiedRegex::RegexPattern* RegexHelper::CompileDynamic(
  109. ScriptContext *scriptContext, const char16* psz, CharCount csz, UnifiedRegex::RegexFlags flags, bool isLiteralSource)
  110. {
  111. //
  112. // Regex compilations are mostly string parsing based. To avoid duplicating validation rules,
  113. // generate a trivial options string right here on the stack and delegate to the string parsing
  114. // based implementation.
  115. //
  116. const CharCount OPT_BUF_SIZE = 7;
  117. char16 opts[OPT_BUF_SIZE];
  118. CharCount i = 0;
  119. if (flags & UnifiedRegex::IgnoreCaseRegexFlag)
  120. {
  121. opts[i++] = _u('i');
  122. }
  123. if (flags & UnifiedRegex::GlobalRegexFlag)
  124. {
  125. opts[i++] = _u('g');
  126. }
  127. if (flags & UnifiedRegex::MultilineRegexFlag)
  128. {
  129. opts[i++] = _u('m');
  130. }
  131. if (flags & UnifiedRegex::DotAllRegexFlag)
  132. {
  133. Assert(scriptContext->GetConfig()->IsES2018RegExDotAllEnabled());
  134. opts[i++] = _u('s');
  135. }
  136. if (flags & UnifiedRegex::UnicodeRegexFlag)
  137. {
  138. Assert(scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled());
  139. opts[i++] = _u('u');
  140. }
  141. if (flags & UnifiedRegex::StickyRegexFlag)
  142. {
  143. Assert(scriptContext->GetConfig()->IsES6RegExStickyEnabled());
  144. opts[i++] = _u('y');
  145. }
  146. Assert(i < OPT_BUF_SIZE);
  147. opts[i] = NULL;
  148. return CompileDynamic(scriptContext, psz, csz, opts, i, isLiteralSource);
  149. }
  150. UnifiedRegex::RegexPattern* RegexHelper::PrimCompileDynamic(ScriptContext *scriptContext, const char16* psz, CharCount csz, const char16* pszOpts, CharCount cszOpts, bool isLiteralSource)
  151. {
  152. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  153. // SEE ALSO: Scanner<EncodingPolicy>::ScanRegExpConstant()
  154. #ifdef PROFILE_EXEC
  155. scriptContext->ProfileBegin(Js::RegexCompilePhase);
  156. #endif
  157. ArenaAllocator* rtAllocator = scriptContext->RegexAllocator();
  158. #if ENABLE_REGEX_CONFIG_OPTIONS
  159. UnifiedRegex::DebugWriter *dw = 0;
  160. if (REGEX_CONFIG_FLAG(RegexDebug))
  161. dw = scriptContext->GetRegexDebugWriter();
  162. UnifiedRegex::RegexStats* stats = 0;
  163. #endif
  164. UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
  165. if(csz == 0 && cszOpts == 0)
  166. {
  167. // Fast path for compiling the empty regex with empty flags, for the RegExp constructor object and other cases.
  168. // These empty regexes are dynamic regexes and so this fast path only exists for dynamic regex compilation. The
  169. // standard chars in particular, do not need to be initialized to compile this regex.
  170. UnifiedRegex::Program* program = UnifiedRegex::Program::New(scriptContext->GetRecycler(), flags);
  171. UnifiedRegex::Parser<NullTerminatedUnicodeEncodingPolicy, false>::CaptureEmptySourceAndNoGroups(program);
  172. UnifiedRegex::RegexPattern* pattern = UnifiedRegex::RegexPattern::New(scriptContext, program, false);
  173. UnifiedRegex::Compiler::CompileEmptyRegex
  174. ( program
  175. , pattern
  176. #if ENABLE_REGEX_CONFIG_OPTIONS
  177. , dw
  178. , stats
  179. #endif
  180. );
  181. #ifdef PROFILE_EXEC
  182. scriptContext->ProfileEnd(Js::RegexCompilePhase);
  183. #endif
  184. return pattern;
  185. }
  186. #if ENABLE_REGEX_CONFIG_OPTIONS
  187. if (REGEX_CONFIG_FLAG(RegexProfile))
  188. scriptContext->GetRegexStatsDatabase()->BeginProfile();
  189. #endif
  190. BEGIN_TEMP_ALLOCATOR(ctAllocator, scriptContext, _u("UnifiedRegexParseAndCompile"));
  191. UnifiedRegex::StandardChars<char16>* standardChars = scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
  192. UnifiedRegex::Node* root = 0;
  193. UnifiedRegex::Parser<NullTerminatedUnicodeEncodingPolicy, false> parser
  194. ( scriptContext
  195. , ctAllocator
  196. , standardChars
  197. , standardChars
  198. , false
  199. #if ENABLE_REGEX_CONFIG_OPTIONS
  200. , dw
  201. #endif
  202. );
  203. try
  204. {
  205. root = parser.ParseDynamic(psz, psz + csz, pszOpts, pszOpts + cszOpts, flags);
  206. }
  207. catch (UnifiedRegex::ParseError e)
  208. {
  209. END_TEMP_ALLOCATOR(ctAllocator, scriptContext);
  210. #ifdef PROFILE_EXEC
  211. scriptContext->ProfileEnd(Js::RegexCompilePhase);
  212. #endif
  213. Js::JavascriptError::ThrowSyntaxError(scriptContext, e.error);
  214. // never reached
  215. }
  216. const auto recycler = scriptContext->GetRecycler();
  217. UnifiedRegex::Program* program = UnifiedRegex::Program::New(recycler, flags);
  218. parser.CaptureSourceAndGroups(recycler, program, psz, csz, csz);
  219. UnifiedRegex::RegexPattern* pattern = UnifiedRegex::RegexPattern::New(scriptContext, program, isLiteralSource);
  220. #if ENABLE_REGEX_CONFIG_OPTIONS
  221. if (REGEX_CONFIG_FLAG(RegexProfile))
  222. {
  223. stats = scriptContext->GetRegexStatsDatabase()->GetRegexStats(pattern);
  224. scriptContext->GetRegexStatsDatabase()->EndProfile(stats, UnifiedRegex::RegexStats::Parse);
  225. }
  226. if (REGEX_CONFIG_FLAG(RegexTracing))
  227. {
  228. UnifiedRegex::DebugWriter* tw = scriptContext->GetRegexDebugWriter();
  229. tw->Print(_u("// REGEX COMPILE "));
  230. pattern->Print(tw);
  231. tw->EOL();
  232. }
  233. if (REGEX_CONFIG_FLAG(RegexProfile))
  234. scriptContext->GetRegexStatsDatabase()->BeginProfile();
  235. #endif
  236. UnifiedRegex::Compiler::Compile
  237. ( scriptContext
  238. , ctAllocator
  239. , rtAllocator
  240. , standardChars
  241. , program
  242. , root
  243. , parser.GetLitbuf()
  244. , pattern
  245. #if ENABLE_REGEX_CONFIG_OPTIONS
  246. , dw
  247. , stats
  248. #endif
  249. );
  250. #if ENABLE_REGEX_CONFIG_OPTIONS
  251. if (REGEX_CONFIG_FLAG(RegexProfile))
  252. scriptContext->GetRegexStatsDatabase()->EndProfile(stats, UnifiedRegex::RegexStats::Compile);
  253. #endif
  254. END_TEMP_ALLOCATOR(ctAllocator, scriptContext);
  255. #ifdef PROFILE_EXEC
  256. scriptContext->ProfileEnd(Js::RegexCompilePhase);
  257. #endif
  258. return pattern;
  259. }
  260. // ----------------------------------------------------------------------
  261. // Primitives
  262. // ----------------------------------------------------------------------
  263. #if ENABLE_REGEX_CONFIG_OPTIONS
  264. static void RegexHelperTrace(
  265. ScriptContext* scriptContext,
  266. UnifiedRegex::RegexStats::Use use,
  267. JavascriptRegExp* regExp,
  268. const char16 *const input,
  269. const CharCount inputLength,
  270. const char16 *const replace = 0,
  271. const CharCount replaceLength = 0)
  272. {
  273. Assert(regExp);
  274. Assert(input);
  275. if (REGEX_CONFIG_FLAG(RegexProfile))
  276. {
  277. UnifiedRegex::RegexStats* stats =
  278. scriptContext->GetRegexStatsDatabase()->GetRegexStats(regExp->GetPattern());
  279. stats->useCounts[use]++;
  280. stats->inputLength += inputLength;
  281. }
  282. if (REGEX_CONFIG_FLAG(RegexTracing))
  283. {
  284. UnifiedRegex::DebugWriter* w = scriptContext->GetRegexDebugWriter();
  285. w->Print(_u("%s("), UnifiedRegex::RegexStats::UseNames[use]);
  286. regExp->GetPattern()->Print(w);
  287. w->Print(_u(", "));
  288. if (!CONFIG_FLAG(Verbose) && inputLength > 1024)
  289. w->Print(_u("\"<string too large>\""));
  290. else
  291. w->PrintQuotedString(input, inputLength);
  292. if (replace != 0)
  293. {
  294. Assert(use == UnifiedRegex::RegexStats::Replace);
  295. w->Print(_u(", "));
  296. if (!CONFIG_FLAG(Verbose) && replaceLength > 1024)
  297. w->Print(_u("\"<string too large>\""));
  298. else
  299. w->PrintQuotedString(replace, replaceLength);
  300. }
  301. w->PrintEOL(_u(");"));
  302. w->Flush();
  303. }
  304. }
  305. static void RegexHelperTrace(ScriptContext* scriptContext, UnifiedRegex::RegexStats::Use use, JavascriptRegExp* regExp, JavascriptString* input)
  306. {
  307. Assert(regExp);
  308. Assert(input);
  309. RegexHelperTrace(scriptContext, use, regExp, input->GetString(), input->GetLength());
  310. }
  311. static void RegexHelperTrace(ScriptContext* scriptContext, UnifiedRegex::RegexStats::Use use, JavascriptRegExp* regExp, JavascriptString* input, JavascriptString* replace)
  312. {
  313. Assert(regExp);
  314. Assert(input);
  315. Assert(replace);
  316. RegexHelperTrace(scriptContext, use, regExp, input->GetString(), input->GetLength(), replace->GetString(), replace->GetLength());
  317. }
  318. #endif
  319. // ----------------------------------------------------------------------
  320. // Regex entry points
  321. // ----------------------------------------------------------------------
  322. struct RegexMatchState
  323. {
  324. const char16* input;
  325. TempArenaAllocatorObject* tempAllocatorObj;
  326. UnifiedRegex::Matcher* matcher;
  327. };
  328. template <bool updateHistory>
  329. Var RegexHelper::RegexMatchImpl(ScriptContext* scriptContext, RecyclableObject *thisObj, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
  330. {
  331. ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
  332. // Normally, this check would be done in JavascriptRegExp::EntrySymbolMatch. However,
  333. // since the lowerer inlines String.prototype.match and directly calls the helper,
  334. // the check then would be bypassed. That's the reason we do the check here.
  335. if (scriptConfig->IsES6RegExSymbolsEnabled()
  336. && IsRegexSymbolMatchObservable(thisObj, scriptContext))
  337. {
  338. // We don't need to pass "updateHistory" here since the call to "exec" will handle it.
  339. return RegexEs6MatchImpl(scriptContext, thisObj, input, noResult, stackAllocationPointer);
  340. }
  341. else
  342. {
  343. PCWSTR varName = scriptConfig->IsES6RegExSymbolsEnabled()
  344. ? _u("RegExp.prototype[Symbol.match]")
  345. : _u("String.prototype.match");
  346. JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
  347. return RegexEs5MatchImpl<updateHistory>(scriptContext, regularExpression, input, noResult, stackAllocationPointer);
  348. }
  349. }
  350. bool RegexHelper::IsRegexSymbolMatchObservable(RecyclableObject* instance, ScriptContext* scriptContext)
  351. {
  352. DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
  353. return !JavascriptRegExp::HasOriginalRegExType(instance)
  354. || JavascriptRegExp::HasObservableExec(regexPrototype)
  355. || JavascriptRegExp::HasObservableGlobalFlag(regexPrototype)
  356. || JavascriptRegExp::HasObservableUnicodeFlag(regexPrototype);
  357. }
  358. Var RegexHelper::RegexEs6MatchImpl(ScriptContext* scriptContext, RecyclableObject *thisObj, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
  359. {
  360. PCWSTR const varName = _u("RegExp.prototype[Symbol.match]");
  361. if (!JavascriptRegExp::GetGlobalProperty(thisObj, scriptContext))
  362. {
  363. return JavascriptRegExp::CallExec(thisObj, input, varName, scriptContext);
  364. }
  365. else
  366. {
  367. bool unicode = JavascriptRegExp::GetUnicodeProperty(thisObj, scriptContext);
  368. JavascriptRegExp::SetLastIndexProperty(thisObj, TaggedInt::ToVarUnchecked(0), scriptContext);
  369. JavascriptArray* arrayResult = nullptr;
  370. do
  371. {
  372. Var result = JavascriptRegExp::CallExec(thisObj, input, varName, scriptContext);
  373. if (JavascriptOperators::IsNull(result))
  374. {
  375. break;
  376. }
  377. RecyclableObject* resultObj = ExecResultToRecyclableObject(result);
  378. JavascriptString* matchStr = GetMatchStrFromResult(resultObj, scriptContext);
  379. if (arrayResult == nullptr)
  380. {
  381. arrayResult = scriptContext->GetLibrary()->CreateArray();
  382. }
  383. arrayResult->DirectAppendItem(matchStr);
  384. AdvanceLastIndex(thisObj, input, matchStr, unicode, scriptContext);
  385. }
  386. while (true);
  387. return arrayResult != nullptr
  388. ? arrayResult
  389. : scriptContext->GetLibrary()->GetNull();
  390. }
  391. }
  392. // String.prototype.match (ES5 15.5.4.10)
  393. template <bool updateHistory>
  394. Var RegexHelper::RegexEs5MatchImpl(ScriptContext* scriptContext, JavascriptRegExp *regularExpression, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
  395. {
  396. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  397. const char16* inputStr = input->GetString();
  398. CharCount inputLength = input->GetLength();
  399. #if ENABLE_REGEX_CONFIG_OPTIONS
  400. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Match, regularExpression, input);
  401. #endif
  402. UnifiedRegex::GroupInfo lastSuccessfulMatch; // initially undefined
  403. UnifiedRegex::GroupInfo lastActualMatch; // initially undefined
  404. #ifdef REGEX_TRIGRAMS
  405. UnifiedRegex::TrigramAlphabet* trigramAlphabet = scriptContext->GetTrigramAlphabet();
  406. UnifiedRegex::TrigramInfo* trigramInfo = pattern->rep.unified.trigramInfo;
  407. if (trigramAlphabet != NULL && inputLength >= MinTrigramInputLength && trigramInfo != NULL)
  408. {
  409. if (trigramAlphabet->input == NULL)
  410. {
  411. trigramAlphabet->MegaMatch((char16*)inputStr, inputLength);
  412. }
  413. if (trigramInfo->isTrigramPattern)
  414. {
  415. if (trigramInfo->resultCount > 0)
  416. {
  417. lastSuccessfulMatch.offset = trigramInfo->offsets[trigramInfo->resultCount - 1];
  418. lastSuccessfulMatch.length = UnifiedRegex::TrigramInfo::PatternLength;
  419. }
  420. // else: leave lastMatch undefined
  421. // Make sure a matcher is allocated and holds valid last match in case the RegExp constructor
  422. // needs to fill-in details from the last match via JavascriptRegExpConstructor::EnsureValues
  423. Assert(pattern->rep.unified.program != 0);
  424. if (pattern->rep.unified.matcher == 0)
  425. pattern->rep.unified.matcher = UnifiedRegex::Matcher::New(scriptContext, pattern);
  426. *pattern->rep.unified.matcher->GroupIdToGroupInfo(0) = lastSuccessfulMatch;
  427. Assert(pattern->IsGlobal());
  428. JavascriptArray* arrayResult = CreateMatchResult(stackAllocationPointer, scriptContext, /* isGlobal */ true, pattern->NumGroups(), input);
  429. FinalizeMatchResult(scriptContext, /* isGlobal */ true, arrayResult, lastSuccessfulMatch);
  430. if (trigramInfo->resultCount > 0)
  431. {
  432. if (trigramInfo->hasCachedResultString)
  433. {
  434. for (int k = 0; k < trigramInfo->resultCount; k++)
  435. {
  436. arrayResult->DirectSetItemAt(k,
  437. static_cast<Js::JavascriptString*>(trigramInfo->cachedResult[k]));
  438. }
  439. }
  440. else
  441. {
  442. for (int k = 0; k < trigramInfo->resultCount; k++)
  443. {
  444. JavascriptString * str = SubString::New(input, trigramInfo->offsets[k], UnifiedRegex::TrigramInfo::PatternLength);
  445. trigramInfo->cachedResult[k] = str;
  446. arrayResult->DirectSetItemAt(k, str);
  447. }
  448. trigramInfo->hasCachedResultString = true;
  449. }
  450. } // otherwise, there are no results and null will be returned
  451. if (updateHistory)
  452. {
  453. PropagateLastMatch(scriptContext, /* isGlobal */ true, pattern->IsSticky(), regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
  454. }
  455. return lastSuccessfulMatch.IsUndefined() ? scriptContext->GetLibrary()->GetNull() : arrayResult;
  456. }
  457. }
  458. #endif
  459. // If global regex, result array holds substrings for each match, and group bindings are ignored
  460. // If non-global regex, result array holds overall substring and each group binding substring
  461. const bool isGlobal = pattern->IsGlobal();
  462. const bool isSticky = pattern->IsSticky();
  463. JavascriptArray* arrayResult = 0;
  464. RegexMatchState state;
  465. // If global = false and sticky = true, set offset = lastIndex, else set offset = 0
  466. CharCount offset = 0;
  467. if (!isGlobal && isSticky)
  468. {
  469. offset = regularExpression->GetLastIndex();
  470. }
  471. uint32 globalIndex = 0;
  472. PrimBeginMatch(state, scriptContext, pattern, inputStr, inputLength, false);
  473. do
  474. {
  475. if (offset > inputLength)
  476. {
  477. lastActualMatch.Reset();
  478. break;
  479. }
  480. lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
  481. if (lastActualMatch.IsUndefined())
  482. break;
  483. lastSuccessfulMatch = lastActualMatch;
  484. if (!noResult)
  485. {
  486. if (arrayResult == 0)
  487. arrayResult = CreateMatchResult(stackAllocationPointer, scriptContext, isGlobal, pattern->NumGroups(), input);
  488. JavascriptString *const matchedString = SubString::New(input, lastActualMatch.offset, lastActualMatch.length);
  489. if (isGlobal)
  490. arrayResult->DirectSetItemAt(globalIndex, matchedString);
  491. else
  492. {
  493. // The array's head segment up to length - 1 may not be filled. Write to the head segment element directly
  494. // instead of calling a helper that expects the segment to be pre-filled.
  495. Assert(globalIndex < arrayResult->GetHead()->length);
  496. static_cast<SparseArraySegment<Var> *>(arrayResult->GetHead())->elements[globalIndex] = matchedString;
  497. }
  498. globalIndex++;
  499. }
  500. offset = lastActualMatch.offset + max(lastActualMatch.length, static_cast<CharCountOrFlag>(1));
  501. } while (isGlobal);
  502. PrimEndMatch(state, scriptContext, pattern);
  503. if (updateHistory)
  504. {
  505. PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
  506. }
  507. if (arrayResult == 0)
  508. {
  509. return scriptContext->GetLibrary()->GetNull();
  510. }
  511. const int numGroups = pattern->NumGroups();
  512. if (!isGlobal)
  513. {
  514. if (numGroups > 1)
  515. {
  516. // Overall match already captured in index 0 by above, so just grab the groups
  517. Var nonMatchValue = NonMatchValue(scriptContext, false);
  518. Field(Var) *elements = ((SparseArraySegment<Var>*)arrayResult->GetHead())->elements;
  519. for (uint groupId = 1; groupId < (uint)numGroups; groupId++)
  520. {
  521. Assert(groupId < arrayResult->GetHead()->left + arrayResult->GetHead()->length);
  522. elements[groupId] = GetGroup(scriptContext, pattern, input, nonMatchValue, groupId);
  523. }
  524. }
  525. FinalizeMatchResult(scriptContext, /* isGlobal */ false, arrayResult, lastSuccessfulMatch);
  526. }
  527. else
  528. {
  529. FinalizeMatchResult(scriptContext, /* isGlobal */ true, arrayResult, lastSuccessfulMatch);
  530. }
  531. return arrayResult;
  532. }
  533. // RegExp.prototype.exec (ES5 15.10.6.2)
  534. Var RegexHelper::RegexExecImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, bool noResult, void *const stackAllocationPointer)
  535. {
  536. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  537. #if ENABLE_REGEX_CONFIG_OPTIONS
  538. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Exec, regularExpression, input);
  539. #endif
  540. const bool isGlobal = pattern->IsGlobal();
  541. const bool isSticky = pattern->IsSticky();
  542. CharCount offset;
  543. CharCount inputLength = input->GetLength();
  544. if (!GetInitialOffset(isGlobal, isSticky, regularExpression, inputLength, offset))
  545. {
  546. return scriptContext->GetLibrary()->GetNull();
  547. }
  548. UnifiedRegex::GroupInfo match; // initially undefined
  549. if (offset <= inputLength)
  550. {
  551. const char16* inputStr = input->GetString();
  552. match = SimpleMatch(scriptContext, pattern, inputStr, inputLength, offset);
  553. }
  554. // else: match remains undefined
  555. PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, match, match, true, true);
  556. if (noResult || match.IsUndefined())
  557. {
  558. return scriptContext->GetLibrary()->GetNull();
  559. }
  560. const int numGroups = pattern->NumGroups();
  561. Assert(numGroups >= 0);
  562. JavascriptArray* result = CreateExecResult(stackAllocationPointer, scriptContext, numGroups, input, match);
  563. Var nonMatchValue = NonMatchValue(scriptContext, false);
  564. Field(Var) *elements = ((SparseArraySegment<Var>*)result->GetHead())->elements;
  565. for (uint groupId = 0; groupId < (uint)numGroups; groupId++)
  566. {
  567. Assert(groupId < result->GetHead()->left + result->GetHead()->length);
  568. elements[groupId] = GetGroup(scriptContext, pattern, input, nonMatchValue, groupId);
  569. }
  570. return result;
  571. }
  572. Var RegexHelper::RegexTest(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString *input)
  573. {
  574. if (scriptContext->GetConfig()->IsES6RegExSymbolsEnabled()
  575. && IsRegexTestObservable(thisObj, scriptContext))
  576. {
  577. return RegexEs6TestImpl(scriptContext, thisObj, input);
  578. }
  579. else
  580. {
  581. JavascriptRegExp* regularExpression =
  582. JavascriptRegExp::ToRegExp(thisObj, _u("RegExp.prototype.test"), scriptContext);
  583. return RegexEs5TestImpl(scriptContext, regularExpression, input);
  584. }
  585. }
  586. bool RegexHelper::IsRegexTestObservable(RecyclableObject* instance, ScriptContext* scriptContext)
  587. {
  588. DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
  589. return !JavascriptRegExp::HasOriginalRegExType(instance)
  590. || JavascriptRegExp::HasObservableExec(regexPrototype);
  591. }
  592. Var RegexHelper::RegexEs6TestImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString *input)
  593. {
  594. Var match = JavascriptRegExp::CallExec(thisObj, input, _u("RegExp.prototype.test"), scriptContext);
  595. return JavascriptBoolean::ToVar(!JavascriptOperators::IsNull(match), scriptContext);
  596. }
  597. // RegExp.prototype.test (ES5 15.10.6.3)
  598. Var RegexHelper::RegexEs5TestImpl(ScriptContext* scriptContext, JavascriptRegExp *regularExpression, JavascriptString *input)
  599. {
  600. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  601. const char16* inputStr = input->GetString();
  602. CharCount inputLength = input->GetLength();
  603. UnifiedRegex::GroupInfo match; // initially undefined
  604. const bool isGlobal = pattern->IsGlobal();
  605. const bool isSticky = pattern->IsSticky();
  606. const bool useCache = !isGlobal && !isSticky;
  607. UnifiedRegex::RegExpTestCache* cache = nullptr;
  608. JavascriptString * cachedInput = nullptr;
  609. uint cacheIndex = 0;
  610. bool cacheHit = false;
  611. bool cachedResult = false;
  612. if (useCache)
  613. {
  614. cache = pattern->EnsureTestCache();
  615. cacheIndex = UnifiedRegex::RegexPattern::GetTestCacheIndex(input);
  616. cachedInput = cache->inputArray[cacheIndex] != nullptr ? cache->inputArray[cacheIndex]->Get() : nullptr;
  617. cacheHit = cachedInput == input;
  618. }
  619. #if ENABLE_REGEX_CONFIG_OPTIONS
  620. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Test, regularExpression, input);
  621. UnifiedRegex::RegexPattern::TraceTestCache(cacheHit, input, cachedInput, !useCache);
  622. #endif
  623. if (cacheHit)
  624. {
  625. Assert(useCache);
  626. cachedResult = (cache->resultBV.Test(cacheIndex) != 0);
  627. // If our cache says this test should produce a match (which we aren't going to compute),
  628. // notify the Ctor to invalidate the last match so it must be recomputed before access.
  629. if (cachedResult)
  630. {
  631. InvalidateLastMatchOnCtor(scriptContext, regularExpression, input);
  632. }
  633. // for debug builds, let's still do the real test so we can validate values in the cache
  634. #if !DBG
  635. return JavascriptBoolean::ToVar(cachedResult, scriptContext);
  636. #endif
  637. }
  638. CharCount offset;
  639. if (!GetInitialOffset(isGlobal, isSticky, regularExpression, inputLength, offset))
  640. {
  641. if (useCache)
  642. {
  643. Assert(offset == 0);
  644. Assert(!cacheHit || cachedInput == input);
  645. Assert(!cacheHit || cachedResult == false);
  646. cache->inputArray[cacheIndex] = regularExpression->GetRecycler()->CreateWeakReferenceHandle(input);
  647. cache->resultBV.Clear(cacheIndex);
  648. }
  649. return scriptContext->GetLibrary()->GetFalse();
  650. }
  651. if (offset <= inputLength)
  652. {
  653. match = SimpleMatch(scriptContext, pattern, inputStr, inputLength, offset);
  654. }
  655. // else: match remains undefined
  656. PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, match, match, true, true);
  657. bool wasFound = !match.IsUndefined();
  658. if (useCache)
  659. {
  660. Assert(offset == 0);
  661. Assert(!cacheHit || cachedInput == input);
  662. Assert(!cacheHit || cachedResult == wasFound);
  663. cache->inputArray[cacheIndex] = regularExpression->GetRecycler()->CreateWeakReferenceHandle(input);
  664. if (wasFound)
  665. {
  666. cache->resultBV.Set(cacheIndex);
  667. }
  668. else
  669. {
  670. cache->resultBV.Clear(cacheIndex);
  671. }
  672. }
  673. return JavascriptBoolean::ToVar(wasFound, scriptContext);
  674. }
  675. template<typename GroupFn>
  676. void RegexHelper::ReplaceFormatString
  677. ( ScriptContext* scriptContext
  678. , int numGroups
  679. , GroupFn getGroup
  680. , JavascriptString* input
  681. , const char16* matchedString
  682. , UnifiedRegex::GroupInfo match
  683. , JavascriptString* replace
  684. , int substitutions
  685. , __in_ecount(substitutions) CharCount* substitutionOffsets
  686. , CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)>& concatenated )
  687. {
  688. Var nonMatchValue = NonMatchValue(scriptContext, false);
  689. const CharCount inputLength = input->GetLength();
  690. const char16* replaceStr = replace->GetString();
  691. const CharCount replaceLength = replace->GetLength();
  692. CharCount offset = 0;
  693. for (int i = 0; i < substitutions; i++)
  694. {
  695. CharCount substitutionOffset = substitutionOffsets[i];
  696. concatenated.Append(replace, offset, substitutionOffset - offset);
  697. char16 currentChar = replaceStr[substitutionOffset + 1];
  698. if (currentChar >= _u('0') && currentChar <= _u('9'))
  699. {
  700. // We've found a substitution ref, like $32. In accordance with the standard (sec-getsubstitution),
  701. // we recognize at most two decimal digits after the dollar sign.
  702. uint16 captureIndex = (uint16)(currentChar - _u('0'));
  703. Assert(captureIndex < 10); // numeric value of single decimal digit
  704. offset = substitutionOffset + 2;
  705. if (offset < replaceLength)
  706. {
  707. currentChar = replaceStr[substitutionOffset + 2];
  708. if (currentChar >= _u('0') && currentChar <= _u('9'))
  709. {
  710. uint16 tempCaptureIndex = (10 * captureIndex) + (uint16)(currentChar - _u('0'));
  711. Assert(tempCaptureIndex < 100); // numeric value of 2-digit positive decimal number
  712. if (tempCaptureIndex < numGroups)
  713. {
  714. captureIndex = tempCaptureIndex;
  715. offset = substitutionOffset + 3;
  716. }
  717. }
  718. }
  719. Assert(captureIndex < 100); // as above, value of 2-digit positive decimal number
  720. if (captureIndex < numGroups && (captureIndex != 0))
  721. {
  722. Var group = getGroup(captureIndex, nonMatchValue);
  723. if (VarIs<JavascriptString>(group))
  724. concatenated.Append(UnsafeVarTo<JavascriptString>(group));
  725. else if (group != nonMatchValue)
  726. concatenated.Append(replace, substitutionOffset, offset - substitutionOffset);
  727. }
  728. else
  729. concatenated.Append(replace, substitutionOffset, offset - substitutionOffset);
  730. }
  731. else
  732. {
  733. switch (currentChar)
  734. {
  735. case _u('$'): // literal '$' character
  736. concatenated.Append(_u('$'));
  737. offset = substitutionOffset + 2;
  738. break;
  739. case _u('&'): // matched string
  740. concatenated.Append(matchedString, match.length);
  741. offset = substitutionOffset + 2;
  742. break;
  743. case _u('`'): // left context
  744. concatenated.Append(input, 0, match.offset);
  745. offset = substitutionOffset + 2;
  746. break;
  747. case _u('\''): // right context
  748. if (match.EndOffset() < inputLength)
  749. {
  750. concatenated.Append(input, match.EndOffset(), inputLength - match.EndOffset());
  751. }
  752. offset = substitutionOffset + 2;
  753. break;
  754. default:
  755. concatenated.Append(_u('$'));
  756. offset = substitutionOffset + 1;
  757. break;
  758. }
  759. }
  760. }
  761. concatenated.Append(replace, offset, replaceLength - offset);
  762. }
  763. int RegexHelper::GetReplaceSubstitutions(const char16 * const replaceStr, CharCount const replaceLength,
  764. ArenaAllocator * const tempAllocator, CharCount** const substitutionOffsetsOut)
  765. {
  766. int substitutions = 0;
  767. for (CharCount i = 0; i < replaceLength; i++)
  768. {
  769. if (replaceStr[i] == _u('$'))
  770. {
  771. if (++i < replaceLength)
  772. {
  773. substitutions++;
  774. }
  775. }
  776. }
  777. if (substitutions > 0)
  778. {
  779. CharCount* substitutionOffsets = AnewArray(tempAllocator, CharCount, substitutions);
  780. substitutions = 0;
  781. for (CharCount i = 0; i < replaceLength; i++)
  782. {
  783. if (replaceStr[i] == _u('$'))
  784. {
  785. if (i < (replaceLength - 1))
  786. {
  787. #pragma prefast(suppress:26000, "index doesn't overflow the buffer")
  788. substitutionOffsets[substitutions] = i;
  789. i++;
  790. substitutions++;
  791. }
  792. }
  793. }
  794. *substitutionOffsetsOut = substitutionOffsets;
  795. }
  796. return substitutions;
  797. }
  798. Var RegexHelper::RegexReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult)
  799. {
  800. ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
  801. if (scriptConfig->IsES6RegExSymbolsEnabled() && IsRegexSymbolReplaceObservable(thisObj, scriptContext))
  802. {
  803. return RegexEs6ReplaceImpl(scriptContext, thisObj, input, replace, noResult);
  804. }
  805. else
  806. {
  807. PCWSTR varName = scriptConfig->IsES6RegExSymbolsEnabled()
  808. ? _u("RegExp.prototype[Symbol.replace]")
  809. : _u("String.prototype.replace");
  810. JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
  811. return RegexEs5ReplaceImpl(scriptContext, regularExpression, input, replace, noResult);
  812. }
  813. }
  814. bool RegexHelper::IsRegexSymbolReplaceObservable(RecyclableObject* instance, ScriptContext* scriptContext)
  815. {
  816. DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
  817. return !JavascriptRegExp::HasOriginalRegExType(instance)
  818. || JavascriptRegExp::HasObservableUnicodeFlag(regexPrototype)
  819. || JavascriptRegExp::HasObservableExec(regexPrototype)
  820. || JavascriptRegExp::HasObservableGlobalFlag(regexPrototype);
  821. }
  822. Var RegexHelper::RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult)
  823. {
  824. auto appendReplacement = [&](
  825. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)>& resultBuilder,
  826. ArenaAllocator* tempAlloc,
  827. JavascriptString* matchStr,
  828. int numberOfCaptures,
  829. Field(Var)* captures,
  830. CharCount position)
  831. {
  832. CharCount* substitutionOffsets = nullptr;
  833. int substitutions = GetReplaceSubstitutions(
  834. replace->GetString(),
  835. replace->GetLength(),
  836. tempAlloc,
  837. &substitutionOffsets);
  838. auto getGroup = [&](int captureIndex, Var nonMatchValue) {
  839. return captureIndex <= numberOfCaptures ? PointerValue(captures[captureIndex]) : nonMatchValue;
  840. };
  841. UnifiedRegex::GroupInfo match(position, matchStr->GetLength());
  842. int numGroups = numberOfCaptures + 1; // Take group 0 into account.
  843. ReplaceFormatString(
  844. scriptContext,
  845. numGroups,
  846. getGroup,
  847. input,
  848. matchStr->GetString(),
  849. match,
  850. replace,
  851. substitutions,
  852. substitutionOffsets,
  853. resultBuilder);
  854. };
  855. return RegexEs6ReplaceImpl(scriptContext, thisObj, input, appendReplacement, noResult);
  856. }
  857. Var RegexHelper::RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, RecyclableObject* replaceFn)
  858. {
  859. auto appendReplacement = [&](
  860. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)>& resultBuilder,
  861. ArenaAllocator* tempAlloc,
  862. JavascriptString* matchStr,
  863. int numberOfCaptures,
  864. Field(Var)* captures,
  865. CharCount position)
  866. {
  867. // replaceFn Arguments:
  868. //
  869. // 0: this
  870. // 1: matched
  871. // 2: capture1
  872. // ...
  873. // N + 1: capture N
  874. // N + 2: position
  875. // N + 3: input
  876. // Number of captures can be at most 99, so we won't overflow.
  877. ushort argCount = (ushort) numberOfCaptures + 4;
  878. PROBE_STACK_NO_DISPOSE(scriptContext, argCount * sizeof(Var));
  879. ThreadContext* threadContext = scriptContext->GetThreadContext();
  880. Var* args = (Var*) _alloca(argCount * sizeof(Var));
  881. args[0] = scriptContext->GetLibrary()->GetUndefined();
  882. #pragma prefast(suppress:6386, "The write is within the bounds")
  883. args[1] = matchStr;
  884. for (int i = 1; i <= numberOfCaptures; ++i)
  885. {
  886. args[i + 1] = captures[i];
  887. }
  888. args[numberOfCaptures + 2] = JavascriptNumber::ToVar(position, scriptContext);
  889. args[numberOfCaptures + 3] = input;
  890. Js::Var replaceFnResult = threadContext->ExecuteImplicitCall(replaceFn, Js::ImplicitCall_Accessor, [=]()->Js::Var
  891. {
  892. return JavascriptFunction::CallFunction<true>(replaceFn, replaceFn->GetEntryPoint(), Arguments(CallInfo(argCount), args));
  893. });
  894. JavascriptString* replace = JavascriptConversion::ToString(replaceFnResult, scriptContext);
  895. resultBuilder.Append(replace);
  896. };
  897. return RegexEs6ReplaceImpl(scriptContext, thisObj, input, appendReplacement, /* noResult */ false);
  898. }
  899. template<typename ReplacementFn>
  900. Var RegexHelper::RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, ReplacementFn appendReplacement, bool noResult)
  901. {
  902. bool global = JavascriptRegExp::GetGlobalProperty(thisObj, scriptContext);
  903. bool unicode = false; // Dummy value. It isn't used below unless "global" is "true".
  904. if (global)
  905. {
  906. unicode = JavascriptRegExp::GetUnicodeProperty(thisObj, scriptContext);
  907. JavascriptRegExp::SetLastIndexProperty(thisObj, TaggedInt::ToVarUnchecked(0), scriptContext);
  908. }
  909. JavascriptString* accumulatedResult = nullptr;
  910. Recycler* recycler = scriptContext->GetRecycler();
  911. JsUtil::List<RecyclableObject*>* results = RecyclerNew(recycler, JsUtil::List<RecyclableObject*>, recycler);
  912. while (true)
  913. {
  914. PCWSTR varName = _u("RegExp.prototype[Symbol.replace]");
  915. Var result = JavascriptRegExp::CallExec(thisObj, input, varName, scriptContext);
  916. if (JavascriptOperators::IsNull(result))
  917. {
  918. break;
  919. }
  920. RecyclableObject* resultObj = ExecResultToRecyclableObject(result);
  921. results->Add(resultObj);
  922. if (!global)
  923. {
  924. break;
  925. }
  926. JavascriptString* matchStr = GetMatchStrFromResult(resultObj, scriptContext);
  927. AdvanceLastIndex(thisObj, input, matchStr, unicode, scriptContext);
  928. }
  929. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> accumulatedResultBuilder(scriptContext);
  930. CharCount inputLength = input->GetLength();
  931. CharCount nextSourcePosition = 0;
  932. size_t previousNumberOfCapturesToKeep = 0;
  933. Field(Var)* captures = nullptr;
  934. BEGIN_TEMP_ALLOCATOR(tempAlloc, scriptContext, _u("RegexHelper"))
  935. {
  936. results->Map([&](int resultIndex, RecyclableObject* resultObj) {
  937. int64 length = JavascriptConversion::ToLength(
  938. JavascriptOperators::GetProperty(resultObj, PropertyIds::length, scriptContext),
  939. scriptContext);
  940. uint64 numberOfCaptures = (uint64) max(length - 1, (int64) 0);
  941. JavascriptString* matchStr = GetMatchStrFromResult(resultObj, scriptContext);
  942. int64 index = JavascriptConversion::ToLength(
  943. JavascriptOperators::GetProperty(resultObj, PropertyIds::index, scriptContext),
  944. scriptContext);
  945. CharCount position = max(
  946. min(JavascriptRegExp::GetIndexOrMax(index), inputLength),
  947. (CharCount) 0);
  948. // Capture groups can be referenced using at most two digits.
  949. const uint64 maxNumberOfCaptures = 99;
  950. size_t numberOfCapturesToKeep = (size_t) min(numberOfCaptures, maxNumberOfCaptures);
  951. if (captures == nullptr)
  952. {
  953. captures = RecyclerNewArray(recycler, Field(Var), numberOfCapturesToKeep + 1);
  954. }
  955. else if (numberOfCapturesToKeep != previousNumberOfCapturesToKeep)
  956. {
  957. size_t existingBytes = (previousNumberOfCapturesToKeep + 1) * sizeof(Var*);
  958. size_t requestedBytes = (numberOfCapturesToKeep + 1) * sizeof(Var*);
  959. captures = (Field(Var)*) recycler->Realloc(captures, existingBytes, requestedBytes);
  960. }
  961. previousNumberOfCapturesToKeep = numberOfCapturesToKeep;
  962. for (uint64 i = 1; i <= numberOfCaptures; ++i)
  963. {
  964. Var nextCapture = JavascriptOperators::GetItem(resultObj, i, scriptContext);
  965. if (!JavascriptOperators::IsUndefined(nextCapture))
  966. {
  967. nextCapture = JavascriptConversion::ToString(nextCapture, scriptContext);
  968. }
  969. if (i <= numberOfCapturesToKeep)
  970. {
  971. captures[i] = nextCapture;
  972. }
  973. }
  974. if (position >= nextSourcePosition)
  975. {
  976. CharCount substringLength = position - nextSourcePosition;
  977. accumulatedResultBuilder.Append(input, nextSourcePosition, substringLength);
  978. appendReplacement(accumulatedResultBuilder, tempAlloc, matchStr, (int) numberOfCapturesToKeep, captures, position);
  979. nextSourcePosition = JavascriptRegExp::AddIndex(position, matchStr->GetLength());
  980. }
  981. });
  982. }
  983. END_TEMP_ALLOCATOR(tempAlloc, scriptContext);
  984. if (nextSourcePosition < inputLength)
  985. {
  986. CharCount substringLength = inputLength - nextSourcePosition;
  987. accumulatedResultBuilder.Append(input, nextSourcePosition, substringLength);
  988. }
  989. accumulatedResult = accumulatedResultBuilder.ToString();
  990. Assert(accumulatedResult != nullptr);
  991. return accumulatedResult;
  992. }
  993. // String.prototype.replace, replace value has been converted to a string (ES5 15.5.4.11)
  994. Var RegexHelper::RegexEs5ReplaceImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace, bool noResult)
  995. {
  996. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  997. const char16* replaceStr = replace->GetString();
  998. CharCount replaceLength = replace->GetLength();
  999. const char16* inputStr = input->GetString();
  1000. CharCount inputLength = input->GetLength();
  1001. JavascriptString* newString = nullptr;
  1002. #if ENABLE_REGEX_CONFIG_OPTIONS
  1003. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Replace, regularExpression, input, replace);
  1004. #endif
  1005. RegexMatchState state;
  1006. PrimBeginMatch(state, scriptContext, pattern, inputStr, inputLength, true);
  1007. UnifiedRegex::GroupInfo lastActualMatch;
  1008. UnifiedRegex::GroupInfo lastSuccessfulMatch;
  1009. const bool isGlobal = pattern->IsGlobal();
  1010. const bool isSticky = pattern->IsSticky();
  1011. // If global = false and sticky = true, set offset = lastIndex, else set offset = 0
  1012. CharCount offset = 0;
  1013. if (!isGlobal && isSticky)
  1014. {
  1015. offset = regularExpression->GetLastIndex();
  1016. }
  1017. if (!noResult)
  1018. {
  1019. CharCount* substitutionOffsets = nullptr;
  1020. int substitutions = GetReplaceSubstitutions(replaceStr, replaceLength,
  1021. state.tempAllocatorObj->GetAllocator(), &substitutionOffsets);
  1022. // Use to see if we already have partial result populated in concatenated
  1023. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> concatenated(scriptContext);
  1024. // If lastIndex > 0, append input[0..offset] characters to the result
  1025. if (offset > 0)
  1026. {
  1027. concatenated.Append(input, 0, min(offset, inputLength));
  1028. }
  1029. do
  1030. {
  1031. if (offset > inputLength)
  1032. {
  1033. lastActualMatch.Reset();
  1034. break;
  1035. }
  1036. lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
  1037. if (lastActualMatch.IsUndefined())
  1038. break;
  1039. lastSuccessfulMatch = lastActualMatch;
  1040. concatenated.Append(input, offset, lastActualMatch.offset - offset);
  1041. if (substitutionOffsets != 0)
  1042. {
  1043. auto getGroup = [&](int captureIndex, Var nonMatchValue) {
  1044. return GetGroup(scriptContext, pattern, input, nonMatchValue, captureIndex);
  1045. };
  1046. const char16* matchedString = inputStr + lastActualMatch.offset;
  1047. ReplaceFormatString(scriptContext, pattern->NumGroups(), getGroup, input, matchedString, lastActualMatch, replace, substitutions, substitutionOffsets, concatenated);
  1048. }
  1049. else
  1050. {
  1051. concatenated.Append(replace);
  1052. }
  1053. if (lastActualMatch.length == 0)
  1054. {
  1055. if (lastActualMatch.offset < inputLength)
  1056. {
  1057. concatenated.Append(inputStr[lastActualMatch.offset]);
  1058. }
  1059. offset = lastActualMatch.offset + 1;
  1060. }
  1061. else
  1062. {
  1063. offset = lastActualMatch.EndOffset();
  1064. }
  1065. }
  1066. while (isGlobal);
  1067. if (offset == 0)
  1068. {
  1069. // There was no successful match so the result is the input string.
  1070. newString = input;
  1071. }
  1072. else
  1073. {
  1074. if (offset < inputLength)
  1075. {
  1076. concatenated.Append(input, offset, inputLength - offset);
  1077. }
  1078. newString = concatenated.ToString();
  1079. }
  1080. substitutionOffsets = 0;
  1081. }
  1082. else
  1083. {
  1084. do
  1085. {
  1086. if (offset > inputLength)
  1087. {
  1088. lastActualMatch.Reset();
  1089. break;
  1090. }
  1091. lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
  1092. if (lastActualMatch.IsUndefined())
  1093. break;
  1094. lastSuccessfulMatch = lastActualMatch;
  1095. offset = lastActualMatch.length == 0? lastActualMatch.offset + 1 : lastActualMatch.EndOffset();
  1096. }
  1097. while (isGlobal);
  1098. newString = scriptContext->GetLibrary()->GetEmptyString();
  1099. }
  1100. PrimEndMatch(state, scriptContext, pattern);
  1101. PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
  1102. return newString;
  1103. }
  1104. Var RegexHelper::RegexReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, RecyclableObject* replacefn)
  1105. {
  1106. ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
  1107. if (scriptConfig->IsES6RegExSymbolsEnabled() && IsRegexSymbolReplaceObservable(thisObj, scriptContext))
  1108. {
  1109. return RegexEs6ReplaceImpl(scriptContext, thisObj, input, replacefn);
  1110. }
  1111. else
  1112. {
  1113. PCWSTR varName = scriptConfig->IsES6RegExSymbolsEnabled()
  1114. ? _u("RegExp.prototype[Symbol.replace]")
  1115. : _u("String.prototype.replace");
  1116. JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
  1117. return RegexEs5ReplaceImpl(scriptContext, regularExpression, input, replacefn);
  1118. }
  1119. }
  1120. // String.prototype.replace, replace value is a function (ES5 15.5.4.11)
  1121. Var RegexHelper::RegexEs5ReplaceImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, RecyclableObject* replacefn)
  1122. {
  1123. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  1124. JavascriptString* newString = nullptr;
  1125. const char16* inputStr = input->GetString();
  1126. CharCount inputLength = input->GetLength();
  1127. const uint16 numGroups = pattern->NumGroups();
  1128. Var nonMatchValue = NonMatchValue(scriptContext, false);
  1129. UnifiedRegex::GroupInfo lastMatch; // initially undefined
  1130. // Regex parser should ensure this condition holds, but let's be doubly sure.
  1131. // numGroups is always positive because the entire regex counts as a capturing group.
  1132. AssertOrFailFast(0 < numGroups && numGroups <= INT16_MAX);
  1133. #if ENABLE_REGEX_CONFIG_OPTIONS
  1134. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Replace, regularExpression, input, scriptContext->GetLibrary()->CreateStringFromCppLiteral(_u("<replace function>")));
  1135. #endif
  1136. RegexMatchState state;
  1137. PrimBeginMatch(state, scriptContext, pattern, inputStr, inputLength, false);
  1138. // NOTE: These must be kept out of the scope of the try below!
  1139. const bool isGlobal = pattern->IsGlobal();
  1140. const bool isSticky = pattern->IsSticky();
  1141. // If global = true, set lastIndex to 0 in case it is used in replacefn
  1142. if (isGlobal)
  1143. {
  1144. regularExpression->SetLastIndex(0);
  1145. }
  1146. // If global = false and sticky = true, set offset = lastIndex, else set offset = 0
  1147. CharCount offset = 0;
  1148. if (!isGlobal && isSticky)
  1149. {
  1150. offset = regularExpression->GetLastIndex();
  1151. }
  1152. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> concatenated(scriptContext);
  1153. UnifiedRegex::GroupInfo lastActualMatch;
  1154. UnifiedRegex::GroupInfo lastSuccessfulMatch;
  1155. // Replace function must be called with arguments (<function's this>, group0, ..., groupn, offset, input)
  1156. // The garbage collector must know about this array since it is being passed back into script land
  1157. Var* replaceArgs;
  1158. PROBE_STACK_NO_DISPOSE(scriptContext, (numGroups + 3) * sizeof(Var));
  1159. replaceArgs = (Var*)_alloca((numGroups + 3) * sizeof(Var));
  1160. replaceArgs[0] = scriptContext->GetLibrary()->GetUndefined();
  1161. replaceArgs[numGroups + 2] = input;
  1162. if (offset > 0)
  1163. {
  1164. concatenated.Append(input, 0, min(offset, inputLength));
  1165. }
  1166. do
  1167. {
  1168. if (offset > inputLength)
  1169. {
  1170. lastActualMatch.Reset();
  1171. break;
  1172. }
  1173. lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
  1174. if (lastActualMatch.IsUndefined())
  1175. break;
  1176. lastSuccessfulMatch = lastActualMatch;
  1177. for (int groupId = 0; groupId < numGroups; groupId++)
  1178. replaceArgs[groupId + 1] = GetGroup(scriptContext, pattern, input, nonMatchValue, groupId);
  1179. replaceArgs[numGroups + 1] = JavascriptNumber::ToVar(lastActualMatch.offset, scriptContext);
  1180. // The called function must see the global state updated by the current match
  1181. // (Should the function reach into a RegExp field, the pattern will still be valid, thus there's no
  1182. // danger of the primitive regex matcher being re-entered)
  1183. // WARNING: We go off into script land here, which way in turn invoke a regex operation, even on the
  1184. // same regex.
  1185. ThreadContext* threadContext = scriptContext->GetThreadContext();
  1186. Var replaceVar = threadContext->ExecuteImplicitCall(replacefn, ImplicitCall_Accessor, [=]()->Js::Var
  1187. {
  1188. return JavascriptFunction::CallFunction<true>(replacefn, replacefn->GetEntryPoint(), Arguments(CallInfo(UInt16Math::Add(numGroups, 3)), replaceArgs));
  1189. });
  1190. JavascriptString* replace = JavascriptConversion::ToString(replaceVar, scriptContext);
  1191. concatenated.Append(input, offset, lastActualMatch.offset - offset);
  1192. concatenated.Append(replace);
  1193. if (lastActualMatch.length == 0)
  1194. {
  1195. if (lastActualMatch.offset < inputLength)
  1196. {
  1197. concatenated.Append(inputStr[lastActualMatch.offset]);
  1198. }
  1199. offset = lastActualMatch.offset + 1;
  1200. }
  1201. else
  1202. {
  1203. offset = lastActualMatch.EndOffset();
  1204. }
  1205. }
  1206. while (isGlobal);
  1207. PrimEndMatch(state, scriptContext, pattern);
  1208. if (offset == 0)
  1209. {
  1210. // There was no successful match so the result is the input string.
  1211. newString = input;
  1212. }
  1213. else
  1214. {
  1215. if (offset < inputLength)
  1216. {
  1217. concatenated.Append(input, offset, inputLength - offset);
  1218. }
  1219. newString = concatenated.ToString();
  1220. }
  1221. PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
  1222. return newString;
  1223. }
  1224. Var RegexHelper::StringReplace(JavascriptString* match, JavascriptString* input, JavascriptString* replace)
  1225. {
  1226. CharCount matchedIndex = JavascriptString::strstr(input, match, true);
  1227. if (matchedIndex == CharCountFlag)
  1228. {
  1229. return input;
  1230. }
  1231. const char16 *const replaceStr = replace->GetString();
  1232. // Unfortunately, due to the possibility of there being $ escapes, we can't just wmemcpy the replace string. Check if we
  1233. // have a small replace string that we can quickly scan for '$', to see if we can just wmemcpy.
  1234. bool definitelyNoEscapes = replace->GetLength() == 0;
  1235. if(!definitelyNoEscapes && replace->GetLength() <= 8)
  1236. {
  1237. CharCount i = 0;
  1238. for(; i < replace->GetLength() && replaceStr[i] != _u('$'); ++i);
  1239. definitelyNoEscapes = i >= replace->GetLength();
  1240. }
  1241. if(definitelyNoEscapes)
  1242. {
  1243. const char16* inputStr = input->GetString();
  1244. const char16* prefixStr = inputStr;
  1245. CharCount prefixLength = (CharCount)matchedIndex;
  1246. const char16* postfixStr = inputStr + prefixLength + match->GetLength();
  1247. CharCount postfixLength = input->GetLength() - prefixLength - match->GetLength();
  1248. CharCount newLength = prefixLength + postfixLength + replace->GetLength();
  1249. BufferStringBuilder bufferString(newLength, match->GetScriptContext());
  1250. bufferString.SetContent(prefixStr, prefixLength,
  1251. replaceStr, replace->GetLength(),
  1252. postfixStr, postfixLength);
  1253. return bufferString.ToString();
  1254. }
  1255. CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> concatenated(input->GetScriptContext());
  1256. // Copy portion of input string that precedes the matched substring
  1257. concatenated.Append(input, 0, matchedIndex);
  1258. // Copy the replace string with substitutions
  1259. CharCount i = 0, j = 0;
  1260. for(; j < replace->GetLength(); ++j)
  1261. {
  1262. if(replaceStr[j] == _u('$') && j + 1 < replace->GetLength())
  1263. {
  1264. switch(replaceStr[j + 1])
  1265. {
  1266. case _u('$'): // literal '$'
  1267. ++j;
  1268. concatenated.Append(replace, i, j - i);
  1269. i = j + 1;
  1270. break;
  1271. case _u('&'): // matched substring
  1272. concatenated.Append(replace, i, j - i);
  1273. concatenated.Append(match);
  1274. ++j;
  1275. i = j + 1;
  1276. break;
  1277. case _u('`'): // portion of input string that precedes the matched substring
  1278. concatenated.Append(replace, i, j - i);
  1279. concatenated.Append(input, 0, matchedIndex);
  1280. ++j;
  1281. i = j + 1;
  1282. break;
  1283. case _u('\''): // portion of input string that follows the matched substring
  1284. concatenated.Append(replace, i, j - i);
  1285. concatenated.Append(
  1286. input,
  1287. matchedIndex + match->GetLength(),
  1288. input->GetLength() - matchedIndex - match->GetLength());
  1289. ++j;
  1290. i = j + 1;
  1291. break;
  1292. default: // take both the initial '$' and the following character literally
  1293. ++j;
  1294. }
  1295. }
  1296. }
  1297. Assert(i <= j);
  1298. concatenated.Append(replace, i, j - i);
  1299. // Copy portion of input string that follows the matched substring
  1300. concatenated.Append(input, matchedIndex + match->GetLength(), input->GetLength() - matchedIndex - match->GetLength());
  1301. return concatenated.ToString();
  1302. }
  1303. Var RegexHelper::StringReplace(ScriptContext* scriptContext, JavascriptString* match, JavascriptString* input, RecyclableObject* replacefn)
  1304. {
  1305. CharCount indexMatched = JavascriptString::strstr(input, match, true);
  1306. Assert(match->GetScriptContext() == scriptContext);
  1307. Assert(input->GetScriptContext() == scriptContext);
  1308. if (indexMatched != CharCountFlag)
  1309. {
  1310. ThreadContext* threadContext = scriptContext->GetThreadContext();
  1311. Var replaceVar = threadContext->ExecuteImplicitCall(replacefn, ImplicitCall_Accessor, [=]()->Js::Var
  1312. {
  1313. Var pThis = scriptContext->GetLibrary()->GetUndefined();
  1314. return CALL_FUNCTION(threadContext, replacefn, CallInfo(4), pThis, match, JavascriptNumber::ToVar((int)indexMatched, scriptContext), input);
  1315. });
  1316. JavascriptString* replace = JavascriptConversion::ToString(replaceVar, scriptContext);
  1317. const char16* inputStr = input->GetString();
  1318. const char16* prefixStr = inputStr;
  1319. CharCount prefixLength = indexMatched;
  1320. const char16* postfixStr = inputStr + prefixLength + match->GetLength();
  1321. CharCount postfixLength = input->GetLength() - prefixLength - match->GetLength();
  1322. CharCount newLength = prefixLength + postfixLength + replace->GetLength();
  1323. BufferStringBuilder bufferString(newLength, match->GetScriptContext());
  1324. bufferString.SetContent(prefixStr, prefixLength,
  1325. replace->GetString(), replace->GetLength(),
  1326. postfixStr, postfixLength);
  1327. return bufferString.ToString();
  1328. }
  1329. return input;
  1330. }
  1331. void RegexHelper::AppendSubString(ScriptContext* scriptContext, JavascriptArray* ary, JavascriptString* input, CharCount startInclusive, CharCount endExclusive)
  1332. {
  1333. Assert(endExclusive >= startInclusive);
  1334. Assert(endExclusive <= input->GetLength());
  1335. CharCount length = endExclusive - startInclusive;
  1336. JavascriptString* subString;
  1337. if (length == 0)
  1338. {
  1339. subString = scriptContext->GetLibrary()->GetEmptyString();
  1340. }
  1341. else if (length == 1)
  1342. {
  1343. subString = scriptContext->GetLibrary()->GetCharStringCache().GetStringForChar(input->GetString()[startInclusive]);
  1344. }
  1345. else
  1346. {
  1347. subString = SubString::New(input, startInclusive, length);
  1348. }
  1349. ary->DirectAppendItem(subString);
  1350. }
  1351. inline UnifiedRegex::RegexPattern *RegexHelper::GetSplitPattern(ScriptContext* scriptContext, JavascriptRegExp *regularExpression)
  1352. {
  1353. UnifiedRegex::RegexPattern* splitPattern = regularExpression->GetSplitPattern();
  1354. if (!splitPattern)
  1355. {
  1356. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  1357. bool isSticky = (pattern->GetFlags() & UnifiedRegex::StickyRegexFlag) != 0;
  1358. if (!isSticky)
  1359. {
  1360. splitPattern = pattern;
  1361. }
  1362. else
  1363. {
  1364. // When the sticky flag is present, the pattern will match the input only at
  1365. // the beginning since "lastIndex" is set to 0 before the first iteration.
  1366. // However, for split(), we need to look for the pattern anywhere in the input.
  1367. //
  1368. // One way to handle this is to use the original pattern with the sticky flag and
  1369. // when it fails, move to the next character and retry.
  1370. //
  1371. // Another way, which is implemented here, is to create another pattern without the
  1372. // sticky flag and have it automatically look for itself anywhere in the input. This
  1373. // way, we can also take advantage of the optimizations for the global search (e.g.,
  1374. // the Boyer-Moore string search).
  1375. InternalString source = pattern->GetSource();
  1376. UnifiedRegex::RegexFlags nonStickyFlags =
  1377. static_cast<UnifiedRegex::RegexFlags>(pattern->GetFlags() & ~UnifiedRegex::StickyRegexFlag);
  1378. splitPattern = CompileDynamic(
  1379. scriptContext,
  1380. source.GetBuffer(),
  1381. source.GetLength(),
  1382. nonStickyFlags,
  1383. pattern->IsLiteral());
  1384. }
  1385. regularExpression->SetSplitPattern(splitPattern);
  1386. }
  1387. return splitPattern;
  1388. }
  1389. Var RegexHelper::RegexSplitImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
  1390. {
  1391. ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
  1392. if (scriptConfig->IsES6RegExSymbolsEnabled()
  1393. && IsRegexSymbolSplitObservable(thisObj, scriptContext))
  1394. {
  1395. return RegexEs6SplitImpl(scriptContext, thisObj, input, limit, noResult, stackAllocationPointer);
  1396. }
  1397. else
  1398. {
  1399. PCWSTR varName = scriptContext->GetConfig()->IsES6RegExSymbolsEnabled()
  1400. ? _u("RegExp.prototype[Symbol.split]")
  1401. : _u("String.prototype.split");
  1402. JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
  1403. return RegexEs5SplitImpl(scriptContext, regularExpression, input, limit, noResult, stackAllocationPointer);
  1404. }
  1405. }
  1406. bool RegexHelper::IsRegexSymbolSplitObservable(RecyclableObject* instance, ScriptContext* scriptContext)
  1407. {
  1408. DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
  1409. return !JavascriptRegExp::HasOriginalRegExType(instance)
  1410. || JavascriptRegExp::HasObservableConstructor(regexPrototype)
  1411. || JavascriptRegExp::HasObservableFlags(regexPrototype)
  1412. || JavascriptRegExp::HasObservableExec(regexPrototype);
  1413. }
  1414. Var RegexHelper::RegexEs6SplitImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
  1415. {
  1416. PCWSTR const varName = _u("RegExp.prototype[Symbol.split]");
  1417. JavascriptFunction* defaultConstructor = scriptContext->GetLibrary()->GetRegExpConstructor();
  1418. RecyclableObject* speciesConstructor = JavascriptOperators::SpeciesConstructor(
  1419. thisObj,
  1420. defaultConstructor,
  1421. scriptContext);
  1422. AssertOrFailFast(JavascriptOperators::IsConstructor(speciesConstructor));
  1423. JavascriptString* flags = JavascriptConversion::ToString(
  1424. JavascriptOperators::GetProperty(thisObj, PropertyIds::flags, scriptContext),
  1425. scriptContext);
  1426. bool unicode = wcsstr(flags->GetString(), _u("u")) != nullptr;
  1427. flags = AppendStickyToFlagsIfNeeded(flags, scriptContext);
  1428. bool isDefaultConstructor = speciesConstructor == defaultConstructor;
  1429. Var regEx = JavascriptOperators::NewObjectCreationHelper_ReentrancySafe(speciesConstructor, isDefaultConstructor, scriptContext->GetThreadContext(), [=]()->Js::Var
  1430. {
  1431. Js::Var args[] = { speciesConstructor, thisObj, flags };
  1432. Js::CallInfo callInfo(Js::CallFlags_New, _countof(args));
  1433. return JavascriptOperators::NewScObject(
  1434. speciesConstructor,
  1435. Js::Arguments(callInfo, args),
  1436. scriptContext);
  1437. });
  1438. RecyclableObject* splitter = UnsafeVarTo<RecyclableObject>(regEx);
  1439. JavascriptArray* arrayResult = scriptContext->GetLibrary()->CreateArray();
  1440. if (limit == 0)
  1441. {
  1442. return arrayResult;
  1443. }
  1444. CharCount inputLength = input->GetLength();
  1445. if (inputLength == 0)
  1446. {
  1447. Var result = JavascriptRegExp::CallExec(splitter, input, varName, scriptContext);
  1448. if (!JavascriptOperators::IsNull(result))
  1449. {
  1450. return arrayResult;
  1451. }
  1452. arrayResult->DirectAppendItem(input);
  1453. return arrayResult;
  1454. }
  1455. CharCount substringStartIndex = 0; // 'p' in spec
  1456. CharCount substringEndIndex = substringStartIndex; // 'q' in spec
  1457. do // inputLength > 0
  1458. {
  1459. JavascriptRegExp::SetLastIndexProperty(splitter, substringEndIndex, scriptContext);
  1460. Var result = JavascriptRegExp::CallExec(splitter, input, varName, scriptContext); // 'z' in spec
  1461. if (JavascriptOperators::IsNull(result))
  1462. {
  1463. substringEndIndex = AdvanceStringIndex(input, substringEndIndex, unicode);
  1464. }
  1465. else
  1466. {
  1467. CharCount endIndex = JavascriptRegExp::GetLastIndexProperty(splitter, scriptContext); // 'e' in spec
  1468. endIndex = min(endIndex, inputLength);
  1469. if (endIndex == substringStartIndex)
  1470. {
  1471. substringEndIndex = AdvanceStringIndex(input, substringEndIndex, unicode);
  1472. }
  1473. else
  1474. {
  1475. AppendSubString(scriptContext, arrayResult, input, substringStartIndex, substringEndIndex);
  1476. if (arrayResult->GetLength() == limit)
  1477. {
  1478. return arrayResult;
  1479. }
  1480. substringStartIndex = endIndex;
  1481. RecyclableObject* resultObject = ExecResultToRecyclableObject(result);
  1482. int64 length = JavascriptConversion::ToLength(
  1483. JavascriptOperators::GetProperty(resultObject, PropertyIds::length, scriptContext),
  1484. scriptContext);
  1485. uint64 numberOfCaptures = max(length - 1, (int64) 0);
  1486. for (uint64 i = 1; i <= numberOfCaptures; ++i)
  1487. {
  1488. Var nextCapture = JavascriptOperators::GetItem(resultObject, i, scriptContext);
  1489. arrayResult->DirectAppendItem(nextCapture);
  1490. if (arrayResult->GetLength() == limit)
  1491. {
  1492. return arrayResult;
  1493. }
  1494. }
  1495. substringEndIndex = substringStartIndex;
  1496. }
  1497. }
  1498. }
  1499. while (substringEndIndex < inputLength);
  1500. AppendSubString(scriptContext, arrayResult, input, substringStartIndex, substringEndIndex);
  1501. return arrayResult;
  1502. }
  1503. JavascriptString* RegexHelper::AppendStickyToFlagsIfNeeded(JavascriptString* flags, ScriptContext* scriptContext)
  1504. {
  1505. const char16* flagsString = flags->GetString();
  1506. if (wcsstr(flagsString, _u("y")) == nullptr)
  1507. {
  1508. BEGIN_TEMP_ALLOCATOR(tempAlloc, scriptContext, _u("RegexHelper"))
  1509. {
  1510. StringBuilder<ArenaAllocator> bs(tempAlloc, flags->GetLength() + 1);
  1511. bs.Append(flagsString, flags->GetLength());
  1512. bs.Append(_u('y'));
  1513. flags = Js::JavascriptString::NewCopyBuffer(bs.Detach(), bs.Count(), scriptContext);
  1514. }
  1515. END_TEMP_ALLOCATOR(tempAlloc, scriptContext);
  1516. }
  1517. return flags;
  1518. }
  1519. // String.prototype.split (ES5 15.5.4.14)
  1520. Var RegexHelper::RegexEs5SplitImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
  1521. {
  1522. if (noResult && scriptContext->GetConfig()->SkipSplitOnNoResult())
  1523. {
  1524. // TODO: Fix this so that the side effect for PropagateLastMatch is done
  1525. return scriptContext->GetLibrary()->GetNull();
  1526. }
  1527. #if ENABLE_REGEX_CONFIG_OPTIONS
  1528. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Split, regularExpression, input);
  1529. #endif
  1530. JavascriptArray* ary = scriptContext->GetLibrary()->CreateArrayOnStack(stackAllocationPointer);
  1531. if (limit == 0)
  1532. {
  1533. // SPECIAL CASE: Zero limit
  1534. return ary;
  1535. }
  1536. UnifiedRegex::RegexPattern *splitPattern = GetSplitPattern(scriptContext, regularExpression);
  1537. const char16* inputStr = input->GetString();
  1538. CharCount inputLength = input->GetLength(); // s in spec
  1539. const int numGroups = splitPattern->NumGroups();
  1540. Var nonMatchValue = NonMatchValue(scriptContext, false);
  1541. UnifiedRegex::GroupInfo lastSuccessfulMatch; // initially undefined
  1542. RegexMatchState state;
  1543. PrimBeginMatch(state, scriptContext, splitPattern, inputStr, inputLength, false);
  1544. if (inputLength == 0)
  1545. {
  1546. // SPECIAL CASE: Empty string
  1547. UnifiedRegex::GroupInfo match = PrimMatch(state, scriptContext, splitPattern, inputLength, 0);
  1548. if (match.IsUndefined())
  1549. ary->DirectAppendItem(input);
  1550. else
  1551. lastSuccessfulMatch = match;
  1552. }
  1553. else
  1554. {
  1555. CharCount copyOffset = 0; // p in spec
  1556. CharCount startOffset = 0; // q in spec
  1557. CharCount inputLimit = inputLength;
  1558. while (startOffset < inputLimit)
  1559. {
  1560. UnifiedRegex::GroupInfo match = PrimMatch(state, scriptContext, splitPattern, inputLength, startOffset);
  1561. if (match.IsUndefined())
  1562. break;
  1563. lastSuccessfulMatch = match;
  1564. if (match.offset >= inputLimit)
  1565. break;
  1566. startOffset = match.offset;
  1567. CharCount endOffset = match.EndOffset(); // e in spec
  1568. if (endOffset == copyOffset)
  1569. startOffset++;
  1570. else
  1571. {
  1572. AppendSubString(scriptContext, ary, input, copyOffset, startOffset);
  1573. if (ary->GetLength() >= limit)
  1574. break;
  1575. startOffset = copyOffset = endOffset;
  1576. for (int groupId = 1; groupId < numGroups; groupId++)
  1577. {
  1578. ary->DirectAppendItem(GetGroup(scriptContext, splitPattern, input, nonMatchValue, groupId));
  1579. if (ary->GetLength() >= limit)
  1580. break;
  1581. }
  1582. }
  1583. }
  1584. if (ary->GetLength() < limit)
  1585. AppendSubString(scriptContext, ary, input, copyOffset, inputLength);
  1586. }
  1587. PrimEndMatch(state, scriptContext, splitPattern);
  1588. Assert(!splitPattern->IsSticky());
  1589. PropagateLastMatch
  1590. ( scriptContext
  1591. , splitPattern->IsGlobal()
  1592. , /* isSticky */ false
  1593. , regularExpression
  1594. , input
  1595. , lastSuccessfulMatch
  1596. , UnifiedRegex::GroupInfo()
  1597. , /* updateRegex */ true
  1598. , /* updateCtor */ true
  1599. , /* useSplitPattern */ true );
  1600. return ary;
  1601. }
  1602. UnifiedRegex::GroupInfo
  1603. RegexHelper::SimpleMatch(ScriptContext * scriptContext, UnifiedRegex::RegexPattern * pattern, const char16 * input, CharCount inputLength, CharCount offset)
  1604. {
  1605. RegexMatchState state;
  1606. PrimBeginMatch(state, scriptContext, pattern, input, inputLength, false);
  1607. UnifiedRegex::GroupInfo match = PrimMatch(state, scriptContext, pattern, inputLength, offset);
  1608. PrimEndMatch(state, scriptContext, pattern);
  1609. return match;
  1610. }
  1611. // String.prototype.search (ES5 15.5.4.12)
  1612. Var RegexHelper::RegexSearchImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1613. {
  1614. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  1615. const char16* inputStr = input->GetString();
  1616. CharCount inputLength = input->GetLength();
  1617. #if ENABLE_REGEX_CONFIG_OPTIONS
  1618. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Search, regularExpression, input);
  1619. #endif
  1620. UnifiedRegex::GroupInfo match = RegexHelper::SimpleMatch(scriptContext, pattern, inputStr, inputLength, 0);
  1621. PropagateLastMatch(scriptContext, pattern->IsGlobal(), pattern->IsSticky(), regularExpression, input, match, match, false, true);
  1622. return JavascriptNumber::ToVar(match.IsUndefined() ? -1 : (int32)match.offset, scriptContext);
  1623. }
  1624. // String.prototype.split (ES5 15.5.4.14)
  1625. Var RegexHelper::StringSplit(JavascriptString* match, JavascriptString* input, CharCount limit)
  1626. {
  1627. ScriptContext* scriptContext = match->GetScriptContext();
  1628. JavascriptArray* ary;
  1629. CharCount matchLen = match->GetLength();
  1630. if (matchLen == 0)
  1631. {
  1632. CharCount count = min(input->GetLength(), limit);
  1633. ary = scriptContext->GetLibrary()->CreateArray(count);
  1634. const char16 * charString = input->GetString();
  1635. for (CharCount i = 0; i < count; i++)
  1636. {
  1637. ary->DirectSetItemAt(i, scriptContext->GetLibrary()->GetCharStringCache().GetStringForChar(charString[i]));
  1638. }
  1639. }
  1640. else
  1641. {
  1642. CharCount i = 0;
  1643. CharCount offset = 0;
  1644. ary = scriptContext->GetLibrary()->CreateArray(0);
  1645. while (i < limit)
  1646. {
  1647. CharCount prevOffset = offset;
  1648. offset = JavascriptString::strstr(input, match, false, prevOffset);
  1649. if (offset != CharCountFlag)
  1650. {
  1651. ary->DirectSetItemAt(i++, SubString::New(input, prevOffset, offset-prevOffset));
  1652. offset += max(matchLen, static_cast<CharCount>(1));
  1653. if (offset > input->GetLength())
  1654. break;
  1655. }
  1656. else
  1657. {
  1658. ary->DirectSetItemAt(i++, SubString::New(input, prevOffset, input->GetLength() - prevOffset));
  1659. break;
  1660. }
  1661. }
  1662. }
  1663. return ary;
  1664. }
  1665. bool RegexHelper::IsResultNotUsed(CallFlags flags)
  1666. {
  1667. return !PHASE_OFF1(Js::RegexResultNotUsedPhase) && ((flags & CallFlags_NotUsed) != 0);
  1668. }
  1669. // ----------------------------------------------------------------------
  1670. // Primitives
  1671. // ----------------------------------------------------------------------
  1672. void RegexHelper::PrimBeginMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, const char16* input, CharCount inputLength, bool alwaysNeedAlloc)
  1673. {
  1674. state.input = input;
  1675. if (pattern->rep.unified.matcher == 0)
  1676. pattern->rep.unified.matcher = UnifiedRegex::Matcher::New(scriptContext, pattern);
  1677. if (alwaysNeedAlloc)
  1678. state.tempAllocatorObj = scriptContext->GetTemporaryAllocator(_u("RegexUnifiedExecTemp"));
  1679. else
  1680. state.tempAllocatorObj = 0;
  1681. }
  1682. UnifiedRegex::GroupInfo
  1683. RegexHelper::PrimMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, CharCount inputLength, CharCount offset)
  1684. {
  1685. Assert(pattern->rep.unified.program != 0);
  1686. Assert(pattern->rep.unified.matcher != 0);
  1687. #if ENABLE_REGEX_CONFIG_OPTIONS
  1688. UnifiedRegex::RegexStats* stats = 0;
  1689. if (REGEX_CONFIG_FLAG(RegexProfile))
  1690. {
  1691. stats = scriptContext->GetRegexStatsDatabase()->GetRegexStats(pattern);
  1692. scriptContext->GetRegexStatsDatabase()->BeginProfile();
  1693. }
  1694. UnifiedRegex::DebugWriter* w = 0;
  1695. if (REGEX_CONFIG_FLAG(RegexTracing) && CONFIG_FLAG(Verbose))
  1696. w = scriptContext->GetRegexDebugWriter();
  1697. #endif
  1698. pattern->rep.unified.matcher->Match
  1699. (state.input
  1700. , inputLength
  1701. , offset
  1702. , scriptContext
  1703. #if ENABLE_REGEX_CONFIG_OPTIONS
  1704. , stats
  1705. , w
  1706. #endif
  1707. );
  1708. #if ENABLE_REGEX_CONFIG_OPTIONS
  1709. if (REGEX_CONFIG_FLAG(RegexProfile))
  1710. scriptContext->GetRegexStatsDatabase()->EndProfile(stats, UnifiedRegex::RegexStats::Execute);
  1711. #endif
  1712. return pattern->GetGroup(0);
  1713. }
  1714. void RegexHelper::PrimEndMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern)
  1715. {
  1716. if (state.tempAllocatorObj != 0)
  1717. scriptContext->ReleaseTemporaryAllocator(state.tempAllocatorObj);
  1718. }
  1719. Var RegexHelper::NonMatchValue(ScriptContext* scriptContext, bool isGlobalCtor)
  1720. {
  1721. // SPEC DEVIATION: The $n properties of the RegExp ctor use empty strings rather than undefined to represent
  1722. // the non-match value, even in ES5 mode.
  1723. if (isGlobalCtor)
  1724. return scriptContext->GetLibrary()->GetEmptyString();
  1725. else
  1726. return scriptContext->GetLibrary()->GetUndefined();
  1727. }
  1728. Var RegexHelper::GetString(ScriptContext* scriptContext, JavascriptString* input, Var nonMatchValue, UnifiedRegex::GroupInfo group)
  1729. {
  1730. if (group.IsUndefined())
  1731. return nonMatchValue;
  1732. switch (group.length)
  1733. {
  1734. case 0:
  1735. return scriptContext->GetLibrary()->GetEmptyString();
  1736. case 1:
  1737. {
  1738. const char16* inputStr = input->GetString();
  1739. return scriptContext->GetLibrary()->GetCharStringCache().GetStringForChar(inputStr[group.offset]);
  1740. }
  1741. case 2:
  1742. {
  1743. const char16* inputStr = input->GetString();
  1744. PropertyString* propString = scriptContext->GetPropertyString2(inputStr[group.offset], inputStr[group.offset + 1]);
  1745. if (propString != 0)
  1746. return propString;
  1747. // fall-through for default
  1748. }
  1749. default:
  1750. return SubString::New(input, group.offset, group.length);
  1751. }
  1752. }
  1753. Var RegexHelper::GetGroup(ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, JavascriptString* input, Var nonMatchValue, int groupId)
  1754. {
  1755. return GetString(scriptContext, input, nonMatchValue, pattern->GetGroup(groupId));
  1756. }
  1757. // ======================================================================
  1758. // Match results propagate into three places:
  1759. // - The match result array. Generally the array has string entries for the overall match substring,
  1760. // followed by final bindings for each group, plus the fields:
  1761. // - 'input': string used in match
  1762. // - 'index': index of first character of match in input
  1763. // - 'lastIndex' (IE extension): one plus index of last character of match in input
  1764. // However, for String.match with a global match, the result is an array of all match results
  1765. // (ignoring any group bindings). But in IE8 mode we also bind the above fields to that array,
  1766. // using the results of the last successful primitive match.
  1767. // - The regular expression object has writable field:
  1768. // - 'lastIndex': one plus index of last character of last match in last input
  //     - 'lastInput'
  1770. // - (Host extension) The RegExp constructor object has fields:
  1771. // - '$n': last match substrings, using "" for undefined in all modes
  1772. // - etc (see JavascriptRegExpConstructorType.cpp)
  1773. //
  1774. // There are also three influences on what gets propagated where and when:
  1775. // - Whether the regular expression is global
  1776. // - Whether the primitive operations runs the regular expression until failure (e.g. String.match) or
  1777. // just once (e.g. RegExp.exec), or use the underlying matching machinery implicitly (e.g. String.split).
  1778. //
  1779. // Here are the rules:
  1780. // - RegExp is updated for the last *successful* primitive match, except for String.replace.
  1781. // In particular, for String.match with a global regex, the final failing match *does not* reset RegExp.
  //  - Except for String.search in ES5 mode (which does not update 'lastIndex'), the regular expression's
  //    'lastIndex' is updated as follows:
  1784. // - ES5 mode, if a primitive match fails then the regular expression 'lastIndex' is set to 0. In particular,
  1785. // the final failing primitive match for String.match with a global regex forces 'lastIndex' to be reset.
  1786. // However, if a primitive match succeeds then the regular expression 'lastIndex' is updated only for
  1787. // a global regex.
  1788. // for success. However:
  1789. // - The last failing match in a String.match with a global regex does NOT reset 'lastIndex'.
  1790. // - If the regular expression matched empty, the last index is set assuming the pattern actually matched
  1791. // one input character. This applies even if the pattern matched empty one beyond the end of the string
  1792. // in a String.match with a global regex (!). For our own sanity, we isolate this particular case
  1793. // within JavascriptRegExp when setting the lastIndexVar value.
  1794. // - In all modes, 'lastIndex' determines the starting search index only for global regular expressions.
  1795. //
  1796. // ======================================================================
  1797. void RegexHelper::PropagateLastMatch
  1798. ( ScriptContext* scriptContext
  1799. , bool isGlobal
  1800. , bool isSticky
  1801. , JavascriptRegExp* regularExpression
  1802. , JavascriptString* lastInput
  1803. , UnifiedRegex::GroupInfo lastSuccessfulMatch
  1804. , UnifiedRegex::GroupInfo lastActualMatch
  1805. , bool updateRegex
  1806. , bool updateCtor
  1807. , bool useSplitPattern )
  1808. {
  1809. if (updateRegex)
  1810. {
  1811. PropagateLastMatchToRegex(scriptContext, isGlobal, isSticky, regularExpression, lastSuccessfulMatch, lastActualMatch);
  1812. }
  1813. if (updateCtor)
  1814. {
  1815. PropagateLastMatchToCtor(scriptContext, regularExpression, lastInput, lastSuccessfulMatch, useSplitPattern);
  1816. }
  1817. }
  1818. void RegexHelper::PropagateLastMatchToRegex
  1819. ( ScriptContext* scriptContext
  1820. , bool isGlobal
  1821. , bool isSticky
  1822. , JavascriptRegExp* regularExpression
  1823. , UnifiedRegex::GroupInfo lastSuccessfulMatch
  1824. , UnifiedRegex::GroupInfo lastActualMatch )
  1825. {
  1826. if (lastActualMatch.IsUndefined())
  1827. {
  1828. regularExpression->SetLastIndex(0);
  1829. }
  1830. else if (isGlobal || isSticky)
  1831. {
  1832. CharCount lastIndex = lastActualMatch.EndOffset();
  1833. Assert(lastIndex <= MaxCharCount);
  1834. regularExpression->SetLastIndex((int32)lastIndex);
  1835. }
  1836. }
  1837. void RegexHelper::PropagateLastMatchToCtor
  1838. ( ScriptContext* scriptContext
  1839. , JavascriptRegExp* regularExpression
  1840. , JavascriptString* lastInput
  1841. , UnifiedRegex::GroupInfo lastSuccessfulMatch
  1842. , bool useSplitPattern )
  1843. {
  1844. Assert(lastInput);
  1845. if (!lastSuccessfulMatch.IsUndefined())
  1846. {
  1847. // Notes:
  1848. // - SPEC DEVIATION: The RegExp ctor holds some details of the last successful match on any regular expression.
  1849. // - For updating regex ctor's stats we are using entry function's context, rather than regex context,
  1850. // the rational is: use same context of RegExp.prototype, on which the function was called.
  1851. // So, if you call the function with remoteContext.regexInstance.exec.call(localRegexInstance, "match string"),
  1852. // we will update stats in the context related to the exec function, i.e. remoteContext.
  1853. // This is consistent with other browsers
  1854. UnifiedRegex::RegexPattern* pattern = useSplitPattern
  1855. ? regularExpression->GetSplitPattern()
  1856. : regularExpression->GetPattern();
  1857. scriptContext->GetLibrary()->GetRegExpConstructor()->SetLastMatch(pattern, lastInput, lastSuccessfulMatch);
  1858. }
  1859. }
  1860. void RegexHelper::InvalidateLastMatchOnCtor(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* lastInput, bool useSplitPattern)
  1861. {
  1862. Assert(lastInput);
  1863. UnifiedRegex::RegexPattern* pattern = useSplitPattern
  1864. ? regularExpression->GetSplitPattern()
  1865. : regularExpression->GetPattern();
  1866. scriptContext->GetLibrary()->GetRegExpConstructor()->InvalidateLastMatch(pattern, lastInput);
  1867. }
  1868. bool RegexHelper::GetInitialOffset(bool isGlobal, bool isSticky, JavascriptRegExp* regularExpression, CharCount inputLength, CharCount& offset)
  1869. {
  1870. if (isGlobal || isSticky)
  1871. {
  1872. offset = regularExpression->GetLastIndex();
  1873. if (offset <= MaxCharCount)
  1874. return true;
  1875. else
  1876. {
  1877. regularExpression->SetLastIndex(0);
  1878. return false;
  1879. }
  1880. }
  1881. else
  1882. {
  1883. offset = 0;
  1884. return true;
  1885. }
  1886. }
  1887. JavascriptArray* RegexHelper::CreateMatchResult(void *const stackAllocationPointer, ScriptContext* scriptContext, bool isGlobal, int numGroups, JavascriptString* input)
  1888. {
  1889. if (isGlobal)
  1890. {
  1891. // Use an ordinary array, with default initial capacity
  1892. return scriptContext->GetLibrary()->CreateArrayOnStack(stackAllocationPointer);
  1893. }
  1894. else
  1895. return JavascriptRegularExpressionResult::Create(stackAllocationPointer, numGroups, input, scriptContext);
  1896. }
  1897. void RegexHelper::FinalizeMatchResult(ScriptContext* scriptContext, bool isGlobal, JavascriptArray* arr, UnifiedRegex::GroupInfo match)
  1898. {
  1899. if (!isGlobal)
  1900. JavascriptRegularExpressionResult::SetMatch(arr, match);
  1901. // else: arr is an ordinary array
  1902. }
  1903. JavascriptArray* RegexHelper::CreateExecResult(void *const stackAllocationPointer, ScriptContext* scriptContext, int numGroups, JavascriptString* input, UnifiedRegex::GroupInfo match)
  1904. {
  1905. JavascriptArray* res = JavascriptRegularExpressionResult::Create(stackAllocationPointer, numGroups, input, scriptContext);
  1906. JavascriptRegularExpressionResult::SetMatch(res, match);
  1907. return res;
  1908. }
  1909. template<bool mustMatchEntireInput>
  1910. BOOL RegexHelper::RegexTest_NonScript(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, const char16 *const input, const CharCount inputLength)
  1911. {
  1912. // This version of the function should only be used when testing the regex against a non-javascript string. That is,
  1913. // this call was not initiated by script code. Hence, the RegExp constructor is not updated with the last match. If
  1914. // 'mustMatchEntireInput' is true, this function also ignores the global/sticky flag and the lastIndex property, since it tests
  1915. // for a match on the entire input string; in that case, the lastIndex property is not modified.
  1916. UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
  1917. UnifiedRegex::GroupInfo match; // initially undefined
  1918. #if ENABLE_REGEX_CONFIG_OPTIONS
  1919. RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Test, regularExpression, input, inputLength);
  1920. #endif
  1921. const bool isGlobal = pattern->IsGlobal();
  1922. const bool isSticky = pattern->IsSticky();
  1923. CharCount offset;
  1924. if (mustMatchEntireInput)
  1925. offset = 0; // needs to match the entire input, so ignore 'lastIndex' and always start from the beginning
  1926. else if (!GetInitialOffset(isGlobal, isSticky, regularExpression, inputLength, offset))
  1927. return false;
  1928. if (mustMatchEntireInput || offset <= inputLength)
  1929. {
  1930. match = RegexHelper::SimpleMatch(scriptContext, pattern, input, inputLength, offset);
  1931. }
  1932. // else: match remains undefined
  1933. if (!mustMatchEntireInput) // don't update 'lastIndex' when mustMatchEntireInput is true since the global flag is ignored
  1934. {
  1935. PropagateLastMatchToRegex(scriptContext, isGlobal, isSticky, regularExpression, match, match);
  1936. }
  1937. return mustMatchEntireInput ? match.offset == 0 && match.length == inputLength : !match.IsUndefined();
  1938. }
  1939. // explicit instantiation
  1940. template BOOL RegexHelper::RegexTest_NonScript<true>(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, const char16 *const input, const CharCount inputLength);
  1941. template BOOL RegexHelper::RegexTest_NonScript<false>(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, const char16 *const input, const CharCount inputLength);
  1942. // Asserts if the value needs to be marshaled to target context.
  1943. // Returns the resulting value.
  1944. // This is supposed to be called for result/return value of the RegexXXX functions.
  1945. // static
  1946. template<typename T>
  1947. T RegexHelper::CheckCrossContextAndMarshalResult(T value, ScriptContext* targetContext)
  1948. {
  1949. Assert(targetContext);
  1950. Assert(!CrossSite::NeedMarshalVar(value, targetContext));
  1951. return value;
  1952. }
  1953. Var RegexHelper::RegexMatchResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1954. {
  1955. return RegexHelper::RegexMatch(scriptContext, regularExpression, input, false);
  1956. }
  1957. Var RegexHelper::RegexMatchResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1958. {
  1959. return RegexHelper::RegexMatch(scriptContext, regularExpression, input, false, stackAllocationPointer);
  1960. }
  1961. Var RegexHelper::RegexMatchResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1962. {
  1963. if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
  1964. {
  1965. return RegexHelper::RegexMatch(scriptContext, regularExpression, input, true);
  1966. }
  1967. else
  1968. {
  1969. return RegexHelper::RegexMatch(scriptContext, regularExpression, input, false);
  1970. }
  1971. }
  1972. Var RegexHelper::RegexMatch(ScriptContext* entryFunctionContext, RecyclableObject *thisObj, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
  1973. {
  1974. Var result = RegexHelper::RegexMatchImpl<true>(entryFunctionContext, thisObj, input, noResult, stackAllocationPointer);
  1975. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  1976. }
  1977. Var RegexHelper::RegexMatchNoHistory(ScriptContext* entryFunctionContext, JavascriptRegExp *regularExpression, JavascriptString *input, bool noResult)
  1978. {
  1979. // RegexMatchNoHistory() is used only by Intl internally and there is no need for ES6
  1980. // observable RegExp actions. Therefore, we can directly use the ES5 logic.
  1981. Var result = RegexHelper::RegexEs5MatchImpl<false>(entryFunctionContext, regularExpression, input, noResult);
  1982. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  1983. }
  1984. Var RegexHelper::RegexExecResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1985. {
  1986. return RegexHelper::RegexExec(scriptContext, regularExpression, input, false);
  1987. }
  1988. Var RegexHelper::RegexExecResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1989. {
  1990. return RegexHelper::RegexExec(scriptContext, regularExpression, input, false, stackAllocationPointer);
  1991. }
  1992. Var RegexHelper::RegexExecResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  1993. {
  1994. if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
  1995. {
  1996. return RegexHelper::RegexExec(scriptContext, regularExpression, input, true);
  1997. }
  1998. else
  1999. {
  2000. return RegexHelper::RegexExec(scriptContext, regularExpression, input, false);
  2001. }
  2002. }
  2003. Var RegexHelper::RegexExec(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, bool noResult, void *const stackAllocationPointer)
  2004. {
  2005. Var result = RegexHelper::RegexExecImpl(entryFunctionContext, regularExpression, input, noResult, stackAllocationPointer);
  2006. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  2007. }
  2008. Var RegexHelper::RegexReplaceResultUsed(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace)
  2009. {
  2010. return entryFunctionContext->GetConfig()->IsES6RegExSymbolsEnabled()
  2011. ? RegexHelper::RegexReplace(entryFunctionContext, regularExpression, input, replace, false)
  2012. : RegexHelper::RegexEs5Replace(entryFunctionContext, regularExpression, input, replace, false);
  2013. }
  2014. Var RegexHelper::RegexReplaceResultNotUsed(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace)
  2015. {
  2016. if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
  2017. {
  2018. return entryFunctionContext->GetConfig()->IsES6RegExSymbolsEnabled()
  2019. ? RegexHelper::RegexReplace(entryFunctionContext, regularExpression, input, replace, true)
  2020. : RegexHelper::RegexEs5Replace(entryFunctionContext, regularExpression, input, replace, true);
  2021. }
  2022. else
  2023. {
  2024. return entryFunctionContext->GetConfig()->IsES6RegExSymbolsEnabled()
  2025. ? RegexHelper::RegexReplace(entryFunctionContext, regularExpression, input, replace, false)
  2026. : RegexHelper::RegexEs5Replace(entryFunctionContext, regularExpression, input, replace, false);
  2027. }
  2028. }
  2029. Var RegexHelper::RegexReplace(ScriptContext* entryFunctionContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult)
  2030. {
  2031. Var result = RegexHelper::RegexReplaceImpl(entryFunctionContext, thisObj, input, replace, noResult);
  2032. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  2033. }
  2034. Var RegexHelper::RegexEs5Replace(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace, bool noResult)
  2035. {
  2036. // We can have RegexReplaceResult... functions defer their job to RegexReplace. However, their regularExpression argument
  2037. // would first be cast to RecyclableObject when the call is made, and then back to JavascriptRegExp in RegexReplaceImpl.
  2038. // The conversion back slows down the perf, so we use this ES5 version of RegexReplace in RegexReplaceResult... if we know
  2039. // that the ES6 logic isn't needed.
  2040. Var result = RegexHelper::RegexEs5ReplaceImpl(entryFunctionContext, regularExpression, input, replace, noResult);
  2041. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  2042. }
  2043. Var RegexHelper::RegexReplaceFunction(ScriptContext* entryFunctionContext, RecyclableObject* thisObj, JavascriptString* input, RecyclableObject* replacefn)
  2044. {
  2045. Var result = RegexHelper::RegexReplaceImpl(entryFunctionContext, thisObj, input, replacefn);
  2046. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  2047. }
  2048. Var RegexHelper::RegexSearch(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input)
  2049. {
  2050. Var result = RegexHelper::RegexSearchImpl(entryFunctionContext, regularExpression, input);
  2051. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  2052. }
  2053. Var RegexHelper::RegexSplitResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit)
  2054. {
  2055. return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, false);
  2056. }
  2057. Var RegexHelper::RegexSplitResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit)
  2058. {
  2059. Assert(ThreadContext::IsOnStack(stackAllocationPointer));
  2060. return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, false, stackAllocationPointer);
  2061. }
  2062. Var RegexHelper::RegexSplitResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit)
  2063. {
  2064. if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
  2065. {
  2066. return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, true);
  2067. }
  2068. else
  2069. {
  2070. return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, false);
  2071. }
  2072. }
  2073. Var RegexHelper::RegexSplit(ScriptContext* entryFunctionContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
  2074. {
  2075. Var result = RegexHelper::RegexSplitImpl(entryFunctionContext, thisObj, input, limit, noResult, stackAllocationPointer);
  2076. return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
  2077. }
  2078. RecyclableObject* RegexHelper::ExecResultToRecyclableObject(Var result)
  2079. {
  2080. // "result" is the result of the "exec" call. "CallExec" makes sure that it is either
  2081. // an Object or Null. RegExp algorithms have special conditions for when the result is Null,
  2082. // so we can directly cast to RecyclableObject.
  2083. Assert(!JavascriptOperators::IsNull(result));
  2084. return UnsafeVarTo<RecyclableObject>(result);
  2085. }
  2086. JavascriptString* RegexHelper::GetMatchStrFromResult(RecyclableObject* result, ScriptContext* scriptContext)
  2087. {
  2088. return JavascriptConversion::ToString(
  2089. JavascriptOperators::GetItem(result, (uint32)0, scriptContext),
  2090. scriptContext);
  2091. }
  2092. void RegexHelper::AdvanceLastIndex(
  2093. RecyclableObject* instance,
  2094. JavascriptString* input,
  2095. JavascriptString* matchStr,
  2096. bool unicode,
  2097. ScriptContext* scriptContext)
  2098. {
  2099. if (matchStr->GetLength() == 0)
  2100. {
  2101. CharCount lastIndex = JavascriptRegExp::GetLastIndexProperty(instance, scriptContext);
  2102. lastIndex = AdvanceStringIndex(input, lastIndex, unicode);
  2103. JavascriptRegExp::SetLastIndexProperty(instance, lastIndex, scriptContext);
  2104. }
  2105. }
  2106. CharCount RegexHelper::AdvanceStringIndex(JavascriptString* string, CharCount index, bool isUnicode)
  2107. {
  2108. // TODO: Change the increment to 2 depending on the "unicode" flag and
  2109. // the code point at "index". The increment is currently constant at 1
  2110. // in order to be compatible with the rest of the RegExp code.
  2111. return JavascriptRegExp::AddIndex(index, 1);
  2112. }
  2113. }