| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- #include "RuntimeLibraryPch.h"
- // Parser Includes
- #include "DebugWriter.h"
- #include "RegexStats.h"
- #include "OctoquadIdentifier.h"
- #include "RegexCompileTime.h"
- #include "RegexParser.h"
- #include "RegexPattern.h"
- namespace Js
- {
- // ----------------------------------------------------------------------
- // Dynamic compilation
- // ----------------------------------------------------------------------
- // See also:
- // UnifiedRegex::Parser::Options(...)
- bool RegexHelper::GetFlags(Js::ScriptContext* scriptContext, __in_ecount(strLen) const char16* str, CharCount strLen, UnifiedRegex::RegexFlags &flags)
- {
- for (CharCount i = 0; i < strLen; i++)
- {
- switch (str[i])
- {
- case 'i':
- if ((flags & UnifiedRegex::IgnoreCaseRegexFlag) != 0)
- return false;
- flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::IgnoreCaseRegexFlag);
- break;
- case 'g':
- if ((flags & UnifiedRegex::GlobalRegexFlag) != 0)
- return false;
- flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::GlobalRegexFlag);
- break;
- case 'm':
- if ((flags & UnifiedRegex::MultilineRegexFlag) != 0)
- return false;
- flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::MultilineRegexFlag);
- break;
- case 'u':
- if (scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled())
- {
- if((flags & UnifiedRegex::UnicodeRegexFlag) != 0)
- return false;
- flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::UnicodeRegexFlag);
- break;
- }
- return false;
- case 'y':
- if (scriptContext->GetConfig()->IsES6RegExStickyEnabled())
- {
- if ((flags & UnifiedRegex::StickyRegexFlag) != 0)
- return false;
- flags = (UnifiedRegex::RegexFlags)(flags | UnifiedRegex::StickyRegexFlag);
- break;
- }
- return false;
- default:
- return false;
- }
- }
- return true;
- }
- UnifiedRegex::RegexPattern* RegexHelper::CompileDynamic(ScriptContext *scriptContext, const char16* psz, CharCount csz, const char16* pszOpts, CharCount cszOpts, bool isLiteralSource)
- {
- Assert(psz != 0 && psz[csz] == 0);
- Assert(pszOpts != 0 || cszOpts == 0);
- Assert(pszOpts == 0 || pszOpts[cszOpts] == 0);
- UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
- if (pszOpts != NULL)
- {
- if (!GetFlags(scriptContext, pszOpts, cszOpts, flags))
- {
- // Compile in order to throw appropriate error for ill-formed flags
- PrimCompileDynamic(scriptContext, psz, csz, pszOpts, cszOpts, isLiteralSource);
- Assert(false);
- }
- }
- if(isLiteralSource)
- {
- // The source is from a literal regex, so we're cloning a literal regex. Don't use the dynamic regex MRU map since
- // these literal regex patterns' lifetimes are tied with the function body.
- return PrimCompileDynamic(scriptContext, psz, csz, pszOpts, cszOpts, isLiteralSource);
- }
- UnifiedRegex::RegexKey lookupKey(psz, csz, flags);
- UnifiedRegex::RegexPattern* pattern = nullptr;
- RegexPatternMruMap* dynamicRegexMap = scriptContext->GetDynamicRegexMap();
- if (!dynamicRegexMap->TryGetValue(lookupKey, &pattern))
- {
- pattern = PrimCompileDynamic(scriptContext, psz, csz, pszOpts, cszOpts, isLiteralSource);
- // WARNING: Must calculate key again so that dictionary has copy of source associated with the pattern
- const auto source = pattern->GetSource();
- UnifiedRegex::RegexKey finalKey(source.GetBuffer(), source.GetLength(), flags);
- dynamicRegexMap->Add(finalKey, pattern);
- }
- return pattern;
- }
- UnifiedRegex::RegexPattern* RegexHelper::CompileDynamic(
- ScriptContext *scriptContext, const char16* psz, CharCount csz, UnifiedRegex::RegexFlags flags, bool isLiteralSource)
- {
- //
- // Regex compilations are mostly string parsing based. To avoid duplicating validation rules,
- // generate a trivial options string right here on the stack and delegate to the string parsing
- // based implementation.
- //
- const CharCount OPT_BUF_SIZE = 6;
- char16 opts[OPT_BUF_SIZE];
- CharCount i = 0;
- if (flags & UnifiedRegex::IgnoreCaseRegexFlag)
- {
- opts[i++] = _u('i');
- }
- if (flags & UnifiedRegex::GlobalRegexFlag)
- {
- opts[i++] = _u('g');
- }
- if (flags & UnifiedRegex::MultilineRegexFlag)
- {
- opts[i++] = _u('m');
- }
- if (flags & UnifiedRegex::UnicodeRegexFlag)
- {
- Assert(scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled());
- opts[i++] = _u('u');
- }
- if (flags & UnifiedRegex::StickyRegexFlag)
- {
- Assert(scriptContext->GetConfig()->IsES6RegExStickyEnabled());
- opts[i++] = _u('y');
- }
- Assert(i < OPT_BUF_SIZE);
- opts[i] = NULL;
- return CompileDynamic(scriptContext, psz, csz, opts, i, isLiteralSource);
- }
- UnifiedRegex::RegexPattern* RegexHelper::PrimCompileDynamic(ScriptContext *scriptContext, const char16* psz, CharCount csz, const char16* pszOpts, CharCount cszOpts, bool isLiteralSource)
- {
- PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
- // SEE ALSO: Scanner<EncodingPolicy>::ScanRegExpConstant()
- #ifdef PROFILE_EXEC
- scriptContext->ProfileBegin(Js::RegexCompilePhase);
- #endif
- ArenaAllocator* rtAllocator = scriptContext->RegexAllocator();
- #if ENABLE_REGEX_CONFIG_OPTIONS
- UnifiedRegex::DebugWriter *dw = 0;
- if (REGEX_CONFIG_FLAG(RegexDebug))
- dw = scriptContext->GetRegexDebugWriter();
- UnifiedRegex::RegexStats* stats = 0;
- #endif
- UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
- if(csz == 0 && cszOpts == 0)
- {
- // Fast path for compiling the empty regex with empty flags, for the RegExp constructor object and other cases.
- // These empty regexes are dynamic regexes and so this fast path only exists for dynamic regex compilation. The
- // standard chars in particular, do not need to be initialized to compile this regex.
- UnifiedRegex::Program* program = UnifiedRegex::Program::New(scriptContext->GetRecycler(), flags);
- UnifiedRegex::Parser<NullTerminatedUnicodeEncodingPolicy, false>::CaptureEmptySourceAndNoGroups(program);
- UnifiedRegex::RegexPattern* pattern = UnifiedRegex::RegexPattern::New(scriptContext, program, false);
- UnifiedRegex::Compiler::CompileEmptyRegex
- ( program
- , pattern
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , dw
- , stats
- #endif
- );
- #ifdef PROFILE_EXEC
- scriptContext->ProfileEnd(Js::RegexCompilePhase);
- #endif
- return pattern;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- if (REGEX_CONFIG_FLAG(RegexProfile))
- scriptContext->GetRegexStatsDatabase()->BeginProfile();
- #endif
- BEGIN_TEMP_ALLOCATOR(ctAllocator, scriptContext, _u("UnifiedRegexParseAndCompile"));
- UnifiedRegex::StandardChars<char16>* standardChars = scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
- UnifiedRegex::Node* root = 0;
- UnifiedRegex::Parser<NullTerminatedUnicodeEncodingPolicy, false> parser
- ( scriptContext
- , ctAllocator
- , standardChars
- , standardChars
- , false
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , dw
- #endif
- );
- try
- {
- root = parser.ParseDynamic(psz, psz + csz, pszOpts, pszOpts + cszOpts, flags);
- }
- catch (UnifiedRegex::ParseError e)
- {
- END_TEMP_ALLOCATOR(ctAllocator, scriptContext);
- #ifdef PROFILE_EXEC
- scriptContext->ProfileEnd(Js::RegexCompilePhase);
- #endif
- Js::JavascriptError::ThrowSyntaxError(scriptContext, e.error);
- // never reached
- }
- const auto recycler = scriptContext->GetRecycler();
- UnifiedRegex::Program* program = UnifiedRegex::Program::New(recycler, flags);
- parser.CaptureSourceAndGroups(recycler, program, psz, csz, csz);
- UnifiedRegex::RegexPattern* pattern = UnifiedRegex::RegexPattern::New(scriptContext, program, isLiteralSource);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- if (REGEX_CONFIG_FLAG(RegexProfile))
- {
- stats = scriptContext->GetRegexStatsDatabase()->GetRegexStats(pattern);
- scriptContext->GetRegexStatsDatabase()->EndProfile(stats, UnifiedRegex::RegexStats::Parse);
- }
- if (REGEX_CONFIG_FLAG(RegexTracing))
- {
- UnifiedRegex::DebugWriter* tw = scriptContext->GetRegexDebugWriter();
- tw->Print(_u("// REGEX COMPILE "));
- pattern->Print(tw);
- tw->EOL();
- }
- if (REGEX_CONFIG_FLAG(RegexProfile))
- scriptContext->GetRegexStatsDatabase()->BeginProfile();
- #endif
- UnifiedRegex::Compiler::Compile
- ( scriptContext
- , ctAllocator
- , rtAllocator
- , standardChars
- , program
- , root
- , parser.GetLitbuf()
- , pattern
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , dw
- , stats
- #endif
- );
- #if ENABLE_REGEX_CONFIG_OPTIONS
- if (REGEX_CONFIG_FLAG(RegexProfile))
- scriptContext->GetRegexStatsDatabase()->EndProfile(stats, UnifiedRegex::RegexStats::Compile);
- #endif
- END_TEMP_ALLOCATOR(ctAllocator, scriptContext);
- #ifdef PROFILE_EXEC
- scriptContext->ProfileEnd(Js::RegexCompilePhase);
- #endif
- return pattern;
- }
- // ----------------------------------------------------------------------
- // Primitives
- // ----------------------------------------------------------------------
- #if ENABLE_REGEX_CONFIG_OPTIONS
- static void RegexHelperTrace(
- ScriptContext* scriptContext,
- UnifiedRegex::RegexStats::Use use,
- JavascriptRegExp* regExp,
- const char16 *const input,
- const CharCount inputLength,
- const char16 *const replace = 0,
- const CharCount replaceLength = 0)
- {
- Assert(regExp);
- Assert(input);
- if (REGEX_CONFIG_FLAG(RegexProfile))
- {
- UnifiedRegex::RegexStats* stats =
- scriptContext->GetRegexStatsDatabase()->GetRegexStats(regExp->GetPattern());
- stats->useCounts[use]++;
- stats->inputLength += inputLength;
- }
- if (REGEX_CONFIG_FLAG(RegexTracing))
- {
- UnifiedRegex::DebugWriter* w = scriptContext->GetRegexDebugWriter();
- w->Print(_u("%s("), UnifiedRegex::RegexStats::UseNames[use]);
- regExp->GetPattern()->Print(w);
- w->Print(_u(", "));
- if (!CONFIG_FLAG(Verbose) && inputLength > 1024)
- w->Print(_u("\"<string too large>\""));
- else
- w->PrintQuotedString(input, inputLength);
- if (replace != 0)
- {
- Assert(use == UnifiedRegex::RegexStats::Replace);
- w->Print(_u(", "));
- if (!CONFIG_FLAG(Verbose) && replaceLength > 1024)
- w->Print(_u("\"<string too large>\""));
- else
- w->PrintQuotedString(replace, replaceLength);
- }
- w->PrintEOL(_u(");"));
- w->Flush();
- }
- }
- static void RegexHelperTrace(ScriptContext* scriptContext, UnifiedRegex::RegexStats::Use use, JavascriptRegExp* regExp, JavascriptString* input)
- {
- Assert(regExp);
- Assert(input);
- RegexHelperTrace(scriptContext, use, regExp, input->GetString(), input->GetLength());
- }
- static void RegexHelperTrace(ScriptContext* scriptContext, UnifiedRegex::RegexStats::Use use, JavascriptRegExp* regExp, JavascriptString* input, JavascriptString* replace)
- {
- Assert(regExp);
- Assert(input);
- Assert(replace);
- RegexHelperTrace(scriptContext, use, regExp, input->GetString(), input->GetLength(), replace->GetString(), replace->GetLength());
- }
- #endif
- // ----------------------------------------------------------------------
- // Regex entry points
- // ----------------------------------------------------------------------
- struct RegexMatchState
- {
- const char16* input;
- TempArenaAllocatorObject* tempAllocatorObj;
- UnifiedRegex::Matcher* matcher;
- };
- template <bool updateHistory>
- Var RegexHelper::RegexMatchImpl(ScriptContext* scriptContext, RecyclableObject *thisObj, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
- {
- ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
- // Normally, this check would be done in JavascriptRegExp::EntrySymbolMatch. However,
- // since the lowerer inlines String.prototype.match and directly calls the helper,
- // the check then would be bypassed. That's the reason we do the check here.
- if (scriptConfig->IsES6RegExSymbolsEnabled()
- && IsRegexSymbolMatchObservable(thisObj, scriptContext))
- {
- // We don't need to pass "updateHistory" here since the call to "exec" will handle it.
- return RegexEs6MatchImpl(scriptContext, thisObj, input, noResult, stackAllocationPointer);
- }
- else
- {
- PCWSTR varName = scriptConfig->IsES6RegExSymbolsEnabled()
- ? _u("RegExp.prototype[Symbol.match]")
- : _u("String.prototype.match");
- JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
- return RegexEs5MatchImpl<updateHistory>(scriptContext, regularExpression, input, noResult, stackAllocationPointer);
- }
- }
- bool RegexHelper::IsRegexSymbolMatchObservable(RecyclableObject* instance, ScriptContext* scriptContext)
- {
- DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
- return !JavascriptRegExp::HasOriginalRegExType(instance)
- || JavascriptRegExp::HasObservableExec(regexPrototype)
- || JavascriptRegExp::HasObservableGlobalFlag(regexPrototype)
- || JavascriptRegExp::HasObservableUnicodeFlag(regexPrototype);
- }
- Var RegexHelper::RegexEs6MatchImpl(ScriptContext* scriptContext, RecyclableObject *thisObj, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
- {
- PCWSTR const varName = _u("RegExp.prototype[Symbol.match]");
- if (!JavascriptRegExp::GetGlobalProperty(thisObj, scriptContext))
- {
- return JavascriptRegExp::CallExec(thisObj, input, varName, scriptContext);
- }
- else
- {
- bool unicode = JavascriptRegExp::GetUnicodeProperty(thisObj, scriptContext);
- JavascriptRegExp::SetLastIndexProperty(thisObj, TaggedInt::ToVarUnchecked(0), scriptContext);
- JavascriptArray* arrayResult = nullptr;
- do
- {
- Var result = JavascriptRegExp::CallExec(thisObj, input, varName, scriptContext);
- if (JavascriptOperators::IsNull(result))
- {
- break;
- }
- RecyclableObject* resultObj = ExecResultToRecyclableObject(result);
- JavascriptString* matchStr = GetMatchStrFromResult(resultObj, scriptContext);
- if (arrayResult == nullptr)
- {
- arrayResult = scriptContext->GetLibrary()->CreateArray();
- }
- arrayResult->DirectAppendItem(matchStr);
- AdvanceLastIndex(thisObj, input, matchStr, unicode, scriptContext);
- }
- while (true);
- return arrayResult != nullptr
- ? arrayResult
- : scriptContext->GetLibrary()->GetNull();
- }
- }
- // String.prototype.match (ES5 15.5.4.10)
- template <bool updateHistory>
- Var RegexHelper::RegexEs5MatchImpl(ScriptContext* scriptContext, JavascriptRegExp *regularExpression, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
- {
- UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
- const char16* inputStr = input->GetString();
- CharCount inputLength = input->GetLength();
- #if ENABLE_REGEX_CONFIG_OPTIONS
- RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Match, regularExpression, input);
- #endif
- UnifiedRegex::GroupInfo lastSuccessfulMatch; // initially undefined
- UnifiedRegex::GroupInfo lastActualMatch; // initially undefined
- #ifdef REGEX_TRIGRAMS
- UnifiedRegex::TrigramAlphabet* trigramAlphabet = scriptContext->GetTrigramAlphabet();
- UnifiedRegex::TrigramInfo* trigramInfo = pattern->rep.unified.trigramInfo;
- if (trigramAlphabet != NULL && inputLength >= MinTrigramInputLength && trigramInfo != NULL)
- {
- if (trigramAlphabet->input == NULL)
- {
- trigramAlphabet->MegaMatch((char16*)inputStr, inputLength);
- }
- if (trigramInfo->isTrigramPattern)
- {
- if (trigramInfo->resultCount > 0)
- {
- lastSuccessfulMatch.offset = trigramInfo->offsets[trigramInfo->resultCount - 1];
- lastSuccessfulMatch.length = UnifiedRegex::TrigramInfo::PatternLength;
- }
- // else: leave lastMatch undefined
- // Make sure a matcher is allocated and holds valid last match in case the RegExp constructor
- // needs to fill-in details from the last match via JavascriptRegExpConstructor::EnsureValues
- Assert(pattern->rep.unified.program != 0);
- if (pattern->rep.unified.matcher == 0)
- pattern->rep.unified.matcher = UnifiedRegex::Matcher::New(scriptContext, pattern);
- *pattern->rep.unified.matcher->GroupIdToGroupInfo(0) = lastSuccessfulMatch;
- Assert(pattern->IsGlobal());
- JavascriptArray* arrayResult = CreateMatchResult(stackAllocationPointer, scriptContext, /* isGlobal */ true, pattern->NumGroups(), input);
- FinalizeMatchResult(scriptContext, /* isGlobal */ true, arrayResult, lastSuccessfulMatch);
- if (trigramInfo->resultCount > 0)
- {
- if (trigramInfo->hasCachedResultString)
- {
- for (int k = 0; k < trigramInfo->resultCount; k++)
- {
- arrayResult->DirectSetItemAt(k,
- static_cast<Js::JavascriptString*>(trigramInfo->cachedResult[k]));
- }
- }
- else
- {
- for (int k = 0; k < trigramInfo->resultCount; k++)
- {
- JavascriptString * str = SubString::New(input, trigramInfo->offsets[k], UnifiedRegex::TrigramInfo::PatternLength);
- trigramInfo->cachedResult[k] = str;
- arrayResult->DirectSetItemAt(k, str);
- }
- trigramInfo->hasCachedResultString = true;
- }
- } // otherwise, there are no results and null will be returned
- if (updateHistory)
- {
- PropagateLastMatch(scriptContext, /* isGlobal */ true, pattern->IsSticky(), regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
- }
- return lastSuccessfulMatch.IsUndefined() ? scriptContext->GetLibrary()->GetNull() : arrayResult;
- }
- }
- #endif
- // If global regex, result array holds substrings for each match, and group bindings are ignored
- // If non-global regex, result array holds overall substring and each group binding substring
- const bool isGlobal = pattern->IsGlobal();
- const bool isSticky = pattern->IsSticky();
- JavascriptArray* arrayResult = 0;
- RegexMatchState state;
- // If global = false and sticky = true, set offset = lastIndex, else set offset = 0
- CharCount offset = 0;
- if (!isGlobal && isSticky)
- {
- offset = regularExpression->GetLastIndex();
- }
- uint32 globalIndex = 0;
- PrimBeginMatch(state, scriptContext, pattern, inputStr, inputLength, false);
- do
- {
- if (offset > inputLength)
- {
- lastActualMatch.Reset();
- break;
- }
- lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
- if (lastActualMatch.IsUndefined())
- break;
- lastSuccessfulMatch = lastActualMatch;
- if (!noResult)
- {
- if (arrayResult == 0)
- arrayResult = CreateMatchResult(stackAllocationPointer, scriptContext, isGlobal, pattern->NumGroups(), input);
- JavascriptString *const matchedString = SubString::New(input, lastActualMatch.offset, lastActualMatch.length);
- if (isGlobal)
- arrayResult->DirectSetItemAt(globalIndex, matchedString);
- else
- {
- // The array's head segment up to length - 1 may not be filled. Write to the head segment element directly
- // instead of calling a helper that expects the segment to be pre-filled.
- Assert(globalIndex < arrayResult->GetHead()->length);
- static_cast<SparseArraySegment<Var> *>(arrayResult->GetHead())->elements[globalIndex] = matchedString;
- }
- globalIndex++;
- }
- offset = lastActualMatch.offset + max(lastActualMatch.length, static_cast<CharCountOrFlag>(1));
- } while (isGlobal);
- PrimEndMatch(state, scriptContext, pattern);
- if (updateHistory)
- {
- PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
- }
- if (arrayResult == 0)
- {
- return scriptContext->GetLibrary()->GetNull();
- }
- const int numGroups = pattern->NumGroups();
- if (!isGlobal)
- {
- if (numGroups > 1)
- {
- // Overall match already captured in index 0 by above, so just grab the groups
- Var nonMatchValue = NonMatchValue(scriptContext, false);
- Field(Var) *elements = ((SparseArraySegment<Var>*)arrayResult->GetHead())->elements;
- for (uint groupId = 1; groupId < (uint)numGroups; groupId++)
- {
- Assert(groupId < arrayResult->GetHead()->left + arrayResult->GetHead()->length);
- elements[groupId] = GetGroup(scriptContext, pattern, input, nonMatchValue, groupId);
- }
- }
- FinalizeMatchResult(scriptContext, /* isGlobal */ false, arrayResult, lastSuccessfulMatch);
- }
- else
- {
- FinalizeMatchResult(scriptContext, /* isGlobal */ true, arrayResult, lastSuccessfulMatch);
- }
- return arrayResult;
- }
- // RegExp.prototype.exec (ES5 15.10.6.2)
- Var RegexHelper::RegexExecImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, bool noResult, void *const stackAllocationPointer)
- {
- UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
- #if ENABLE_REGEX_CONFIG_OPTIONS
- RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Exec, regularExpression, input);
- #endif
- const bool isGlobal = pattern->IsGlobal();
- const bool isSticky = pattern->IsSticky();
- CharCount offset;
- CharCount inputLength = input->GetLength();
- if (!GetInitialOffset(isGlobal, isSticky, regularExpression, inputLength, offset))
- {
- return scriptContext->GetLibrary()->GetNull();
- }
- UnifiedRegex::GroupInfo match; // initially undefined
- if (offset <= inputLength)
- {
- const char16* inputStr = input->GetString();
- match = SimpleMatch(scriptContext, pattern, inputStr, inputLength, offset);
- }
- // else: match remains undefined
- PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, match, match, true, true);
- if (noResult || match.IsUndefined())
- {
- return scriptContext->GetLibrary()->GetNull();
- }
- const int numGroups = pattern->NumGroups();
- Assert(numGroups >= 0);
- JavascriptArray* result = CreateExecResult(stackAllocationPointer, scriptContext, numGroups, input, match);
- Var nonMatchValue = NonMatchValue(scriptContext, false);
- Field(Var) *elements = ((SparseArraySegment<Var>*)result->GetHead())->elements;
- for (uint groupId = 0; groupId < (uint)numGroups; groupId++)
- {
- Assert(groupId < result->GetHead()->left + result->GetHead()->length);
- elements[groupId] = GetGroup(scriptContext, pattern, input, nonMatchValue, groupId);
- }
- return result;
- }
- Var RegexHelper::RegexTest(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString *input)
- {
- if (scriptContext->GetConfig()->IsES6RegExSymbolsEnabled()
- && IsRegexTestObservable(thisObj, scriptContext))
- {
- return RegexEs6TestImpl(scriptContext, thisObj, input);
- }
- else
- {
- JavascriptRegExp* regularExpression =
- JavascriptRegExp::ToRegExp(thisObj, _u("RegExp.prototype.test"), scriptContext);
- return RegexEs5TestImpl(scriptContext, regularExpression, input);
- }
- }
- bool RegexHelper::IsRegexTestObservable(RecyclableObject* instance, ScriptContext* scriptContext)
- {
- DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
- return !JavascriptRegExp::HasOriginalRegExType(instance)
- || JavascriptRegExp::HasObservableExec(regexPrototype);
- }
- Var RegexHelper::RegexEs6TestImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString *input)
- {
- Var match = JavascriptRegExp::CallExec(thisObj, input, _u("RegExp.prototype.test"), scriptContext);
- return JavascriptBoolean::ToVar(!JavascriptOperators::IsNull(match), scriptContext);
- }
- // RegExp.prototype.test (ES5 15.10.6.3)
- Var RegexHelper::RegexEs5TestImpl(ScriptContext* scriptContext, JavascriptRegExp *regularExpression, JavascriptString *input)
- {
- UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
- const char16* inputStr = input->GetString();
- CharCount inputLength = input->GetLength();
- UnifiedRegex::GroupInfo match; // initially undefined
- const bool isGlobal = pattern->IsGlobal();
- const bool isSticky = pattern->IsSticky();
- const bool useCache = !isGlobal && !isSticky;
- UnifiedRegex::RegExpTestCache* cache = nullptr;
- JavascriptString * cachedInput = nullptr;
- uint cacheIndex = 0;
- bool cacheHit = false;
- bool cachedResult = false;
- if (useCache)
- {
- cache = pattern->EnsureTestCache();
- cacheIndex = UnifiedRegex::RegexPattern::GetTestCacheIndex(input);
- cachedInput = cache->inputArray[cacheIndex] != nullptr ? cache->inputArray[cacheIndex]->Get() : nullptr;
- cacheHit = cachedInput == input;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Test, regularExpression, input);
- UnifiedRegex::RegexPattern::TraceTestCache(cacheHit, input, cachedInput, !useCache);
- #endif
- if (cacheHit)
- {
- Assert(useCache);
- cachedResult = (cache->resultBV.Test(cacheIndex) != 0);
- // If our cache says this test should produce a match (which we aren't going to compute),
- // notify the Ctor to invalidate the last match so it must be recomputed before access.
- if (cachedResult)
- {
- InvalidateLastMatchOnCtor(scriptContext, regularExpression, input);
- }
- // for debug builds, let's still do the real test so we can validate values in the cache
- #if !DBG
- return JavascriptBoolean::ToVar(cachedResult, scriptContext);
- #endif
- }
- CharCount offset;
- if (!GetInitialOffset(isGlobal, isSticky, regularExpression, inputLength, offset))
- {
- if (useCache)
- {
- Assert(offset == 0);
- Assert(!cacheHit || cachedInput == input);
- Assert(!cacheHit || cachedResult == false);
- cache->inputArray[cacheIndex] = regularExpression->GetRecycler()->CreateWeakReferenceHandle(input);
- cache->resultBV.Clear(cacheIndex);
- }
- return scriptContext->GetLibrary()->GetFalse();
- }
- if (offset <= inputLength)
- {
- match = SimpleMatch(scriptContext, pattern, inputStr, inputLength, offset);
- }
- // else: match remains undefined
- PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, match, match, true, true);
- bool wasFound = !match.IsUndefined();
- if (useCache)
- {
- Assert(offset == 0);
- Assert(!cacheHit || cachedInput == input);
- Assert(!cacheHit || cachedResult == wasFound);
- cache->inputArray[cacheIndex] = regularExpression->GetRecycler()->CreateWeakReferenceHandle(input);
- if (wasFound)
- {
- cache->resultBV.Set(cacheIndex);
- }
- else
- {
- cache->resultBV.Clear(cacheIndex);
- }
- }
- return JavascriptBoolean::ToVar(wasFound, scriptContext);
- }
- template<typename GroupFn>
- void RegexHelper::ReplaceFormatString
- ( ScriptContext* scriptContext
- , int numGroups
- , GroupFn getGroup
- , JavascriptString* input
- , const char16* matchedString
- , UnifiedRegex::GroupInfo match
- , JavascriptString* replace
- , int substitutions
- , __in_ecount(substitutions) CharCount* substitutionOffsets
- , CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)>& concatenated )
- {
- Var nonMatchValue = NonMatchValue(scriptContext, false);
- const CharCount inputLength = input->GetLength();
- const char16* replaceStr = replace->GetString();
- const CharCount replaceLength = replace->GetLength();
- CharCount offset = 0;
- for (int i = 0; i < substitutions; i++)
- {
- CharCount substitutionOffset = substitutionOffsets[i];
- concatenated.Append(replace, offset, substitutionOffset - offset);
- char16 currentChar = replaceStr[substitutionOffset + 1];
- if (currentChar >= _u('0') && currentChar <= _u('9'))
- {
- // We've found a substitution ref, like $32. In accordance with the standard (sec-getsubstitution),
- // we recognize at most two decimal digits after the dollar sign.
- // This should be unsigned, but this would cause lots of compiler warnings unless we also make
- // numGroups unsigned, because of a comparison below.
- int captureIndex = (int)(currentChar - _u('0'));
- Assert(0 <= captureIndex && captureIndex <= 9); // numeric value of single decimal digit
- offset = substitutionOffset + 2;
- if (offset < replaceLength)
- {
- currentChar = replaceStr[substitutionOffset + 2];
- if (currentChar >= _u('0') && currentChar <= _u('9'))
- {
- // Should also be unsigned; see captureIndex above.
- int tempCaptureIndex = (10 * captureIndex) + (int)(currentChar - _u('0'));
- Assert(0 <= tempCaptureIndex && tempCaptureIndex < 100); // numeric value of 2-digit positive decimal number
- if (tempCaptureIndex < numGroups)
- {
- captureIndex = tempCaptureIndex;
- offset = substitutionOffset + 3;
- }
- }
- }
- Assert(0 <= captureIndex && captureIndex < 100); // as above, value of 2-digit positive decimal number
- if (captureIndex < numGroups && (captureIndex != 0))
- {
- Var group = getGroup(captureIndex, nonMatchValue);
- if (JavascriptString::Is(group))
- concatenated.Append(JavascriptString::UnsafeFromVar(group));
- else if (group != nonMatchValue)
- concatenated.Append(replace, substitutionOffset, offset - substitutionOffset);
- }
- else
- concatenated.Append(replace, substitutionOffset, offset - substitutionOffset);
- }
- else
- {
- switch (currentChar)
- {
- case _u('$'): // literal '$' character
- concatenated.Append(_u('$'));
- offset = substitutionOffset + 2;
- break;
- case _u('&'): // matched string
- concatenated.Append(matchedString, match.length);
- offset = substitutionOffset + 2;
- break;
- case _u('`'): // left context
- concatenated.Append(input, 0, match.offset);
- offset = substitutionOffset + 2;
- break;
- case _u('\''): // right context
- if (match.EndOffset() < inputLength)
- {
- concatenated.Append(input, match.EndOffset(), inputLength - match.EndOffset());
- }
- offset = substitutionOffset + 2;
- break;
- default:
- concatenated.Append(_u('$'));
- offset = substitutionOffset + 1;
- break;
- }
- }
- }
- concatenated.Append(replace, offset, replaceLength - offset);
- }
- int RegexHelper::GetReplaceSubstitutions(const char16 * const replaceStr, CharCount const replaceLength,
- ArenaAllocator * const tempAllocator, CharCount** const substitutionOffsetsOut)
- {
- int substitutions = 0;
- for (CharCount i = 0; i < replaceLength; i++)
- {
- if (replaceStr[i] == _u('$'))
- {
- if (++i < replaceLength)
- {
- substitutions++;
- }
- }
- }
- if (substitutions > 0)
- {
- CharCount* substitutionOffsets = AnewArray(tempAllocator, CharCount, substitutions);
- substitutions = 0;
- for (CharCount i = 0; i < replaceLength; i++)
- {
- if (replaceStr[i] == _u('$'))
- {
- if (i < (replaceLength - 1))
- {
- #pragma prefast(suppress:26000, "index doesn't overflow the buffer")
- substitutionOffsets[substitutions] = i;
- i++;
- substitutions++;
- }
- }
- }
- *substitutionOffsetsOut = substitutionOffsets;
- }
- return substitutions;
- }
- Var RegexHelper::RegexReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult)
- {
- ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
- if (scriptConfig->IsES6RegExSymbolsEnabled() && IsRegexSymbolReplaceObservable(thisObj, scriptContext))
- {
- return RegexEs6ReplaceImpl(scriptContext, thisObj, input, replace, noResult);
- }
- else
- {
- PCWSTR varName = scriptConfig->IsES6RegExSymbolsEnabled()
- ? _u("RegExp.prototype[Symbol.replace]")
- : _u("String.prototype.replace");
- JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
- return RegexEs5ReplaceImpl(scriptContext, regularExpression, input, replace, noResult);
- }
- }
- bool RegexHelper::IsRegexSymbolReplaceObservable(RecyclableObject* instance, ScriptContext* scriptContext)
- {
- DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
- return !JavascriptRegExp::HasOriginalRegExType(instance)
- || JavascriptRegExp::HasObservableUnicodeFlag(regexPrototype)
- || JavascriptRegExp::HasObservableExec(regexPrototype)
- || JavascriptRegExp::HasObservableGlobalFlag(regexPrototype);
- }
- Var RegexHelper::RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult)
- {
- auto appendReplacement = [&](
- CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)>& resultBuilder,
- ArenaAllocator* tempAlloc,
- JavascriptString* matchStr,
- int numberOfCaptures,
- Field(Var)* captures,
- CharCount position)
- {
- CharCount* substitutionOffsets = nullptr;
- int substitutions = GetReplaceSubstitutions(
- replace->GetString(),
- replace->GetLength(),
- tempAlloc,
- &substitutionOffsets);
- auto getGroup = [&](int captureIndex, Var nonMatchValue) {
- return captureIndex <= numberOfCaptures ? PointerValue(captures[captureIndex]) : nonMatchValue;
- };
- UnifiedRegex::GroupInfo match(position, matchStr->GetLength());
- int numGroups = numberOfCaptures + 1; // Take group 0 into account.
- ReplaceFormatString(
- scriptContext,
- numGroups,
- getGroup,
- input,
- matchStr->GetString(),
- match,
- replace,
- substitutions,
- substitutionOffsets,
- resultBuilder);
- };
- return RegexEs6ReplaceImpl(scriptContext, thisObj, input, appendReplacement, noResult);
- }
- Var RegexHelper::RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptFunction* replaceFn)
- {
- auto appendReplacement = [&](
- CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)>& resultBuilder,
- ArenaAllocator* tempAlloc,
- JavascriptString* matchStr,
- int numberOfCaptures,
- Field(Var)* captures,
- CharCount position)
- {
- // replaceFn Arguments:
- //
- // 0: this
- // 1: matched
- // 2: capture1
- // ...
- // N + 1: capture N
- // N + 2: position
- // N + 3: input
- // Number of captures can be at most 99, so we won't overflow.
- ushort argCount = (ushort) numberOfCaptures + 4;
- PROBE_STACK_NO_DISPOSE(scriptContext, argCount * sizeof(Var));
- Var* args = (Var*) _alloca(argCount * sizeof(Var));
- args[0] = scriptContext->GetLibrary()->GetUndefined();
- #pragma prefast(suppress:6386, "The write is within the bounds")
- args[1] = matchStr;
- for (int i = 1; i <= numberOfCaptures; ++i)
- {
- args[i + 1] = captures[i];
- }
- args[numberOfCaptures + 2] = JavascriptNumber::ToVar(position, scriptContext);
- args[numberOfCaptures + 3] = input;
- Js::Var replaceFnResult = scriptContext->GetThreadContext()->ExecuteImplicitCall(replaceFn, Js::ImplicitCall_Accessor, [=]()->Js::Var
- {
- return replaceFn->CallFunction(Arguments(CallInfo(argCount), args));
- });
- JavascriptString* replace = JavascriptConversion::ToString(replaceFnResult, scriptContext);
- resultBuilder.Append(replace);
- };
- return RegexEs6ReplaceImpl(scriptContext, thisObj, input, appendReplacement, /* noResult */ false);
- }
- template<typename ReplacementFn>
- Var RegexHelper::RegexEs6ReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, ReplacementFn appendReplacement, bool noResult)
- {
- bool global = JavascriptRegExp::GetGlobalProperty(thisObj, scriptContext);
- bool unicode = false; // Dummy value. It isn't used below unless "global" is "true".
- if (global)
- {
- unicode = JavascriptRegExp::GetUnicodeProperty(thisObj, scriptContext);
- JavascriptRegExp::SetLastIndexProperty(thisObj, TaggedInt::ToVarUnchecked(0), scriptContext);
- }
- JavascriptString* accumulatedResult = nullptr;
- Recycler* recycler = scriptContext->GetRecycler();
- JsUtil::List<RecyclableObject*>* results = RecyclerNew(recycler, JsUtil::List<RecyclableObject*>, recycler);
- while (true)
- {
- PCWSTR varName = _u("RegExp.prototype[Symbol.replace]");
- Var result = JavascriptRegExp::CallExec(thisObj, input, varName, scriptContext);
- if (JavascriptOperators::IsNull(result))
- {
- break;
- }
- RecyclableObject* resultObj = ExecResultToRecyclableObject(result);
- results->Add(resultObj);
- if (!global)
- {
- break;
- }
- JavascriptString* matchStr = GetMatchStrFromResult(resultObj, scriptContext);
- AdvanceLastIndex(thisObj, input, matchStr, unicode, scriptContext);
- }
- CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> accumulatedResultBuilder(scriptContext);
- CharCount inputLength = input->GetLength();
- CharCount nextSourcePosition = 0;
- size_t previousNumberOfCapturesToKeep = 0;
- Field(Var)* captures = nullptr;
- BEGIN_TEMP_ALLOCATOR(tempAlloc, scriptContext, _u("RegexHelper"))
- {
- results->Map([&](int resultIndex, RecyclableObject* resultObj) {
- int64 length = JavascriptConversion::ToLength(
- JavascriptOperators::GetProperty(resultObj, PropertyIds::length, scriptContext),
- scriptContext);
- uint64 numberOfCaptures = (uint64) max(length - 1, (int64) 0);
- JavascriptString* matchStr = GetMatchStrFromResult(resultObj, scriptContext);
- int64 index = JavascriptConversion::ToLength(
- JavascriptOperators::GetProperty(resultObj, PropertyIds::index, scriptContext),
- scriptContext);
- CharCount position = max(
- min(JavascriptRegExp::GetIndexOrMax(index), inputLength),
- (CharCount) 0);
- // Capture groups can be referenced using at most two digits.
- const uint64 maxNumberOfCaptures = 99;
- size_t numberOfCapturesToKeep = (size_t) min(numberOfCaptures, maxNumberOfCaptures);
- if (captures == nullptr)
- {
- captures = RecyclerNewArray(recycler, Field(Var), numberOfCapturesToKeep + 1);
- }
- else if (numberOfCapturesToKeep != previousNumberOfCapturesToKeep)
- {
- size_t existingBytes = (previousNumberOfCapturesToKeep + 1) * sizeof(Var*);
- size_t requestedBytes = (numberOfCapturesToKeep + 1) * sizeof(Var*);
- captures = (Field(Var)*) recycler->Realloc(captures, existingBytes, requestedBytes);
- }
- previousNumberOfCapturesToKeep = numberOfCapturesToKeep;
- for (uint64 i = 1; i <= numberOfCaptures; ++i)
- {
- Var nextCapture = JavascriptOperators::GetItem(resultObj, i, scriptContext);
- if (!JavascriptOperators::IsUndefined(nextCapture))
- {
- nextCapture = JavascriptConversion::ToString(nextCapture, scriptContext);
- }
- if (i <= numberOfCapturesToKeep)
- {
- captures[i] = nextCapture;
- }
- }
- if (position >= nextSourcePosition)
- {
- CharCount substringLength = position - nextSourcePosition;
- accumulatedResultBuilder.Append(input, nextSourcePosition, substringLength);
- appendReplacement(accumulatedResultBuilder, tempAlloc, matchStr, (int) numberOfCapturesToKeep, captures, position);
- nextSourcePosition = JavascriptRegExp::AddIndex(position, matchStr->GetLength());
- }
- });
- }
- END_TEMP_ALLOCATOR(tempAlloc, scriptContext);
- if (nextSourcePosition < inputLength)
- {
- CharCount substringLength = inputLength - nextSourcePosition;
- accumulatedResultBuilder.Append(input, nextSourcePosition, substringLength);
- }
- accumulatedResult = accumulatedResultBuilder.ToString();
- Assert(accumulatedResult != nullptr);
- return accumulatedResult;
- }
- // String.prototype.replace, replace value has been converted to a string (ES5 15.5.4.11)
- Var RegexHelper::RegexEs5ReplaceImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace, bool noResult)
- {
- UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
- const char16* replaceStr = replace->GetString();
- CharCount replaceLength = replace->GetLength();
- const char16* inputStr = input->GetString();
- CharCount inputLength = input->GetLength();
- JavascriptString* newString = nullptr;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Replace, regularExpression, input, replace);
- #endif
- RegexMatchState state;
- PrimBeginMatch(state, scriptContext, pattern, inputStr, inputLength, true);
- UnifiedRegex::GroupInfo lastActualMatch;
- UnifiedRegex::GroupInfo lastSuccessfulMatch;
- const bool isGlobal = pattern->IsGlobal();
- const bool isSticky = pattern->IsSticky();
- // If global = false and sticky = true, set offset = lastIndex, else set offset = 0
- CharCount offset = 0;
- if (!isGlobal && isSticky)
- {
- offset = regularExpression->GetLastIndex();
- }
- if (!noResult)
- {
- CharCount* substitutionOffsets = nullptr;
- int substitutions = GetReplaceSubstitutions(replaceStr, replaceLength,
- state.tempAllocatorObj->GetAllocator(), &substitutionOffsets);
- // Use to see if we already have partial result populated in concatenated
- CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> concatenated(scriptContext);
- // If lastIndex > 0, append input[0..offset] characters to the result
- if (offset > 0)
- {
- concatenated.Append(input, 0, min(offset, inputLength));
- }
- do
- {
- if (offset > inputLength)
- {
- lastActualMatch.Reset();
- break;
- }
- lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
- if (lastActualMatch.IsUndefined())
- break;
- lastSuccessfulMatch = lastActualMatch;
- concatenated.Append(input, offset, lastActualMatch.offset - offset);
- if (substitutionOffsets != 0)
- {
- auto getGroup = [&](int captureIndex, Var nonMatchValue) {
- return GetGroup(scriptContext, pattern, input, nonMatchValue, captureIndex);
- };
- const char16* matchedString = inputStr + lastActualMatch.offset;
- ReplaceFormatString(scriptContext, pattern->NumGroups(), getGroup, input, matchedString, lastActualMatch, replace, substitutions, substitutionOffsets, concatenated);
- }
- else
- {
- concatenated.Append(replace);
- }
- if (lastActualMatch.length == 0)
- {
- if (lastActualMatch.offset < inputLength)
- {
- concatenated.Append(inputStr[lastActualMatch.offset]);
- }
- offset = lastActualMatch.offset + 1;
- }
- else
- {
- offset = lastActualMatch.EndOffset();
- }
- }
- while (isGlobal);
- if (offset == 0)
- {
- // There was no successful match so the result is the input string.
- newString = input;
- }
- else
- {
- if (offset < inputLength)
- {
- concatenated.Append(input, offset, inputLength - offset);
- }
- newString = concatenated.ToString();
- }
- substitutionOffsets = 0;
- }
- else
- {
- do
- {
- if (offset > inputLength)
- {
- lastActualMatch.Reset();
- break;
- }
- lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
- if (lastActualMatch.IsUndefined())
- break;
- lastSuccessfulMatch = lastActualMatch;
- offset = lastActualMatch.length == 0? lastActualMatch.offset + 1 : lastActualMatch.EndOffset();
- }
- while (isGlobal);
- newString = scriptContext->GetLibrary()->GetEmptyString();
- }
- PrimEndMatch(state, scriptContext, pattern);
- PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
- return newString;
- }
- Var RegexHelper::RegexReplaceImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptFunction* replacefn)
- {
- ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
- if (scriptConfig->IsES6RegExSymbolsEnabled() && IsRegexSymbolReplaceObservable(thisObj, scriptContext))
- {
- return RegexEs6ReplaceImpl(scriptContext, thisObj, input, replacefn);
- }
- else
- {
- PCWSTR varName = scriptConfig->IsES6RegExSymbolsEnabled()
- ? _u("RegExp.prototype[Symbol.replace]")
- : _u("String.prototype.replace");
- JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
- return RegexEs5ReplaceImpl(scriptContext, regularExpression, input, replacefn);
- }
- }
- // String.prototype.replace, replace value is a function (ES5 15.5.4.11)
- Var RegexHelper::RegexEs5ReplaceImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptFunction* replacefn)
- {
- UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
- JavascriptString* newString = nullptr;
- const char16* inputStr = input->GetString();
- CharCount inputLength = input->GetLength();
- const int rawNumGroups = pattern->NumGroups();
- Var nonMatchValue = NonMatchValue(scriptContext, false);
- UnifiedRegex::GroupInfo lastMatch; // initially undefined
- AssertOrFailFast(0 < rawNumGroups && rawNumGroups <= INT16_MAX);
- const uint16 numGroups = uint16(rawNumGroups);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Replace, regularExpression, input, scriptContext->GetLibrary()->CreateStringFromCppLiteral(_u("<replace function>")));
- #endif
- RegexMatchState state;
- PrimBeginMatch(state, scriptContext, pattern, inputStr, inputLength, false);
- // NOTE: These must be kept out of the scope of the try below!
- const bool isGlobal = pattern->IsGlobal();
- const bool isSticky = pattern->IsSticky();
- // If global = true, set lastIndex to 0 in case it is used in replacefn
- if (isGlobal)
- {
- regularExpression->SetLastIndex(0);
- }
- // If global = false and sticky = true, set offset = lastIndex, else set offset = 0
- CharCount offset = 0;
- if (!isGlobal && isSticky)
- {
- offset = regularExpression->GetLastIndex();
- }
- CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> concatenated(scriptContext);
- UnifiedRegex::GroupInfo lastActualMatch;
- UnifiedRegex::GroupInfo lastSuccessfulMatch;
- // Replace function must be called with arguments (<function's this>, group0, ..., groupn, offset, input)
- // The garbage collector must know about this array since it is being passed back into script land
- Var* replaceArgs;
- PROBE_STACK_NO_DISPOSE(scriptContext, (numGroups + 3) * sizeof(Var));
- replaceArgs = (Var*)_alloca((numGroups + 3) * sizeof(Var));
- replaceArgs[0] = scriptContext->GetLibrary()->GetUndefined();
- replaceArgs[numGroups + 2] = input;
- if (offset > 0)
- {
- concatenated.Append(input, 0, min(offset, inputLength));
- }
- do
- {
- if (offset > inputLength)
- {
- lastActualMatch.Reset();
- break;
- }
- lastActualMatch = PrimMatch(state, scriptContext, pattern, inputLength, offset);
- if (lastActualMatch.IsUndefined())
- break;
- lastSuccessfulMatch = lastActualMatch;
- for (int groupId = 0; groupId < numGroups; groupId++)
- replaceArgs[groupId + 1] = GetGroup(scriptContext, pattern, input, nonMatchValue, groupId);
- replaceArgs[numGroups + 1] = JavascriptNumber::ToVar(lastActualMatch.offset, scriptContext);
- // The called function must see the global state updated by the current match
- // (Should the function reach into a RegExp field, the pattern will still be valid, thus there's no
- // danger of the primitive regex matcher being re-entered)
- // WARNING: We go off into script land here, which way in turn invoke a regex operation, even on the
- // same regex.
- ThreadContext* threadContext = scriptContext->GetThreadContext();
- Var replaceVar = threadContext->ExecuteImplicitCall(replacefn, ImplicitCall_Accessor, [=]()->Js::Var
- {
- return replacefn->CallFunction(Arguments(CallInfo(UInt16Math::Add(numGroups, 3)), replaceArgs));
- });
- JavascriptString* replace = JavascriptConversion::ToString(replaceVar, scriptContext);
- concatenated.Append(input, offset, lastActualMatch.offset - offset);
- concatenated.Append(replace);
- if (lastActualMatch.length == 0)
- {
- if (lastActualMatch.offset < inputLength)
- {
- concatenated.Append(inputStr[lastActualMatch.offset]);
- }
- offset = lastActualMatch.offset + 1;
- }
- else
- {
- offset = lastActualMatch.EndOffset();
- }
- }
- while (isGlobal);
- PrimEndMatch(state, scriptContext, pattern);
- if (offset == 0)
- {
- // There was no successful match so the result is the input string.
- newString = input;
- }
- else
- {
- if (offset < inputLength)
- {
- concatenated.Append(input, offset, inputLength - offset);
- }
- newString = concatenated.ToString();
- }
- PropagateLastMatch(scriptContext, isGlobal, isSticky, regularExpression, input, lastSuccessfulMatch, lastActualMatch, true, true);
- return newString;
- }
- Var RegexHelper::StringReplace(JavascriptString* match, JavascriptString* input, JavascriptString* replace)
- {
- CharCount matchedIndex = JavascriptString::strstr(input, match, true);
- if (matchedIndex == CharCountFlag)
- {
- return input;
- }
- const char16 *const replaceStr = replace->GetString();
- // Unfortunately, due to the possibility of there being $ escapes, we can't just wmemcpy the replace string. Check if we
- // have a small replace string that we can quickly scan for '$', to see if we can just wmemcpy.
- bool definitelyNoEscapes = replace->GetLength() == 0;
- if(!definitelyNoEscapes && replace->GetLength() <= 8)
- {
- CharCount i = 0;
- for(; i < replace->GetLength() && replaceStr[i] != _u('$'); ++i);
- definitelyNoEscapes = i >= replace->GetLength();
- }
- if(definitelyNoEscapes)
- {
- const char16* inputStr = input->GetString();
- const char16* prefixStr = inputStr;
- CharCount prefixLength = (CharCount)matchedIndex;
- const char16* postfixStr = inputStr + prefixLength + match->GetLength();
- CharCount postfixLength = input->GetLength() - prefixLength - match->GetLength();
- CharCount newLength = prefixLength + postfixLength + replace->GetLength();
- BufferStringBuilder bufferString(newLength, match->GetScriptContext());
- bufferString.SetContent(prefixStr, prefixLength,
- replaceStr, replace->GetLength(),
- postfixStr, postfixLength);
- return bufferString.ToString();
- }
- CompoundString::Builder<64 * sizeof(void *) / sizeof(char16)> concatenated(input->GetScriptContext());
- // Copy portion of input string that precedes the matched substring
- concatenated.Append(input, 0, matchedIndex);
- // Copy the replace string with substitutions
- CharCount i = 0, j = 0;
- for(; j < replace->GetLength(); ++j)
- {
- if(replaceStr[j] == _u('$') && j + 1 < replace->GetLength())
- {
- switch(replaceStr[j + 1])
- {
- case _u('$'): // literal '$'
- ++j;
- concatenated.Append(replace, i, j - i);
- i = j + 1;
- break;
- case _u('&'): // matched substring
- concatenated.Append(replace, i, j - i);
- concatenated.Append(match);
- ++j;
- i = j + 1;
- break;
- case _u('`'): // portion of input string that precedes the matched substring
- concatenated.Append(replace, i, j - i);
- concatenated.Append(input, 0, matchedIndex);
- ++j;
- i = j + 1;
- break;
- case _u('\''): // portion of input string that follows the matched substring
- concatenated.Append(replace, i, j - i);
- concatenated.Append(
- input,
- matchedIndex + match->GetLength(),
- input->GetLength() - matchedIndex - match->GetLength());
- ++j;
- i = j + 1;
- break;
- default: // take both the initial '$' and the following character literally
- ++j;
- }
- }
- }
- Assert(i <= j);
- concatenated.Append(replace, i, j - i);
- // Copy portion of input string that follows the matched substring
- concatenated.Append(input, matchedIndex + match->GetLength(), input->GetLength() - matchedIndex - match->GetLength());
- return concatenated.ToString();
- }
- Var RegexHelper::StringReplace(ScriptContext* scriptContext, JavascriptString* match, JavascriptString* input, JavascriptFunction* replacefn)
- {
- CharCount indexMatched = JavascriptString::strstr(input, match, true);
- Assert(match->GetScriptContext() == scriptContext);
- Assert(input->GetScriptContext() == scriptContext);
- if (indexMatched != CharCountFlag)
- {
- ThreadContext* threadContext = scriptContext->GetThreadContext();
- Var replaceVar = threadContext->ExecuteImplicitCall(replacefn, ImplicitCall_Accessor, [=]()->Js::Var
- {
- Var pThis = scriptContext->GetLibrary()->GetUndefined();
- return CALL_FUNCTION(threadContext, replacefn, CallInfo(4), pThis, match, JavascriptNumber::ToVar((int)indexMatched, scriptContext), input);
- });
- JavascriptString* replace = JavascriptConversion::ToString(replaceVar, scriptContext);
- const char16* inputStr = input->GetString();
- const char16* prefixStr = inputStr;
- CharCount prefixLength = indexMatched;
- const char16* postfixStr = inputStr + prefixLength + match->GetLength();
- CharCount postfixLength = input->GetLength() - prefixLength - match->GetLength();
- CharCount newLength = prefixLength + postfixLength + replace->GetLength();
- BufferStringBuilder bufferString(newLength, match->GetScriptContext());
- bufferString.SetContent(prefixStr, prefixLength,
- replace->GetString(), replace->GetLength(),
- postfixStr, postfixLength);
- return bufferString.ToString();
- }
- return input;
- }
- void RegexHelper::AppendSubString(ScriptContext* scriptContext, JavascriptArray* ary, JavascriptString* input, CharCount startInclusive, CharCount endExclusive)
- {
- Assert(endExclusive >= startInclusive);
- Assert(endExclusive <= input->GetLength());
- CharCount length = endExclusive - startInclusive;
- JavascriptString* subString;
- if (length == 0)
- {
- subString = scriptContext->GetLibrary()->GetEmptyString();
- }
- else if (length == 1)
- {
- subString = scriptContext->GetLibrary()->GetCharStringCache().GetStringForChar(input->GetString()[startInclusive]);
- }
- else
- {
- subString = SubString::New(input, startInclusive, length);
- }
- ary->DirectAppendItem(subString);
- }
- inline UnifiedRegex::RegexPattern *RegexHelper::GetSplitPattern(ScriptContext* scriptContext, JavascriptRegExp *regularExpression)
- {
- UnifiedRegex::RegexPattern* splitPattern = regularExpression->GetSplitPattern();
- if (!splitPattern)
- {
- UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
- bool isSticky = (pattern->GetFlags() & UnifiedRegex::StickyRegexFlag) != 0;
- if (!isSticky)
- {
- splitPattern = pattern;
- }
- else
- {
- // When the sticky flag is present, the pattern will match the input only at
- // the beginning since "lastIndex" is set to 0 before the first iteration.
- // However, for split(), we need to look for the pattern anywhere in the input.
- //
- // One way to handle this is to use the original pattern with the sticky flag and
- // when it fails, move to the next character and retry.
- //
- // Another way, which is implemented here, is to create another pattern without the
- // sticky flag and have it automatically look for itself anywhere in the input. This
- // way, we can also take advantage of the optimizations for the global search (e.g.,
- // the Boyer-Moore string search).
- InternalString source = pattern->GetSource();
- UnifiedRegex::RegexFlags nonStickyFlags =
- static_cast<UnifiedRegex::RegexFlags>(pattern->GetFlags() & ~UnifiedRegex::StickyRegexFlag);
- splitPattern = CompileDynamic(
- scriptContext,
- source.GetBuffer(),
- source.GetLength(),
- nonStickyFlags,
- pattern->IsLiteral());
- }
- regularExpression->SetSplitPattern(splitPattern);
- }
- return splitPattern;
- }
- Var RegexHelper::RegexSplitImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
- {
- ScriptConfiguration const * scriptConfig = scriptContext->GetConfig();
- if (scriptConfig->IsES6RegExSymbolsEnabled()
- && IsRegexSymbolSplitObservable(thisObj, scriptContext))
- {
- return RegexEs6SplitImpl(scriptContext, thisObj, input, limit, noResult, stackAllocationPointer);
- }
- else
- {
- PCWSTR varName = scriptContext->GetConfig()->IsES6RegExSymbolsEnabled()
- ? _u("RegExp.prototype[Symbol.split]")
- : _u("String.prototype.split");
- JavascriptRegExp* regularExpression = JavascriptRegExp::ToRegExp(thisObj, varName, scriptContext);
- return RegexEs5SplitImpl(scriptContext, regularExpression, input, limit, noResult, stackAllocationPointer);
- }
- }
- bool RegexHelper::IsRegexSymbolSplitObservable(RecyclableObject* instance, ScriptContext* scriptContext)
- {
- DynamicObject* regexPrototype = scriptContext->GetLibrary()->GetRegExpPrototype();
- return !JavascriptRegExp::HasOriginalRegExType(instance)
- || JavascriptRegExp::HasObservableConstructor(regexPrototype)
- || JavascriptRegExp::HasObservableFlags(regexPrototype)
- || JavascriptRegExp::HasObservableExec(regexPrototype);
- }
- Var RegexHelper::RegexEs6SplitImpl(ScriptContext* scriptContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
- {
- PCWSTR const varName = _u("RegExp.prototype[Symbol.split]");
- JavascriptFunction* defaultConstructor = scriptContext->GetLibrary()->GetRegExpConstructor();
- RecyclableObject* speciesConstructor = JavascriptOperators::SpeciesConstructor(
- thisObj,
- defaultConstructor,
- scriptContext);
- AssertOrFailFast(JavascriptOperators::IsConstructor(speciesConstructor));
- JavascriptString* flags = JavascriptConversion::ToString(
- JavascriptOperators::GetProperty(thisObj, PropertyIds::flags, scriptContext),
- scriptContext);
- bool unicode = wcsstr(flags->GetString(), _u("u")) != nullptr;
- flags = AppendStickyToFlagsIfNeeded(flags, scriptContext);
- Var regEx = JavascriptOperators::NewObjectCreationHelper_ReentrancySafe(speciesConstructor, defaultConstructor, scriptContext->GetThreadContext(), [=]()->Js::Var
- {
- Js::Var args[] = { speciesConstructor, thisObj, flags };
- Js::CallInfo callInfo(Js::CallFlags_New, _countof(args));
- return JavascriptOperators::NewScObject(
- speciesConstructor,
- Js::Arguments(callInfo, args),
- scriptContext);
- });
- RecyclableObject* splitter = RecyclableObject::UnsafeFromVar(regEx);
- JavascriptArray* arrayResult = scriptContext->GetLibrary()->CreateArray();
- if (limit == 0)
- {
- return arrayResult;
- }
- CharCount inputLength = input->GetLength();
- if (inputLength == 0)
- {
- Var result = JavascriptRegExp::CallExec(splitter, input, varName, scriptContext);
- if (!JavascriptOperators::IsNull(result))
- {
- return arrayResult;
- }
- arrayResult->DirectAppendItem(input);
- return arrayResult;
- }
- CharCount substringStartIndex = 0; // 'p' in spec
- CharCount substringEndIndex = substringStartIndex; // 'q' in spec
- do // inputLength > 0
- {
- JavascriptRegExp::SetLastIndexProperty(splitter, substringEndIndex, scriptContext);
- Var result = JavascriptRegExp::CallExec(splitter, input, varName, scriptContext); // 'z' in spec
- if (JavascriptOperators::IsNull(result))
- {
- substringEndIndex = AdvanceStringIndex(input, substringEndIndex, unicode);
- }
- else
- {
- CharCount endIndex = JavascriptRegExp::GetLastIndexProperty(splitter, scriptContext); // 'e' in spec
- endIndex = min(endIndex, inputLength);
- if (endIndex == substringStartIndex)
- {
- substringEndIndex = AdvanceStringIndex(input, substringEndIndex, unicode);
- }
- else
- {
- AppendSubString(scriptContext, arrayResult, input, substringStartIndex, substringEndIndex);
- if (arrayResult->GetLength() == limit)
- {
- return arrayResult;
- }
- substringStartIndex = endIndex;
- RecyclableObject* resultObject = ExecResultToRecyclableObject(result);
- int64 length = JavascriptConversion::ToLength(
- JavascriptOperators::GetProperty(resultObject, PropertyIds::length, scriptContext),
- scriptContext);
- uint64 numberOfCaptures = max(length - 1, (int64) 0);
- for (uint64 i = 1; i <= numberOfCaptures; ++i)
- {
- Var nextCapture = JavascriptOperators::GetItem(resultObject, i, scriptContext);
- arrayResult->DirectAppendItem(nextCapture);
- if (arrayResult->GetLength() == limit)
- {
- return arrayResult;
- }
- }
- substringEndIndex = substringStartIndex;
- }
- }
- }
- while (substringEndIndex < inputLength);
- AppendSubString(scriptContext, arrayResult, input, substringStartIndex, substringEndIndex);
- return arrayResult;
- }
- JavascriptString* RegexHelper::AppendStickyToFlagsIfNeeded(JavascriptString* flags, ScriptContext* scriptContext)
- {
- const char16* flagsString = flags->GetString();
- if (wcsstr(flagsString, _u("y")) == nullptr)
- {
- BEGIN_TEMP_ALLOCATOR(tempAlloc, scriptContext, _u("RegexHelper"))
- {
- StringBuilder<ArenaAllocator> bs(tempAlloc, flags->GetLength() + 1);
- bs.Append(flagsString, flags->GetLength());
- bs.Append(_u('y'));
- flags = Js::JavascriptString::NewCopyBuffer(bs.Detach(), bs.Count(), scriptContext);
- }
- END_TEMP_ALLOCATOR(tempAlloc, scriptContext);
- }
- return flags;
- }
- // String.prototype.split (ES5 15.5.4.14)
- Var RegexHelper::RegexEs5SplitImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
- {
- if (noResult && scriptContext->GetConfig()->SkipSplitOnNoResult())
- {
- // TODO: Fix this so that the side effect for PropagateLastMatch is done
- return scriptContext->GetLibrary()->GetNull();
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Split, regularExpression, input);
- #endif
- JavascriptArray* ary = scriptContext->GetLibrary()->CreateArrayOnStack(stackAllocationPointer);
- if (limit == 0)
- {
- // SPECIAL CASE: Zero limit
- return ary;
- }
- UnifiedRegex::RegexPattern *splitPattern = GetSplitPattern(scriptContext, regularExpression);
- const char16* inputStr = input->GetString();
- CharCount inputLength = input->GetLength(); // s in spec
- const int numGroups = splitPattern->NumGroups();
- Var nonMatchValue = NonMatchValue(scriptContext, false);
- UnifiedRegex::GroupInfo lastSuccessfulMatch; // initially undefined
- RegexMatchState state;
- PrimBeginMatch(state, scriptContext, splitPattern, inputStr, inputLength, false);
- if (inputLength == 0)
- {
- // SPECIAL CASE: Empty string
- UnifiedRegex::GroupInfo match = PrimMatch(state, scriptContext, splitPattern, inputLength, 0);
- if (match.IsUndefined())
- ary->DirectAppendItem(input);
- else
- lastSuccessfulMatch = match;
- }
- else
- {
- CharCount copyOffset = 0; // p in spec
- CharCount startOffset = 0; // q in spec
- CharCount inputLimit = inputLength;
- while (startOffset < inputLimit)
- {
- UnifiedRegex::GroupInfo match = PrimMatch(state, scriptContext, splitPattern, inputLength, startOffset);
- if (match.IsUndefined())
- break;
- lastSuccessfulMatch = match;
- if (match.offset >= inputLimit)
- break;
- startOffset = match.offset;
- CharCount endOffset = match.EndOffset(); // e in spec
- if (endOffset == copyOffset)
- startOffset++;
- else
- {
- AppendSubString(scriptContext, ary, input, copyOffset, startOffset);
- if (ary->GetLength() >= limit)
- break;
- startOffset = copyOffset = endOffset;
- for (int groupId = 1; groupId < numGroups; groupId++)
- {
- ary->DirectAppendItem(GetGroup(scriptContext, splitPattern, input, nonMatchValue, groupId));
- if (ary->GetLength() >= limit)
- break;
- }
- }
- }
- if (ary->GetLength() < limit)
- AppendSubString(scriptContext, ary, input, copyOffset, inputLength);
- }
- PrimEndMatch(state, scriptContext, splitPattern);
- Assert(!splitPattern->IsSticky());
- PropagateLastMatch
- ( scriptContext
- , splitPattern->IsGlobal()
- , /* isSticky */ false
- , regularExpression
- , input
- , lastSuccessfulMatch
- , UnifiedRegex::GroupInfo()
- , /* updateRegex */ true
- , /* updateCtor */ true
- , /* useSplitPattern */ true );
- return ary;
- }
- UnifiedRegex::GroupInfo
- RegexHelper::SimpleMatch(ScriptContext * scriptContext, UnifiedRegex::RegexPattern * pattern, const char16 * input, CharCount inputLength, CharCount offset)
- {
- RegexMatchState state;
- PrimBeginMatch(state, scriptContext, pattern, input, inputLength, false);
- UnifiedRegex::GroupInfo match = PrimMatch(state, scriptContext, pattern, inputLength, offset);
- PrimEndMatch(state, scriptContext, pattern);
- return match;
- }
- // String.prototype.search (ES5 15.5.4.12)
- Var RegexHelper::RegexSearchImpl(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
- {
- UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
- const char16* inputStr = input->GetString();
- CharCount inputLength = input->GetLength();
- #if ENABLE_REGEX_CONFIG_OPTIONS
- RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Search, regularExpression, input);
- #endif
- UnifiedRegex::GroupInfo match = RegexHelper::SimpleMatch(scriptContext, pattern, inputStr, inputLength, 0);
- PropagateLastMatch(scriptContext, pattern->IsGlobal(), pattern->IsSticky(), regularExpression, input, match, match, false, true);
- return JavascriptNumber::ToVar(match.IsUndefined() ? -1 : (int32)match.offset, scriptContext);
- }
- // String.prototype.split (ES5 15.5.4.14)
- Var RegexHelper::StringSplit(JavascriptString* match, JavascriptString* input, CharCount limit)
- {
- ScriptContext* scriptContext = match->GetScriptContext();
- JavascriptArray* ary;
- CharCount matchLen = match->GetLength();
- if (matchLen == 0)
- {
- CharCount count = min(input->GetLength(), limit);
- ary = scriptContext->GetLibrary()->CreateArray(count);
- const char16 * charString = input->GetString();
- for (CharCount i = 0; i < count; i++)
- {
- ary->DirectSetItemAt(i, scriptContext->GetLibrary()->GetCharStringCache().GetStringForChar(charString[i]));
- }
- }
- else
- {
- CharCount i = 0;
- CharCount offset = 0;
- ary = scriptContext->GetLibrary()->CreateArray(0);
- while (i < limit)
- {
- CharCount prevOffset = offset;
- offset = JavascriptString::strstr(input, match, false, prevOffset);
- if (offset != CharCountFlag)
- {
- ary->DirectSetItemAt(i++, SubString::New(input, prevOffset, offset-prevOffset));
- offset += max(matchLen, static_cast<CharCount>(1));
- if (offset > input->GetLength())
- break;
- }
- else
- {
- ary->DirectSetItemAt(i++, SubString::New(input, prevOffset, input->GetLength() - prevOffset));
- break;
- }
- }
- }
- return ary;
- }
- bool RegexHelper::IsResultNotUsed(CallFlags flags)
- {
- return !PHASE_OFF1(Js::RegexResultNotUsedPhase) && ((flags & CallFlags_NotUsed) != 0);
- }
- // ----------------------------------------------------------------------
- // Primitives
- // ----------------------------------------------------------------------
- void RegexHelper::PrimBeginMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, const char16* input, CharCount inputLength, bool alwaysNeedAlloc)
- {
- state.input = input;
- if (pattern->rep.unified.matcher == 0)
- pattern->rep.unified.matcher = UnifiedRegex::Matcher::New(scriptContext, pattern);
- if (alwaysNeedAlloc)
- state.tempAllocatorObj = scriptContext->GetTemporaryAllocator(_u("RegexUnifiedExecTemp"));
- else
- state.tempAllocatorObj = 0;
- }
- UnifiedRegex::GroupInfo
- RegexHelper::PrimMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, CharCount inputLength, CharCount offset)
- {
- Assert(pattern->rep.unified.program != 0);
- Assert(pattern->rep.unified.matcher != 0);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- UnifiedRegex::RegexStats* stats = 0;
- if (REGEX_CONFIG_FLAG(RegexProfile))
- {
- stats = scriptContext->GetRegexStatsDatabase()->GetRegexStats(pattern);
- scriptContext->GetRegexStatsDatabase()->BeginProfile();
- }
- UnifiedRegex::DebugWriter* w = 0;
- if (REGEX_CONFIG_FLAG(RegexTracing) && CONFIG_FLAG(Verbose))
- w = scriptContext->GetRegexDebugWriter();
- #endif
- pattern->rep.unified.matcher->Match
- (state.input
- , inputLength
- , offset
- , scriptContext
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , stats
- , w
- #endif
- );
- #if ENABLE_REGEX_CONFIG_OPTIONS
- if (REGEX_CONFIG_FLAG(RegexProfile))
- scriptContext->GetRegexStatsDatabase()->EndProfile(stats, UnifiedRegex::RegexStats::Execute);
- #endif
- return pattern->GetGroup(0);
- }
- void RegexHelper::PrimEndMatch(RegexMatchState& state, ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern)
- {
- if (state.tempAllocatorObj != 0)
- scriptContext->ReleaseTemporaryAllocator(state.tempAllocatorObj);
- }
- Var RegexHelper::NonMatchValue(ScriptContext* scriptContext, bool isGlobalCtor)
- {
- // SPEC DEVIATION: The $n properties of the RegExp ctor use empty strings rather than undefined to represent
- // the non-match value, even in ES5 mode.
- if (isGlobalCtor)
- return scriptContext->GetLibrary()->GetEmptyString();
- else
- return scriptContext->GetLibrary()->GetUndefined();
- }
- Var RegexHelper::GetString(ScriptContext* scriptContext, JavascriptString* input, Var nonMatchValue, UnifiedRegex::GroupInfo group)
- {
- if (group.IsUndefined())
- return nonMatchValue;
- switch (group.length)
- {
- case 0:
- return scriptContext->GetLibrary()->GetEmptyString();
- case 1:
- {
- const char16* inputStr = input->GetString();
- return scriptContext->GetLibrary()->GetCharStringCache().GetStringForChar(inputStr[group.offset]);
- }
- case 2:
- {
- const char16* inputStr = input->GetString();
- PropertyString* propString = scriptContext->GetPropertyString2(inputStr[group.offset], inputStr[group.offset + 1]);
- if (propString != 0)
- return propString;
- // fall-through for default
- }
- default:
- return SubString::New(input, group.offset, group.length);
- }
- }
- Var RegexHelper::GetGroup(ScriptContext* scriptContext, UnifiedRegex::RegexPattern* pattern, JavascriptString* input, Var nonMatchValue, int groupId)
- {
- return GetString(scriptContext, input, nonMatchValue, pattern->GetGroup(groupId));
- }
- // ======================================================================
- // Match results propagate into three places:
- // - The match result array. Generally the array has string entries for the overall match substring,
- // followed by final bindings for each group, plus the fields:
- // - 'input': string used in match
- // - 'index': index of first character of match in input
- // - 'lastIndex' (IE extension): one plus index of last character of match in input
- // However, for String.match with a global match, the result is an array of all match results
- // (ignoring any group bindings). But in IE8 mode we also bind the above fields to that array,
- // using the results of the last successful primitive match.
- // - The regular expression object has writable field:
- // - 'lastIndex': one plus index of last character of last match in last input
- // - 'lastInput'
- // - (Host extension) The RegExp constructor object has fields:
- // - '$n': last match substrings, using "" for undefined in all modes
- // - etc (see JavascriptRegExpConstructorType.cpp)
- //
- // There are also three influences on what gets propagated where and when:
- // - Whether the regular expression is global
- // - Whether the primitive operations runs the regular expression until failure (e.g. String.match) or
- // just once (e.g. RegExp.exec), or use the underlying matching machinery implicitly (e.g. String.split).
- //
- // Here are the rules:
- // - RegExp is updated for the last *successful* primitive match, except for String.replace.
- // In particular, for String.match with a global regex, the final failing match *does not* reset RegExp.
- // - Except for String.search in ES5 mode (which does not update 'lastIndex'), the regular expression's
- // lastIndex is updated as follows:
- // - ES5 mode, if a primitive match fails then the regular expression 'lastIndex' is set to 0. In particular,
- // the final failing primitive match for String.match with a global regex forces 'lastIndex' to be reset.
- // However, if a primitive match succeeds then the regular expression 'lastIndex' is updated only for
- // a global regex.
- // for success. However:
- // - The last failing match in a String.match with a global regex does NOT reset 'lastIndex'.
- // - If the regular expression matched empty, the last index is set assuming the pattern actually matched
- // one input character. This applies even if the pattern matched empty one beyond the end of the string
- // in a String.match with a global regex (!). For our own sanity, we isolate this particular case
- // within JavascriptRegExp when setting the lastIndexVar value.
- // - In all modes, 'lastIndex' determines the starting search index only for global regular expressions.
- //
- // ======================================================================
- void RegexHelper::PropagateLastMatch
- ( ScriptContext* scriptContext
- , bool isGlobal
- , bool isSticky
- , JavascriptRegExp* regularExpression
- , JavascriptString* lastInput
- , UnifiedRegex::GroupInfo lastSuccessfulMatch
- , UnifiedRegex::GroupInfo lastActualMatch
- , bool updateRegex
- , bool updateCtor
- , bool useSplitPattern )
- {
- if (updateRegex)
- {
- PropagateLastMatchToRegex(scriptContext, isGlobal, isSticky, regularExpression, lastSuccessfulMatch, lastActualMatch);
- }
- if (updateCtor)
- {
- PropagateLastMatchToCtor(scriptContext, regularExpression, lastInput, lastSuccessfulMatch, useSplitPattern);
- }
- }
- void RegexHelper::PropagateLastMatchToRegex
- ( ScriptContext* scriptContext
- , bool isGlobal
- , bool isSticky
- , JavascriptRegExp* regularExpression
- , UnifiedRegex::GroupInfo lastSuccessfulMatch
- , UnifiedRegex::GroupInfo lastActualMatch )
- {
- if (lastActualMatch.IsUndefined())
- {
- regularExpression->SetLastIndex(0);
- }
- else if (isGlobal || isSticky)
- {
- CharCount lastIndex = lastActualMatch.EndOffset();
- Assert(lastIndex <= MaxCharCount);
- regularExpression->SetLastIndex((int32)lastIndex);
- }
- }
- void RegexHelper::PropagateLastMatchToCtor
- ( ScriptContext* scriptContext
- , JavascriptRegExp* regularExpression
- , JavascriptString* lastInput
- , UnifiedRegex::GroupInfo lastSuccessfulMatch
- , bool useSplitPattern )
- {
- Assert(lastInput);
- if (!lastSuccessfulMatch.IsUndefined())
- {
- // Notes:
- // - SPEC DEVIATION: The RegExp ctor holds some details of the last successful match on any regular expression.
- // - For updating regex ctor's stats we are using entry function's context, rather than regex context,
- // the rational is: use same context of RegExp.prototype, on which the function was called.
- // So, if you call the function with remoteContext.regexInstance.exec.call(localRegexInstance, "match string"),
- // we will update stats in the context related to the exec function, i.e. remoteContext.
- // This is consistent with other browsers
- UnifiedRegex::RegexPattern* pattern = useSplitPattern
- ? regularExpression->GetSplitPattern()
- : regularExpression->GetPattern();
- scriptContext->GetLibrary()->GetRegExpConstructor()->SetLastMatch(pattern, lastInput, lastSuccessfulMatch);
- }
- }
- void RegexHelper::InvalidateLastMatchOnCtor(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* lastInput, bool useSplitPattern)
- {
- Assert(lastInput);
- UnifiedRegex::RegexPattern* pattern = useSplitPattern
- ? regularExpression->GetSplitPattern()
- : regularExpression->GetPattern();
- scriptContext->GetLibrary()->GetRegExpConstructor()->InvalidateLastMatch(pattern, lastInput);
- }
- bool RegexHelper::GetInitialOffset(bool isGlobal, bool isSticky, JavascriptRegExp* regularExpression, CharCount inputLength, CharCount& offset)
- {
- if (isGlobal || isSticky)
- {
- offset = regularExpression->GetLastIndex();
- if (offset <= MaxCharCount)
- return true;
- else
- {
- regularExpression->SetLastIndex(0);
- return false;
- }
- }
- else
- {
- offset = 0;
- return true;
- }
- }
- JavascriptArray* RegexHelper::CreateMatchResult(void *const stackAllocationPointer, ScriptContext* scriptContext, bool isGlobal, int numGroups, JavascriptString* input)
- {
- if (isGlobal)
- {
- // Use an ordinary array, with default initial capacity
- return scriptContext->GetLibrary()->CreateArrayOnStack(stackAllocationPointer);
- }
- else
- return JavascriptRegularExpressionResult::Create(stackAllocationPointer, numGroups, input, scriptContext);
- }
- void RegexHelper::FinalizeMatchResult(ScriptContext* scriptContext, bool isGlobal, JavascriptArray* arr, UnifiedRegex::GroupInfo match)
- {
- if (!isGlobal)
- JavascriptRegularExpressionResult::SetMatch(arr, match);
- // else: arr is an ordinary array
- }
- JavascriptArray* RegexHelper::CreateExecResult(void *const stackAllocationPointer, ScriptContext* scriptContext, int numGroups, JavascriptString* input, UnifiedRegex::GroupInfo match)
- {
- JavascriptArray* res = JavascriptRegularExpressionResult::Create(stackAllocationPointer, numGroups, input, scriptContext);
- JavascriptRegularExpressionResult::SetMatch(res, match);
- return res;
- }
- template<bool mustMatchEntireInput>
- BOOL RegexHelper::RegexTest_NonScript(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, const char16 *const input, const CharCount inputLength)
- {
- // This version of the function should only be used when testing the regex against a non-javascript string. That is,
- // this call was not initiated by script code. Hence, the RegExp constructor is not updated with the last match. If
- // 'mustMatchEntireInput' is true, this function also ignores the global/sticky flag and the lastIndex property, since it tests
- // for a match on the entire input string; in that case, the lastIndex property is not modified.
- UnifiedRegex::RegexPattern* pattern = regularExpression->GetPattern();
- UnifiedRegex::GroupInfo match; // initially undefined
- #if ENABLE_REGEX_CONFIG_OPTIONS
- RegexHelperTrace(scriptContext, UnifiedRegex::RegexStats::Test, regularExpression, input, inputLength);
- #endif
- const bool isGlobal = pattern->IsGlobal();
- const bool isSticky = pattern->IsSticky();
- CharCount offset;
- if (mustMatchEntireInput)
- offset = 0; // needs to match the entire input, so ignore 'lastIndex' and always start from the beginning
- else if (!GetInitialOffset(isGlobal, isSticky, regularExpression, inputLength, offset))
- return false;
- if (mustMatchEntireInput || offset <= inputLength)
- {
- match = RegexHelper::SimpleMatch(scriptContext, pattern, input, inputLength, offset);
- }
- // else: match remains undefined
- if (!mustMatchEntireInput) // don't update 'lastIndex' when mustMatchEntireInput is true since the global flag is ignored
- {
- PropagateLastMatchToRegex(scriptContext, isGlobal, isSticky, regularExpression, match, match);
- }
- return mustMatchEntireInput ? match.offset == 0 && match.length == inputLength : !match.IsUndefined();
- }
- // explicit instantiation
- template BOOL RegexHelper::RegexTest_NonScript<true>(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, const char16 *const input, const CharCount inputLength);
- template BOOL RegexHelper::RegexTest_NonScript<false>(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, const char16 *const input, const CharCount inputLength);
- // Asserts if the value needs to be marshaled to target context.
- // Returns the resulting value.
- // This is supposed to be called for result/return value of the RegexXXX functions.
- // static
- template<typename T>
- T RegexHelper::CheckCrossContextAndMarshalResult(T value, ScriptContext* targetContext)
- {
- Assert(targetContext);
- Assert(!CrossSite::NeedMarshalVar(value, targetContext));
- return value;
- }
- Var RegexHelper::RegexMatchResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
- {
- return RegexHelper::RegexMatch(scriptContext, regularExpression, input, false);
- }
- Var RegexHelper::RegexMatchResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
- {
- return RegexHelper::RegexMatch(scriptContext, regularExpression, input, false, stackAllocationPointer);
- }
- Var RegexHelper::RegexMatchResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
- {
- if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
- {
- return RegexHelper::RegexMatch(scriptContext, regularExpression, input, true);
- }
- else
- {
- return RegexHelper::RegexMatch(scriptContext, regularExpression, input, false);
- }
- }
- Var RegexHelper::RegexMatch(ScriptContext* entryFunctionContext, RecyclableObject *thisObj, JavascriptString *input, bool noResult, void *const stackAllocationPointer)
- {
- Var result = RegexHelper::RegexMatchImpl<true>(entryFunctionContext, thisObj, input, noResult, stackAllocationPointer);
- return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
- }
- Var RegexHelper::RegexMatchNoHistory(ScriptContext* entryFunctionContext, JavascriptRegExp *regularExpression, JavascriptString *input, bool noResult)
- {
- // RegexMatchNoHistory() is used only by Intl internally and there is no need for ES6
- // observable RegExp actions. Therefore, we can directly use the ES5 logic.
- Var result = RegexHelper::RegexEs5MatchImpl<false>(entryFunctionContext, regularExpression, input, noResult);
- return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
- }
- Var RegexHelper::RegexExecResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
- {
- return RegexHelper::RegexExec(scriptContext, regularExpression, input, false);
- }
- Var RegexHelper::RegexExecResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
- {
- return RegexHelper::RegexExec(scriptContext, regularExpression, input, false, stackAllocationPointer);
- }
- Var RegexHelper::RegexExecResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input)
- {
- if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
- {
- return RegexHelper::RegexExec(scriptContext, regularExpression, input, true);
- }
- else
- {
- return RegexHelper::RegexExec(scriptContext, regularExpression, input, false);
- }
- }
- Var RegexHelper::RegexExec(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, bool noResult, void *const stackAllocationPointer)
- {
- Var result = RegexHelper::RegexExecImpl(entryFunctionContext, regularExpression, input, noResult, stackAllocationPointer);
- return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
- }
- Var RegexHelper::RegexReplaceResultUsed(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace)
- {
- return entryFunctionContext->GetConfig()->IsES6RegExSymbolsEnabled()
- ? RegexHelper::RegexReplace(entryFunctionContext, regularExpression, input, replace, false)
- : RegexHelper::RegexEs5Replace(entryFunctionContext, regularExpression, input, replace, false);
- }
- Var RegexHelper::RegexReplaceResultNotUsed(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace)
- {
- if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
- {
- return entryFunctionContext->GetConfig()->IsES6RegExSymbolsEnabled()
- ? RegexHelper::RegexReplace(entryFunctionContext, regularExpression, input, replace, true)
- : RegexHelper::RegexEs5Replace(entryFunctionContext, regularExpression, input, replace, true);
- }
- else
- {
- return entryFunctionContext->GetConfig()->IsES6RegExSymbolsEnabled()
- ? RegexHelper::RegexReplace(entryFunctionContext, regularExpression, input, replace, false)
- : RegexHelper::RegexEs5Replace(entryFunctionContext, regularExpression, input, replace, false);
- }
- }
- Var RegexHelper::RegexReplace(ScriptContext* entryFunctionContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptString* replace, bool noResult)
- {
- Var result = RegexHelper::RegexReplaceImpl(entryFunctionContext, thisObj, input, replace, noResult);
- return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
- }
- Var RegexHelper::RegexEs5Replace(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input, JavascriptString* replace, bool noResult)
- {
- // We can have RegexReplaceResult... functions defer their job to RegexReplace. However, their regularExpression argument
- // would first be cast to RecyclableObject when the call is made, and then back to JavascriptRegExp in RegexReplaceImpl.
- // The conversion back slows down the perf, so we use this ES5 version of RegexReplace in RegexReplaceResult... if we know
- // that the ES6 logic isn't needed.
- Var result = RegexHelper::RegexEs5ReplaceImpl(entryFunctionContext, regularExpression, input, replace, noResult);
- return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
- }
- Var RegexHelper::RegexReplaceFunction(ScriptContext* entryFunctionContext, RecyclableObject* thisObj, JavascriptString* input, JavascriptFunction* replacefn)
- {
- Var result = RegexHelper::RegexReplaceImpl(entryFunctionContext, thisObj, input, replacefn);
- return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
- }
- Var RegexHelper::RegexSearch(ScriptContext* entryFunctionContext, JavascriptRegExp* regularExpression, JavascriptString* input)
- {
- Var result = RegexHelper::RegexSearchImpl(entryFunctionContext, regularExpression, input);
- return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
- }
- Var RegexHelper::RegexSplitResultUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit)
- {
- return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, false);
- }
- Var RegexHelper::RegexSplitResultUsedAndMayBeTemp(void *const stackAllocationPointer, ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit)
- {
- Assert(ThreadContext::IsOnStack(stackAllocationPointer));
- return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, false, stackAllocationPointer);
- }
- Var RegexHelper::RegexSplitResultNotUsed(ScriptContext* scriptContext, JavascriptRegExp* regularExpression, JavascriptString* input, CharCount limit)
- {
- if (!PHASE_OFF1(Js::RegexResultNotUsedPhase))
- {
- return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, true);
- }
- else
- {
- return RegexHelper::RegexSplit(scriptContext, regularExpression, input, limit, false);
- }
- }
- Var RegexHelper::RegexSplit(ScriptContext* entryFunctionContext, RecyclableObject* thisObj, JavascriptString* input, CharCount limit, bool noResult, void *const stackAllocationPointer)
- {
- Var result = RegexHelper::RegexSplitImpl(entryFunctionContext, thisObj, input, limit, noResult, stackAllocationPointer);
- return RegexHelper::CheckCrossContextAndMarshalResult(result, entryFunctionContext);
- }
- RecyclableObject* RegexHelper::ExecResultToRecyclableObject(Var result)
- {
- // "result" is the result of the "exec" call. "CallExec" makes sure that it is either
- // an Object or Null. RegExp algorithms have special conditions for when the result is Null,
- // so we can directly cast to RecyclableObject.
- Assert(!JavascriptOperators::IsNull(result));
- return RecyclableObject::UnsafeFromVar(result);
- }
- JavascriptString* RegexHelper::GetMatchStrFromResult(RecyclableObject* result, ScriptContext* scriptContext)
- {
- return JavascriptConversion::ToString(
- JavascriptOperators::GetItem(result, (uint32)0, scriptContext),
- scriptContext);
- }
- void RegexHelper::AdvanceLastIndex(
- RecyclableObject* instance,
- JavascriptString* input,
- JavascriptString* matchStr,
- bool unicode,
- ScriptContext* scriptContext)
- {
- if (matchStr->GetLength() == 0)
- {
- CharCount lastIndex = JavascriptRegExp::GetLastIndexProperty(instance, scriptContext);
- lastIndex = AdvanceStringIndex(input, lastIndex, unicode);
- JavascriptRegExp::SetLastIndexProperty(instance, lastIndex, scriptContext);
- }
- }
- CharCount RegexHelper::AdvanceStringIndex(JavascriptString* string, CharCount index, bool isUnicode)
- {
- // TODO: Change the increment to 2 depending on the "unicode" flag and
- // the code point at "index". The increment is currently constant at 1
- // in order to be compatible with the rest of the RegExp code.
- return JavascriptRegExp::AddIndex(index, 1);
- }
- }
|