RegexCompileTime.cpp 180 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472447344744475447644774478447944804481448244834484448544864487448844894490449144924493449444954496449744984499450045014502450345044505450645074508450945104511451245134514451545164517451845194520452145224523452445254526452745284529453045314532453345344535453645374538453945404541454245434544454545464547454845494550455145524553455445554556455745584559456045614562456345644565456645674568456945704571457245734574457545764577457845794580458145824583458445854586458745884589459045914592459345944595459645974598459946004601460246034604460546064607460846094610461146124613461446154616461746184619462046214622462346244625462646274628462946304631463246334634463546364637463846394640464146424643464446454646464746484649465046514652465346544655465646574658465946604661466246634664466546664667466846694670467146724673467446754676467746784679468046814682468346844685468646874688468946904691469246934694469546964697469846994700470147024703470447054706470747084709471047114712471347144715471647174718471947204721472247234724472547264727472847294730473147324733473447354736473747384739474047414742474347444745474647474748474947504751475247534754475547564757475847594760476147624763476447654766476747684769477047714772477347744775477647774778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Copyright (c) ChakraCore Project Contributors. All rights reserved.
  4. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  5. //-------------------------------------------------------------------------------------------------------
  6. #include "ParserPch.h"
  7. namespace UnifiedRegex
  8. {
  9. // ----------------------------------------------------------------------
  10. // Compiler (inlines etc)
  11. // ----------------------------------------------------------------------
  12. // The VS2013 linker treats this as a redefinition of an already
  13. // defined constant and complains. So skip the declaration if we're compiling
  14. // with VS2013 or below.
  15. #if !defined(_MSC_VER) || _MSC_VER >= 1900
  16. const CharCount Compiler::initInstBufSize;
  17. #endif
  18. uint8* Compiler::Emit(size_t size)
  19. {
  20. Assert(size <= UINT32_MAX);
  21. if (instLen - instNext < size)
  22. {
  23. CharCount newLen = max(instLen, initInstBufSize);
  24. CharCount instLenPlus = (CharCount)(instLen + size - 1);
  25. // check for overflow
  26. if (instLenPlus < instLen || instLenPlus * 2 < instLenPlus)
  27. {
  28. Js::Throw::OutOfMemory();
  29. }
  30. while (newLen <= instLenPlus)
  31. {
  32. newLen *= 2;
  33. }
  34. instBuf = (uint8*)ctAllocator->Realloc(instBuf, instLen, newLen);
  35. instLen = newLen;
  36. }
  37. uint8* inst = instBuf + instNext;
  38. instNext += (CharCount)size;
  39. return inst;
  40. }
  41. template <typename T>
  42. T* Compiler::Emit()
  43. {
  44. return new(Emit(sizeof(T))) T;
  45. }
  46. #define EMIT(compiler, T, ...) (new (compiler.Emit(sizeof(T))) T(__VA_ARGS__))
  47. #define L2I(O, label) LabelToInstPointer<O##Inst>(Inst::InstTag::O, label)
  48. // Remember: The machine address of an instruction is no longer valid after a subsequent emit,
  49. // so all label fixups must be done using Compiler::GetFixup / Compiler::DoFixup
  50. // ----------------------------------------------------------------------
  51. // Node
  52. // ----------------------------------------------------------------------
  53. void Node::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  54. {
  55. Assert(false);
  56. }
  57. CharCount Node::EmitScanFirstSet(Compiler& compiler)
  58. {
  59. Assert(prevConsumes.IsExact(0));
  60. if (thisConsumes.CouldMatchEmpty())
  61. // Can't be sure of consuming something in FIRST
  62. return 0;
  63. if (firstSet->Count() > maxSyncToSetSize)
  64. // HEURISTIC: If FIRST is large we'll get too many false positives
  65. return 0;
  66. //
  67. // Compilation scheme:
  68. //
  69. // SyncTo(Char|Char2|Set)And(Consume|Continue)
  70. //
  71. Char entries[CharSet<Char>::MaxCompact];
  72. int count = firstSet->GetCompactEntries(2, entries);
  73. if (SupportsPrefixSkipping(compiler))
  74. {
  75. if (count == 1)
  76. EMIT(compiler, SyncToCharAndConsumeInst, entries[0]);
  77. else if (count == 2)
  78. EMIT(compiler, SyncToChar2SetAndConsumeInst, entries[0], entries[1]);
  79. else
  80. EMIT(compiler, SyncToSetAndConsumeInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  81. return 1;
  82. }
  83. else
  84. {
  85. if (count == 1)
  86. EMIT(compiler, SyncToCharAndContinueInst, entries[0]);
  87. else if (count == 2)
  88. EMIT(compiler, SyncToChar2SetAndContinueInst, entries[0], entries[1]);
  89. else
  90. EMIT(compiler, SyncToSetAndContinueInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  91. return 0;
  92. }
  93. }
  94. bool Node::IsBetterSyncronizingNode(Compiler& compiler, Node* curr, Node* proposed)
  95. {
  96. int proposedNumLiterals = 0;
  97. CharCount proposedLength = proposed->MinSyncronizingLiteralLength(compiler, proposedNumLiterals);
  98. if (proposedLength == 0 || proposedNumLiterals > maxNumSyncLiterals)
  99. // Not a synchronizable node or too many literals.
  100. return false;
  101. if (curr == nullptr)
  102. // We'll take whatever we can get
  103. return true;
  104. int currNumLiterals = 0;
  105. CharCount currLength = curr->MinSyncronizingLiteralLength(compiler, currNumLiterals);
  106. // Lexicographic ordering based on
  107. // - whether literal length is above a threshold (above is better)
  108. // - number of literals (smaller is better)
  109. // - upper bound on backup (finite is better)
  110. // - minimum literal length (longer is better)
  111. // - actual backup upper bound (shorter is better)
  112. if (proposedLength >= preferredMinSyncToLiteralLength
  113. && currLength < preferredMinSyncToLiteralLength)
  114. {
  115. return true;
  116. }
  117. if (proposedLength < preferredMinSyncToLiteralLength
  118. && currLength >= preferredMinSyncToLiteralLength)
  119. {
  120. return false;
  121. }
  122. if (proposedNumLiterals < currNumLiterals)
  123. return true;
  124. if (proposedNumLiterals > currNumLiterals)
  125. return false;
  126. if (!proposed->prevConsumes.IsUnbounded() && curr->prevConsumes.IsUnbounded())
  127. return true;
  128. if (proposed->prevConsumes.IsUnbounded() && !curr->prevConsumes.IsUnbounded())
  129. return false;
  130. if (proposedLength > currLength)
  131. return true;
  132. if (proposedLength < currLength)
  133. return false;
  134. return proposed->prevConsumes.upper < curr->prevConsumes.upper;
  135. }
  136. bool Node::IsSingleChar(Compiler& compiler, Char& outChar) const
  137. {
  138. if (tag != Node::MatchChar)
  139. return false;
  140. const MatchCharNode* node = (const MatchCharNode*)this;
  141. if (node->isEquivClass)
  142. return false;
  143. outChar = node->cs[0];
  144. return true;
  145. }
  146. bool Node::IsBoundedWord(Compiler& compiler) const
  147. {
  148. if (tag != Node::Concat)
  149. return false;
  150. const ConcatNode* concatNode = (const ConcatNode *)this;
  151. if (concatNode->head->tag != Node::WordBoundary ||
  152. concatNode->tail == 0 ||
  153. concatNode->tail->head->tag != Node::Loop ||
  154. concatNode->tail->tail == 0 ||
  155. concatNode->tail->tail->head->tag != Node::WordBoundary ||
  156. concatNode->tail->tail->tail != 0)
  157. return false;
  158. const WordBoundaryNode* enter = (const WordBoundaryNode*)concatNode->head;
  159. const LoopNode* loop = (const LoopNode*)concatNode->tail->head;
  160. const WordBoundaryNode* leave = (const WordBoundaryNode*)concatNode->tail->tail->head;
  161. if (enter->isNegation ||
  162. !loop->isGreedy ||
  163. loop->repeats.lower != 1 ||
  164. loop->repeats.upper != CharCountFlag ||
  165. loop->body->tag != Node::MatchSet ||
  166. leave->isNegation)
  167. return false;
  168. const MatchSetNode* wordSet = (const MatchSetNode*)loop->body;
  169. if (wordSet->isNegation)
  170. return false;
  171. return wordSet->set.IsEqualTo(*compiler.standardChars->GetWordSet());
  172. }
  173. bool Node::IsBOILiteral2(Compiler& compiler) const
  174. {
  175. if (tag != Node::Concat)
  176. return false;
  177. const ConcatNode* concatNode = (const ConcatNode *)this;
  178. if ((compiler.program->flags & (IgnoreCaseRegexFlag | MultilineRegexFlag)) != 0 ||
  179. concatNode->head->tag != Node::BOL ||
  180. concatNode->tail == nullptr ||
  181. concatNode->tail->head->tag != Node::MatchLiteral ||
  182. concatNode->tail->tail != nullptr ||
  183. ((MatchLiteralNode *)concatNode->tail->head)->isEquivClass ||
  184. ((MatchLiteralNode *)concatNode->tail->head)->length != 2)
  185. {
  186. return false;
  187. }
  188. return true;
  189. }
  190. bool Node::IsLeadingTrailingSpaces(Compiler& compiler, CharCount& leftMinMatch, CharCount& rightMinMatch) const
  191. {
  192. if (tag != Node::Alt)
  193. return false;
  194. if (compiler.program->flags & MultilineRegexFlag)
  195. return false;
  196. const AltNode* altNode = (const AltNode*)this;
  197. if (altNode->head->tag != Node::Concat ||
  198. altNode->tail == 0 ||
  199. altNode->tail->head->tag != Node::Concat ||
  200. altNode->tail->tail != 0)
  201. return false;
  202. const ConcatNode* left = (const ConcatNode*)altNode->head;
  203. const ConcatNode* right = (const ConcatNode*)altNode->tail->head;
  204. if (left->head->tag != Node::BOL ||
  205. left->tail == 0 ||
  206. left->tail->head->tag != Node::Loop ||
  207. left->tail->tail != 0)
  208. return false;
  209. if (right->head->tag != Node::Loop ||
  210. right->tail == 0 ||
  211. right->tail->head->tag != Node::EOL ||
  212. right->tail->tail != 0)
  213. return false;
  214. const LoopNode* leftLoop = (const LoopNode*)left->tail->head;
  215. const LoopNode* rightLoop = (const LoopNode*)right->head;
  216. if (!leftLoop->isGreedy ||
  217. leftLoop->repeats.upper != CharCountFlag ||
  218. leftLoop->body->tag != Node::MatchSet ||
  219. !rightLoop->isGreedy ||
  220. rightLoop->repeats.upper != CharCountFlag ||
  221. rightLoop->body->tag != Node::MatchSet)
  222. return false;
  223. const MatchSetNode* leftSet = (const MatchSetNode*)leftLoop->body;
  224. const MatchSetNode* rightSet = (const MatchSetNode*)rightLoop->body;
  225. if (leftSet->isNegation ||
  226. rightSet->isNegation)
  227. return false;
  228. leftMinMatch = leftLoop->repeats.lower;
  229. rightMinMatch = rightLoop->repeats.lower;
  230. return
  231. leftSet->set.IsEqualTo(*compiler.standardChars->GetWhitespaceSet()) &&
  232. rightSet->set.IsEqualTo(*compiler.standardChars->GetWhitespaceSet());
  233. }
  234. #if ENABLE_REGEX_CONFIG_OPTIONS
  235. void Node::PrintAnnotations(DebugWriter* w) const
  236. {
  237. if (firstSet != 0)
  238. {
  239. w->PrintEOL(_u("<"));
  240. w->Indent();
  241. w->Print(_u("features: {"));
  242. bool first = true;
  243. for (uint i = Empty; i <= Assertion; i++)
  244. {
  245. if ((features & (1 << i)) != 0)
  246. {
  247. if (first)
  248. first = false;
  249. else
  250. w->Print(_u(","));
  251. switch (i)
  252. {
  253. case Empty: w->Print(_u("Empty")); break;
  254. case BOL: w->Print(_u("BOL")); break;
  255. case EOL: w->Print(_u("EOL")); break;
  256. case WordBoundary: w->Print(_u("WordBoundary")); break;
  257. case MatchLiteral: w->Print(_u("MatchLiteral")); break;
  258. case MatchChar: w->Print(_u("MatchChar")); break;
  259. case Concat: w->Print(_u("Concat")); break;
  260. case Alt: w->Print(_u("Alt")); break;
  261. case DefineGroup: w->Print(_u("DefineGroup")); break;
  262. case MatchGroup: w->Print(_u("MatchGroup")); break;
  263. case Loop: w->Print(_u("Loop")); break;
  264. case MatchSet: w->Print(_u("MatchSet")); break;
  265. case Assertion: w->Print(_u("Assertion")); break;
  266. }
  267. }
  268. }
  269. w->PrintEOL(_u("}"));
  270. w->Print(_u("firstSet: "));
  271. firstSet->Print(w);
  272. if (isFirstExact)
  273. w->Print(_u(" (exact)"));
  274. w->EOL();
  275. w->Print(_u("followSet: "));
  276. followSet->Print(w);
  277. w->EOL();
  278. w->Print(_u("prevConsumes: "));
  279. prevConsumes.Print(w);
  280. w->EOL();
  281. w->Print(_u("thisConsumes: "));
  282. thisConsumes.Print(w);
  283. w->EOL();
  284. w->Print(_u("followConsumes: "));
  285. followConsumes.Print(w);
  286. w->EOL();
  287. w->PrintEOL(_u("isThisIrrefutable: %s"), isThisIrrefutable ? _u("true") : _u("false"));
  288. w->PrintEOL(_u("isFollowIrrefutable: %s"), isFollowIrrefutable ? _u("true") : _u("false"));
  289. w->PrintEOL(_u("isWord: %s"), isWord ? _u("true") : _u("false"));
  290. w->PrintEOL(_u("isThisWillNotProgress: %s"), isThisWillNotProgress ? _u("true") : _u("false"));
  291. w->PrintEOL(_u("isThisWillNotRegress: %s"), isThisWillNotRegress ? _u("true") : _u("false"));
  292. w->PrintEOL(_u("isPrevWillNotProgress: %s"), isPrevWillNotProgress ? _u("true") : _u("false"));
  293. w->PrintEOL(_u("isPrevWillNotRegress: %s"), isPrevWillNotRegress ? _u("true") : _u("false"));
  294. w->PrintEOL(_u("isDeterministic: %s"), isDeterministic ? _u("true") : _u("false"));
  295. w->PrintEOL(_u("isNotInLoop: %s"), isNotInLoop ? _u("true") : _u("false"));
  296. w->PrintEOL(_u("isNotNegated: %s"), isNotNegated ? _u("true") : _u("false"));
  297. w->PrintEOL(_u("isAtLeastOnce: %s"), isAtLeastOnce ? _u("true") : _u("false"));
  298. w->PrintEOL(_u("hasInitialHardFailBOI: %s"), hasInitialHardFailBOI ? _u("true") : _u("false"));
  299. w->Unindent();
  300. w->PrintEOL(_u(">"));
  301. }
  302. }
  303. #endif
  304. // ----------------------------------------------------------------------
  305. // SimpleNode
  306. // ----------------------------------------------------------------------
  307. CharCount SimpleNode::LiteralLength() const
  308. {
  309. return 0;
  310. }
  311. bool SimpleNode::IsCharOrPositiveSet() const
  312. {
  313. return false;
  314. }
  315. CharCount SimpleNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  316. {
  317. return 0;
  318. }
  319. void SimpleNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  320. {
  321. }
  322. bool SimpleNode::IsRefiningAssertion(Compiler& compiler)
  323. {
  324. return tag == EOL && (compiler.program->flags & MultilineRegexFlag) != 0;
  325. }
  326. void SimpleNode::AnnotatePass0(Compiler& compiler)
  327. {
  328. isWord = false;
  329. }
  330. void SimpleNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  331. {
  332. isFirstExact = false;
  333. thisConsumes.Exact(0);
  334. isThisWillNotProgress = true;
  335. isThisWillNotRegress = true;
  336. isNotInLoop = parentNotInLoop;
  337. isAtLeastOnce = parentAtLeastOnce;
  338. isNotSpeculative = parentNotSpeculative;
  339. isNotNegated = parentNotNegated;
  340. switch (tag)
  341. {
  342. case Empty:
  343. features = HasEmpty;
  344. firstSet = compiler.standardChars->GetEmptySet();
  345. isThisIrrefutable = true;
  346. break;
  347. case BOL:
  348. features = HasBOL;
  349. firstSet = compiler.standardChars->GetFullSet();
  350. isThisIrrefutable = false;
  351. break;
  352. case EOL:
  353. features = HasEOL;
  354. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  355. firstSet = compiler.standardChars->GetNewlineSet();
  356. else
  357. firstSet = compiler.standardChars->GetEmptySet();
  358. isThisIrrefutable = false;
  359. break;
  360. default:
  361. Assert(false);
  362. }
  363. }
  364. void SimpleNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  365. {
  366. prevConsumes = accumConsumes;
  367. isPrevWillNotProgress = accumPrevWillNotProgress;
  368. isPrevWillNotRegress = accumPrevWillNotRegress;
  369. }
  370. void SimpleNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  371. {
  372. followConsumes = accumConsumes;
  373. followSet = accumFollow;
  374. isFollowIrrefutable = accumFollowIrrefutable;
  375. isFollowEOL = accumFollowEOL;
  376. hasInitialHardFailBOI = ((tag == BOL) &&
  377. prevConsumes.IsExact(0) &&
  378. (compiler.program->flags & MultilineRegexFlag) == 0 &&
  379. isAtLeastOnce &&
  380. isNotNegated &&
  381. isPrevWillNotRegress);
  382. }
  383. void SimpleNode::AnnotatePass4(Compiler& compiler)
  384. {
  385. isDeterministic = true;
  386. }
  387. bool SimpleNode::SupportsPrefixSkipping(Compiler& compiler) const
  388. {
  389. return false;
  390. }
  391. Node* SimpleNode::HeadSyncronizingNode(Compiler& compiler)
  392. {
  393. return 0;
  394. }
  395. CharCount SimpleNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  396. {
  397. return 0;
  398. }
  399. void SimpleNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  400. {
  401. Assert(false);
  402. }
  403. void SimpleNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  404. {
  405. }
  406. void SimpleNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  407. {
  408. }
  409. void SimpleNode::Emit(Compiler& compiler, CharCount& skipped)
  410. {
  411. Assert(skipped == 0);
  412. switch (tag)
  413. {
  414. case Empty:
  415. // Nothing
  416. break;
  417. case BOL:
  418. {
  419. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  420. {
  421. //
  422. // Compilation scheme:
  423. //
  424. // BOLTest
  425. //
  426. EMIT(compiler, BOLTestInst);
  427. }
  428. else
  429. {
  430. if (compiler.CurrentLabel() == 0)
  431. {
  432. // The first instruction is BOI, change the tag and only execute it once
  433. // without looping every start position
  434. compiler.SetBOIInstructionsProgramTag();
  435. }
  436. else
  437. {
  438. //
  439. // Compilation scheme:
  440. //
  441. // BOITest
  442. //
  443. // Obviously starting later in the string won't help, so can hard fail if:
  444. // - this pattern must always be matched
  445. // - not in a negative assertion
  446. // - backtracking could never rewind the input pointer
  447. //
  448. bool canHardFail = isAtLeastOnce && isNotNegated && isPrevWillNotRegress;
  449. if (canHardFail)
  450. {
  451. EMIT(compiler, BOITestInst<true>);
  452. }
  453. else
  454. {
  455. EMIT(compiler, BOITestInst<false>);
  456. }
  457. }
  458. }
  459. break;
  460. }
  461. case EOL:
  462. {
  463. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  464. {
  465. //
  466. // Compilation scheme:
  467. //
  468. // EOLTest
  469. //
  470. EMIT(compiler, EOLTestInst);
  471. }
  472. else
  473. {
  474. //
  475. // Compilation scheme:
  476. //
  477. // EOITest
  478. //
  479. // Can hard fail if
  480. // - this pattern must always be matched
  481. // - not in a negative assertion
  482. // - backtracking could never advance the input pointer
  483. //
  484. bool canHardFail = isAtLeastOnce && isNotNegated && isPrevWillNotProgress;
  485. if (canHardFail)
  486. {
  487. EMIT(compiler, EOITestInst<true>);
  488. }
  489. else
  490. {
  491. EMIT(compiler, EOITestInst<false>);
  492. }
  493. }
  494. break;
  495. }
  496. default:
  497. Assert(false);
  498. }
  499. }
  500. CharCount SimpleNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  501. {
  502. Assert(false);
  503. return 0;
  504. }
  505. bool SimpleNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  506. {
  507. return false;
  508. }
  509. bool SimpleNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  510. {
  511. return tag == Empty;
  512. }
  513. bool SimpleNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  514. {
  515. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  516. Assert(tag == Empty);
  517. if (cont == 0)
  518. {
  519. if (trie->Count() > 0)
  520. // This literal is a proper prefix of an earlier literal
  521. return false;
  522. trie->SetAccepting();
  523. return true;
  524. }
  525. return cont->BuildCharTrie(compiler, trie, 0, isAcceptFirst);
  526. }
  527. #if ENABLE_REGEX_CONFIG_OPTIONS
  528. void SimpleNode::Print(DebugWriter* w, const Char* litbuf) const
  529. {
  530. switch (tag)
  531. {
  532. case Empty:
  533. w->Print(_u("Empty")); break;
  534. case BOL:
  535. w->Print(_u("BOL")); break;
  536. case EOL:
  537. w->Print(_u("EOL")); break;
  538. default:
  539. Assert(false);
  540. }
  541. w->PrintEOL(_u("()"));
  542. PrintAnnotations(w);
  543. }
  544. #endif
  545. // ----------------------------------------------------------------------
  546. // WordBoundaryNode
  547. // ----------------------------------------------------------------------
  548. CharCount WordBoundaryNode::LiteralLength() const
  549. {
  550. return 0;
  551. }
  552. bool WordBoundaryNode::IsCharOrPositiveSet() const
  553. {
  554. return false;
  555. }
  556. CharCount WordBoundaryNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  557. {
  558. return 0;
  559. }
  560. void WordBoundaryNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  561. {
  562. // WordChars and NonWordChars sets are already case invariant
  563. }
  564. bool WordBoundaryNode::IsRefiningAssertion(Compiler& compiler)
  565. {
  566. return mustIncludeEntering != mustIncludeLeaving;
  567. }
  568. void WordBoundaryNode::AnnotatePass0(Compiler& compiler)
  569. {
  570. isWord = false;
  571. }
  572. void WordBoundaryNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  573. {
  574. features = HasWordBoundary;
  575. thisConsumes.Exact(0);
  576. isFirstExact = false;
  577. isThisIrrefutable = false;
  578. isThisWillNotProgress = true;
  579. isThisWillNotRegress = true;
  580. isNotInLoop = parentNotInLoop;
  581. isAtLeastOnce = parentAtLeastOnce;
  582. isNotSpeculative = parentNotSpeculative;
  583. isNotNegated = parentNotNegated;
  584. if (isNegation)
  585. firstSet = compiler.standardChars->GetFullSet();
  586. else
  587. {
  588. if (mustIncludeEntering && !mustIncludeLeaving)
  589. firstSet = compiler.standardChars->GetWordSet();
  590. else if (mustIncludeLeaving && !mustIncludeEntering)
  591. firstSet = compiler.standardChars->GetNonWordSet();
  592. else
  593. firstSet = compiler.standardChars->GetFullSet();
  594. }
  595. }
  596. void WordBoundaryNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  597. {
  598. prevConsumes = accumConsumes;
  599. isPrevWillNotProgress = accumPrevWillNotProgress;
  600. isPrevWillNotRegress = accumPrevWillNotRegress;
  601. }
  602. void WordBoundaryNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  603. {
  604. followConsumes = accumConsumes;
  605. followSet = accumFollow;
  606. isFollowIrrefutable = accumFollowIrrefutable;
  607. isFollowEOL = accumFollowEOL;
  608. }
  609. void WordBoundaryNode::AnnotatePass4(Compiler& compiler)
  610. {
  611. isDeterministic = true;
  612. }
  613. bool WordBoundaryNode::SupportsPrefixSkipping(Compiler& compiler) const
  614. {
  615. return false;
  616. }
  617. Node* WordBoundaryNode::HeadSyncronizingNode(Compiler& compiler)
  618. {
  619. return 0;
  620. }
  621. CharCount WordBoundaryNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  622. {
  623. return 0;
  624. }
  625. void WordBoundaryNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  626. {
  627. Assert(false);
  628. }
  629. void WordBoundaryNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  630. {
  631. }
  632. void WordBoundaryNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  633. {
  634. }
  635. void WordBoundaryNode::Emit(Compiler& compiler, CharCount& skipped)
  636. {
  637. Assert(skipped == 0);
  638. //
  639. // Compilation scheme:
  640. //
  641. // WordBoundaryTest
  642. //
  643. if (isNegation)
  644. {
  645. EMIT(compiler, WordBoundaryTestInst<true>);
  646. }
  647. else
  648. {
  649. EMIT(compiler, WordBoundaryTestInst<false>);
  650. }
  651. }
  652. CharCount WordBoundaryNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  653. {
  654. Assert(false);
  655. return 0;
  656. }
  657. bool WordBoundaryNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  658. {
  659. return false;
  660. }
  661. bool WordBoundaryNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  662. {
  663. return false;
  664. }
  665. bool WordBoundaryNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  666. {
  667. Assert(false);
  668. return false;
  669. }
  670. #if ENABLE_REGEX_CONFIG_OPTIONS
  671. void WordBoundaryNode::Print(DebugWriter* w, const Char* litbuf) const
  672. {
  673. w->PrintEOL(_u("WordBoundary(%s, %s, %s)"), isNegation ? _u("negative") : _u("positive"), mustIncludeEntering ? _u("entering") : _u("-"), mustIncludeLeaving ? _u("leaving") : _u("-"));
  674. PrintAnnotations(w);
  675. }
  676. #endif
  677. // ----------------------------------------------------------------------
  678. // MatchLiteralNode
  679. // ----------------------------------------------------------------------
  680. CharCount MatchLiteralNode::LiteralLength() const
  681. {
  682. return length;
  683. }
  684. void MatchLiteralNode::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  685. {
  686. // Called during parsing only, so literal always in original form
  687. Assert(!isEquivClass);
  688. Assert(litbufNext + length <= litbufLen && offset + length <= litbufLen);
  689. #pragma prefast(suppress:26000, "The error said that offset + length >= litbufLen + 1, which is incorrect due to if statement below.")
  690. if (litbufNext + length <= litbufLen && offset + length <= litbufLen) // for prefast
  691. {
  692. js_wmemcpy_s(litbuf + litbufNext, litbufLen - litbufNext, litbuf + offset, length);
  693. }
  694. litbufNext += length;
  695. }
  696. bool MatchLiteralNode::IsCharOrPositiveSet() const
  697. {
  698. return false;
  699. }
  700. CharCount MatchLiteralNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  701. {
  702. Assert(length > 1);
  703. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0
  704. && !compiler.standardChars->IsTrivialString(compiler.program->GetCaseMappingSource(), litbuf + offset, length))
  705. {
  706. // We'll need to expand each character of literal into its equivalence class
  707. isEquivClass = true;
  708. return UInt32Math::MulAdd<CaseInsensitive::EquivClassSize,0>(length);
  709. }
  710. else
  711. return length;
  712. }
  713. void MatchLiteralNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  714. {
  715. CharCount nextLit = compiler.program->rep.insts.litbufLen;
  716. if (isEquivClass)
  717. {
  718. Assert((compiler.program->flags & IgnoreCaseRegexFlag) != 0);
  719. // Expand literal according to character equivalence classes
  720. for (CharCount i = 0; i < length; i++)
  721. {
  722. compiler.standardChars->ToEquivs(
  723. compiler.program->GetCaseMappingSource(),
  724. litbuf[offset + i],
  725. compiler.program->rep.insts.litbuf + nextLit + i * CaseInsensitive::EquivClassSize);
  726. }
  727. compiler.program->rep.insts.litbufLen += length * CaseInsensitive::EquivClassSize;
  728. }
  729. else
  730. {
  731. for (CharCount i = 0; i < length; i++)
  732. compiler.program->rep.insts.litbuf[nextLit + i] = litbuf[offset + i];
  733. compiler.program->rep.insts.litbufLen += length;
  734. }
  735. offset = nextLit;
  736. }
  737. void MatchLiteralNode::AnnotatePass0(Compiler& compiler)
  738. {
  739. const Char* litbuf = compiler.program->rep.insts.litbuf;
  740. for (CharCount i = offset; i < offset + length; i++)
  741. {
  742. if (!compiler.standardChars->IsWord(litbuf[i]))
  743. {
  744. isWord = false;
  745. return;
  746. }
  747. }
  748. isWord = true;
  749. }
  750. bool MatchLiteralNode::IsRefiningAssertion(Compiler& compiler)
  751. {
  752. return false;
  753. }
  754. void MatchLiteralNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  755. {
  756. features = HasMatchLiteral;
  757. thisConsumes.Exact(length);
  758. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  759. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  760. firstSet->Set(compiler.ctAllocator, compiler.program->rep.insts.litbuf[offset + i]);
  761. isFirstExact = true;
  762. isThisIrrefutable = false;
  763. isThisWillNotProgress = true;
  764. isThisWillNotRegress = true;
  765. isNotInLoop = parentNotInLoop;
  766. isAtLeastOnce = parentAtLeastOnce;
  767. isNotSpeculative = parentNotSpeculative;
  768. isNotNegated = parentNotNegated;
  769. }
  770. void MatchLiteralNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  771. {
  772. prevConsumes = accumConsumes;
  773. isPrevWillNotProgress = accumPrevWillNotProgress;
  774. isPrevWillNotRegress = accumPrevWillNotRegress;
  775. }
  776. void MatchLiteralNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  777. {
  778. followConsumes = accumConsumes;
  779. followSet = accumFollow;
  780. isFollowIrrefutable = accumFollowIrrefutable;
  781. isFollowEOL = accumFollowEOL;
  782. }
  783. void MatchLiteralNode::AnnotatePass4(Compiler& compiler)
  784. {
  785. isDeterministic = true;
  786. }
  787. bool MatchLiteralNode::SupportsPrefixSkipping(Compiler& compiler) const
  788. {
  789. return true;
  790. }
  791. Node* MatchLiteralNode::HeadSyncronizingNode(Compiler& compiler)
  792. {
  793. return this;
  794. }
  795. CharCount MatchLiteralNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  796. {
  797. numLiterals++;
  798. return length;
  799. }
  800. void MatchLiteralNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  801. {
  802. ScannerMixin* scanner =
  803. scanners.Add(compiler.GetScriptContext()->GetRecycler(), compiler.GetProgram(), offset, length, isEquivClass);
  804. scanner->scanner.Setup(compiler.rtAllocator, compiler.program->rep.insts.litbuf + offset, length, isEquivClass ? CaseInsensitive::EquivClassSize : 1);
  805. }
  806. void MatchLiteralNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  807. {
  808. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  809. bestNode = this;
  810. }
  811. void MatchLiteralNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  812. {
  813. }
  814. void MatchLiteralNode::Emit(Compiler& compiler, CharCount& skipped)
  815. {
  816. if (skipped >= length)
  817. {
  818. // Asking to skip entire literal
  819. skipped -= length;
  820. return;
  821. }
  822. //
  823. // Compilation scheme:
  824. //
  825. // Match(Char|Char4|Literal|LiteralEquiv)Inst
  826. //
  827. CharCount effectiveOffset = offset + skipped * (isEquivClass ? CaseInsensitive::EquivClassSize : 1);
  828. CharCount effectiveLength = length - skipped;
  829. skipped -= min(skipped, length);
  830. if (effectiveLength == 1)
  831. {
  832. Char* cs = compiler.program->rep.insts.litbuf + effectiveOffset;
  833. MatchCharNode::Emit(compiler, cs, isEquivClass);
  834. }
  835. else
  836. {
  837. if (isEquivClass)
  838. EMIT(compiler, MatchLiteralEquivInst, effectiveOffset, effectiveLength);
  839. else
  840. EMIT(compiler, MatchLiteralInst, effectiveOffset, effectiveLength);
  841. }
  842. }
  843. CompileAssert(CaseInsensitive::EquivClassSize == 4);
  844. CharCount MatchLiteralNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  845. {
  846. //
  847. // Compilation scheme:
  848. //
  849. // SyncTo(Literal|LiteralEquiv|LinearLiteral)And(Continue|Consume|Backup)
  850. //
  851. Char * litptr = compiler.program->rep.insts.litbuf + offset;
  852. if (isHeadSyncronizingNode)
  853. {
  854. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  855. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  856. if (isEquivClass)
  857. {
  858. const uint lastPatCharIndex = length - 1;
  859. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  860. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  861. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  862. {
  863. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  864. }
  865. else
  866. {
  867. EMIT(compiler, SyncToLiteralEquivAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  868. }
  869. }
  870. else if (length == 1)
  871. EMIT(compiler, SyncToCharAndConsumeInst, litptr[0]);
  872. else if (length == 2)
  873. EMIT(compiler, SyncToChar2LiteralAndConsumeInst, litptr[0], litptr[1]);
  874. else
  875. {
  876. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  877. switch (setup.GetScheme())
  878. {
  879. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  880. EMIT(compiler, SyncToLinearLiteralAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  881. break;
  882. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  883. EMIT(compiler, SyncToLiteralAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  884. break;
  885. };
  886. }
  887. return length;
  888. }
  889. else
  890. {
  891. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  892. // inside a group for instance, then we won't need to back up but we cannot consume the literal.
  893. if (prevConsumes.IsExact(0))
  894. {
  895. if (isEquivClass)
  896. {
  897. const uint lastPatCharIndex = length - 1;
  898. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  899. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  900. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  901. {
  902. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  903. }
  904. else
  905. {
  906. EMIT(compiler, SyncToLiteralEquivAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  907. }
  908. }
  909. else if (length == 1)
  910. EMIT(compiler, SyncToCharAndContinueInst, litptr[0]);
  911. else if (length == 2)
  912. EMIT(compiler, SyncToChar2LiteralAndContinueInst, litptr[0], litptr[1]);
  913. else
  914. {
  915. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  916. switch (setup.GetScheme())
  917. {
  918. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  919. EMIT(compiler, SyncToLinearLiteralAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  920. break;
  921. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  922. EMIT(compiler, SyncToLiteralAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  923. break;
  924. };
  925. }
  926. }
  927. else
  928. {
  929. if (isEquivClass)
  930. {
  931. const uint lastPatCharIndex = length - 1;
  932. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  933. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  934. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  935. {
  936. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  937. }
  938. else
  939. {
  940. EMIT(compiler, SyncToLiteralEquivAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  941. }
  942. }
  943. else if (length == 1)
  944. EMIT(compiler, SyncToCharAndBackupInst, litptr[0], prevConsumes);
  945. else if (length == 2)
  946. EMIT(compiler, SyncToChar2LiteralAndBackupInst, litptr[0], litptr[1], prevConsumes);
  947. else
  948. {
  949. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  950. switch (setup.GetScheme())
  951. {
  952. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  953. EMIT(compiler, SyncToLinearLiteralAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, setup);
  954. break;
  955. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  956. EMIT(compiler, SyncToLiteralAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, setup);
  957. break;
  958. };
  959. }
  960. }
  961. return 0;
  962. }
  963. }
  964. bool MatchLiteralNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  965. {
  966. // We look for octoquad patterns before converting for case-insensitivity
  967. Assert(!isEquivClass);
  968. if (!oi->CouldAppend(length))
  969. return false;
  970. for (CharCount i = 0; i < length; i++)
  971. {
  972. if (!oi->AppendChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), compiler.program->rep.insts.litbuf[offset + i])))
  973. return false;
  974. }
  975. return true;
  976. }
  977. bool MatchLiteralNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  978. {
  979. if (isEquivClass)
  980. // The literal would expand into length^3 alternatives
  981. return false;
  982. return true;
  983. }
  984. bool MatchLiteralNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  985. {
  986. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  987. Assert(!isEquivClass);
  988. CharTrie* tail = trie;
  989. for (CharCount i = 0; i < length; i++)
  990. {
  991. if (tail->IsAccepting())
  992. {
  993. // An earlier literal is a prefix of this literal
  994. // If isAcceptFirst, can ignore suffix of already recognized literal.
  995. // Otherwise, must fail.
  996. return isAcceptFirst;
  997. }
  998. CharTrie* newTail = tail->Add(compiler.ctAllocator, compiler.program->rep.insts.litbuf[offset + i]);
  999. if (tail->Count() > maxTrieArmExpansion)
  1000. return false;
  1001. tail = newTail;
  1002. }
  1003. if (cont == 0)
  1004. {
  1005. if (tail->Count() > 0)
  1006. // This literal is a proper prefix of an earlier literal
  1007. return false;
  1008. tail->SetAccepting();
  1009. }
  1010. else
  1011. {
  1012. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  1013. return false;
  1014. }
  1015. return true;
  1016. }
  1017. #if ENABLE_REGEX_CONFIG_OPTIONS
  1018. void MatchLiteralNode::Print(DebugWriter* w, const Char* litbuf) const
  1019. {
  1020. w->Print(_u("MatchLiteral("));
  1021. int skip = isEquivClass ? CaseInsensitive::EquivClassSize : 1;
  1022. for (int i = 0; i < skip; i++)
  1023. {
  1024. if (i > 0)
  1025. w->Print(_u(", "));
  1026. w->Print(_u("\""));
  1027. for (CharCount j = 0; j < length; j++)
  1028. w->PrintEscapedChar(litbuf[offset + j * skip + i]);
  1029. w->Print(_u("\""));
  1030. }
  1031. w->PrintEOL(_u(")"));
  1032. PrintAnnotations(w);
  1033. }
  1034. #endif
  1035. // ----------------------------------------------------------------------
  1036. // MatchCharNode
  1037. // ----------------------------------------------------------------------
  1038. CharCount MatchCharNode::LiteralLength() const
  1039. {
  1040. return 1;
  1041. }
  1042. void MatchCharNode::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  1043. {
  1044. Assert(!isEquivClass);
  1045. Assert(litbufNext + 1 <= litbufLen);
  1046. if (litbufNext + 1 <= litbufLen) // for prefast
  1047. litbuf[litbufNext++] = cs[0];
  1048. }
  1049. bool MatchCharNode::IsCharOrPositiveSet() const
  1050. {
  1051. return true;
  1052. }
  1053. CharCount MatchCharNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1054. {
  1055. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0)
  1056. {
  1057. // To ensure initialization, we first default-initialize the
  1058. // whole array with a constant, and then individually set it
  1059. // to be just the first character (known to exist). This can
  1060. // hopefully be optimized to just initialize to cs[0] by the
  1061. // compiler.
  1062. Char equivs[CaseInsensitive::EquivClassSize] = { (Char)-1 };
  1063. for (int i = 0; i < CaseInsensitive::EquivClassSize; i++)
  1064. {
  1065. equivs[i] = cs[0];
  1066. }
  1067. bool isNonTrivial = compiler.standardChars->ToEquivs(compiler.program->GetCaseMappingSource(), cs[0], equivs);
  1068. if (isNonTrivial)
  1069. {
  1070. isEquivClass = true;
  1071. for (int i = 0; i < CaseInsensitive::EquivClassSize; i++)
  1072. {
  1073. cs[i] = equivs[i];
  1074. }
  1075. }
  1076. }
  1077. return 0;
  1078. }
  1079. void MatchCharNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1080. {
  1081. }
  1082. bool MatchCharNode::IsRefiningAssertion(Compiler& compiler)
  1083. {
  1084. return false;
  1085. }
  1086. void MatchCharNode::AnnotatePass0(Compiler& compiler)
  1087. {
  1088. // If c is a word char then all characters equivalent to c are word chars
  1089. isWord = compiler.standardChars->IsWord(cs[0]);
  1090. }
  1091. void MatchCharNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1092. {
  1093. features = HasMatchChar;
  1094. thisConsumes.Exact(1);
  1095. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1096. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1097. firstSet->Set(compiler.ctAllocator, cs[i]);
  1098. isFirstExact = true;
  1099. isThisIrrefutable = false;
  1100. isThisWillNotProgress = true;
  1101. isThisWillNotRegress = true;
  1102. isNotInLoop = parentNotInLoop;
  1103. isAtLeastOnce = parentAtLeastOnce;
  1104. isNotSpeculative = parentNotSpeculative;
  1105. isNotNegated = parentNotNegated;
  1106. }
  1107. void MatchCharNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1108. {
  1109. prevConsumes = accumConsumes;
  1110. isPrevWillNotProgress = accumPrevWillNotProgress;
  1111. isPrevWillNotRegress = accumPrevWillNotRegress;
  1112. }
  1113. void MatchCharNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1114. {
  1115. followConsumes = accumConsumes;
  1116. followSet = accumFollow;
  1117. isFollowIrrefutable = accumFollowIrrefutable;
  1118. isFollowEOL = accumFollowEOL;
  1119. }
  1120. void MatchCharNode::AnnotatePass4(Compiler& compiler)
  1121. {
  1122. isDeterministic = true;
  1123. }
  1124. bool MatchCharNode::SupportsPrefixSkipping(Compiler& compiler) const
  1125. {
  1126. return true;
  1127. }
  1128. Node* MatchCharNode::HeadSyncronizingNode(Compiler& compiler)
  1129. {
  1130. return this;
  1131. }
  1132. CharCount MatchCharNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1133. {
  1134. numLiterals++;
  1135. return 1;
  1136. }
  1137. void MatchCharNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1138. {
  1139. Assert(false);
  1140. }
  1141. void MatchCharNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1142. {
  1143. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1144. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  1145. {
  1146. bestNode = this;
  1147. }
  1148. }
  1149. void MatchCharNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1150. {
  1151. }
  1152. CompileAssert(CaseInsensitive::EquivClassSize == 4);
  1153. void MatchCharNode::Emit(Compiler& compiler, __in_ecount(4) Char * cs, bool isEquivClass)
  1154. {
  1155. if (isEquivClass)
  1156. {
  1157. // To ensure initialization, we first default-initialize the
  1158. // whole array with a constant, and then individually set it
  1159. // to be just the first character (known to exist). This can
  1160. // hopefully be optimized to just initialize to cs[0] by the
  1161. // compiler.
  1162. Char uniqueEquivs[CaseInsensitive::EquivClassSize] = { (Char)-1 };
  1163. for (int i = 0; i < CaseInsensitive::EquivClassSize; i++)
  1164. {
  1165. uniqueEquivs[i] = cs[0];
  1166. }
  1167. CharCount uniqueEquivCount = FindUniqueEquivs(cs, uniqueEquivs);
  1168. switch (uniqueEquivCount)
  1169. {
  1170. case 1:
  1171. EMIT(compiler, MatchCharInst, uniqueEquivs[0]);
  1172. break;
  1173. case 2:
  1174. EMIT(compiler, MatchChar2Inst, uniqueEquivs[0], uniqueEquivs[1]);
  1175. break;
  1176. case 3:
  1177. EMIT(compiler, MatchChar3Inst, uniqueEquivs[0], uniqueEquivs[1], uniqueEquivs[2]);
  1178. break;
  1179. default:
  1180. EMIT(compiler, MatchChar4Inst, uniqueEquivs[0], uniqueEquivs[1], uniqueEquivs[2], uniqueEquivs[3]);
  1181. break;
  1182. }
  1183. }
  1184. else
  1185. EMIT(compiler, MatchCharInst, cs[0]);
  1186. }
  1187. CharCount MatchCharNode::FindUniqueEquivs(const Char equivs[CaseInsensitive::EquivClassSize], __out_ecount(4) Char uniqueEquivs[CaseInsensitive::EquivClassSize])
  1188. {
  1189. uniqueEquivs[0] = equivs[0];
  1190. CharCount uniqueCount = 1;
  1191. for (CharCount equivIndex = 1; equivIndex < CaseInsensitive::EquivClassSize; ++equivIndex)
  1192. {
  1193. bool alreadyHave = false;
  1194. for (CharCount uniqueIndex = 0; uniqueIndex < uniqueCount; ++uniqueIndex)
  1195. {
  1196. if (uniqueEquivs[uniqueIndex] == equivs[equivIndex])
  1197. {
  1198. alreadyHave = true;
  1199. break;
  1200. }
  1201. }
  1202. if (!alreadyHave)
  1203. {
  1204. uniqueEquivs[uniqueCount] = equivs[equivIndex];
  1205. uniqueCount += 1;
  1206. }
  1207. }
  1208. return uniqueCount;
  1209. }
  1210. void MatchCharNode::Emit(Compiler& compiler, CharCount& skipped)
  1211. {
  1212. if (skipped >= 1)
  1213. {
  1214. // Asking to skip entire char
  1215. skipped--;
  1216. return;
  1217. }
  1218. //
  1219. // Compilation scheme:
  1220. //
  1221. // MatchChar(2|3|4)?
  1222. //
  1223. skipped -= min(skipped, static_cast<CharCount>(1));
  1224. Emit(compiler, cs, isEquivClass);
  1225. }
  1226. CharCount MatchCharNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1227. {
  1228. //
  1229. // Compilation scheme:
  1230. //
  1231. // SyncTo(Char|Char2Set|Set)And(Consume|Continue|Backup)
  1232. //
  1233. if (isHeadSyncronizingNode)
  1234. {
  1235. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  1236. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  1237. if (firstSet->IsSingleton())
  1238. EMIT(compiler, SyncToCharAndConsumeInst, firstSet->Singleton());
  1239. else
  1240. EMIT(compiler, SyncToSetAndConsumeInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1241. return 1;
  1242. }
  1243. else
  1244. {
  1245. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  1246. // inside a group for instance, then we won't need to back up but we cannot consume the literal. If we don't need to
  1247. // back up, we can use SyncToCharAndContinue instead.
  1248. if (prevConsumes.IsExact(0))
  1249. {
  1250. Char entries[CharSet<Char>::MaxCompact];
  1251. int count = firstSet->GetCompactEntries(2, entries);
  1252. if (count == 1)
  1253. EMIT(compiler, SyncToCharAndContinueInst, entries[0]);
  1254. else if (count == 2)
  1255. EMIT(compiler, SyncToChar2SetAndContinueInst, entries[0], entries[1]);
  1256. else
  1257. EMIT(compiler, SyncToSetAndContinueInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1258. }
  1259. else
  1260. {
  1261. if (firstSet->IsSingleton())
  1262. EMIT(compiler, SyncToCharAndBackupInst, firstSet->Singleton(), prevConsumes);
  1263. else
  1264. EMIT(compiler, SyncToSetAndBackupInst<false>, prevConsumes)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1265. }
  1266. return 0;
  1267. }
  1268. }
  1269. bool MatchCharNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1270. {
  1271. // We look for octoquad patterns before converting for case-insensitivity
  1272. Assert(!isEquivClass);
  1273. return oi->AppendChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), cs[0]));
  1274. }
  1275. bool MatchCharNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1276. {
  1277. if (isEquivClass)
  1278. {
  1279. accNumAlts *= CaseInsensitive::EquivClassSize;
  1280. if (accNumAlts > maxTrieArmExpansion)
  1281. return false;
  1282. }
  1283. return true;
  1284. }
  1285. bool MatchCharNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1286. {
  1287. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1288. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1289. {
  1290. if (trie->IsAccepting())
  1291. {
  1292. // An earlier literal is a prefix of this literal
  1293. // If isAcceptFirst, can ignore suffix of already recognized literal.
  1294. // Otherwise, must fail.
  1295. return isAcceptFirst;
  1296. }
  1297. CharTrie* tail = trie->Add(compiler.ctAllocator, cs[i]);
  1298. if (trie->Count() > maxTrieArmExpansion)
  1299. return false;
  1300. if (cont == 0)
  1301. {
  1302. if (tail->Count() > 0)
  1303. // This literal is a proper prefix of an earlier literal
  1304. return false;
  1305. tail->SetAccepting();
  1306. }
  1307. else
  1308. {
  1309. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  1310. return false;
  1311. }
  1312. }
  1313. return true;
  1314. }
  1315. #if ENABLE_REGEX_CONFIG_OPTIONS
  1316. void MatchCharNode::Print(DebugWriter* w, const Char* litbuf) const
  1317. {
  1318. w->Print(_u("MatchChar("));
  1319. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1320. {
  1321. if (i > 0)
  1322. w->Print(_u(", "));
  1323. w->PrintQuotedChar(cs[i]);
  1324. }
  1325. w->PrintEOL(_u(")"));
  1326. PrintAnnotations(w);
  1327. }
  1328. #endif
  1329. // ----------------------------------------------------------------------
  1330. // MatchSetNode
  1331. // ----------------------------------------------------------------------
  1332. CharCount MatchSetNode::LiteralLength() const
  1333. {
  1334. return 0;
  1335. }
  1336. bool MatchSetNode::IsCharOrPositiveSet() const
  1337. {
  1338. return !isNegation;
  1339. }
  1340. CharCount MatchSetNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1341. {
  1342. return 0;
  1343. }
  1344. void MatchSetNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1345. {
  1346. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0 && this->needsEquivClass)
  1347. {
  1348. // If the set is negated, then at this point we could:
  1349. // (1) take each char in the set to its equivalence class and complement it
  1350. // (2) complement the set and take each char to its equivalence class
  1351. // Thankfully the spec demands (1), so we don't need to actually calculate any complement, just
  1352. // retain the isNegated flag
  1353. CharSet<Char> newSet;
  1354. // First include all the existing characters in the result set so that large ranges such as \x80-\uffff
  1355. // don't temporarily turn into a large number of small ranges corresponding to the various equivalent
  1356. // characters
  1357. newSet.UnionInPlace(compiler.rtAllocator, set);
  1358. set.ToEquivClass(compiler.rtAllocator, newSet);
  1359. set = newSet;
  1360. }
  1361. }
  1362. bool MatchSetNode::IsRefiningAssertion(Compiler& compiler)
  1363. {
  1364. return false;
  1365. }
  1366. void MatchSetNode::AnnotatePass0(Compiler& compiler)
  1367. {
  1368. if (isNegation || set.IsEmpty())
  1369. isWord = false;
  1370. else
  1371. isWord = set.IsSubsetOf(*compiler.standardChars->GetWordSet());
  1372. }
  1373. void MatchSetNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1374. {
  1375. features = HasMatchSet;
  1376. if (isNegation)
  1377. {
  1378. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1379. set.ToComplement(compiler.ctAllocator, *firstSet);
  1380. }
  1381. else
  1382. {
  1383. // CAUTION:
  1384. // Be careful not to use firstSet after deleting the node.
  1385. firstSet = &set;
  1386. }
  1387. isFirstExact = true;
  1388. // If firstSet is empty then pattern will always fail
  1389. thisConsumes.Exact(firstSet->IsEmpty() ? 0 : 1);
  1390. isThisIrrefutable = false;
  1391. isThisWillNotProgress = true;
  1392. isThisWillNotRegress = true;
  1393. isNotInLoop = parentNotInLoop;
  1394. isAtLeastOnce = parentAtLeastOnce;
  1395. isNotSpeculative = parentNotSpeculative;
  1396. isNotNegated = parentNotNegated;
  1397. }
  1398. void MatchSetNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1399. {
  1400. prevConsumes = accumConsumes;
  1401. isPrevWillNotProgress = accumPrevWillNotProgress;
  1402. isPrevWillNotRegress = accumPrevWillNotRegress;
  1403. }
  1404. void MatchSetNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1405. {
  1406. followConsumes = accumConsumes;
  1407. followSet = accumFollow;
  1408. isFollowIrrefutable = accumFollowIrrefutable;
  1409. isFollowEOL = accumFollowEOL;
  1410. }
  1411. void MatchSetNode::AnnotatePass4(Compiler& compiler)
  1412. {
  1413. isDeterministic = true;
  1414. }
  1415. bool MatchSetNode::SupportsPrefixSkipping(Compiler& compiler) const
  1416. {
  1417. return true;
  1418. }
  1419. Node* MatchSetNode::HeadSyncronizingNode(Compiler& compiler)
  1420. {
  1421. return this;
  1422. }
  1423. CharCount MatchSetNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1424. {
  1425. return 0;
  1426. }
  1427. void MatchSetNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1428. {
  1429. Assert(false);
  1430. }
  1431. void MatchSetNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1432. {
  1433. }
  1434. void MatchSetNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1435. {
  1436. }
  1437. void MatchSetNode::Emit(Compiler& compiler, CharCount& skipped)
  1438. {
  1439. if (skipped >= 1)
  1440. {
  1441. // Asking to skip entire set
  1442. skipped--;
  1443. return;
  1444. }
  1445. //
  1446. // Compilation scheme:
  1447. //
  1448. // MatchSet
  1449. //
  1450. skipped -= min(skipped, static_cast<CharCount>(1));
  1451. RuntimeCharSet<Char> *runtimeSet;
  1452. if(isNegation)
  1453. runtimeSet = &EMIT(compiler, MatchSetInst<true>)->set;
  1454. else
  1455. runtimeSet = &EMIT(compiler, MatchSetInst<false>)->set;
  1456. runtimeSet->CloneFrom(compiler.rtAllocator, set);
  1457. }
  1458. CharCount MatchSetNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1459. {
  1460. //
  1461. // Compilation scheme:
  1462. //
  1463. // SyncToSetAnd(Consume|Continue|Backup)
  1464. //
  1465. RuntimeCharSet<Char> *runtimeSet;
  1466. CharCount consumedChars;
  1467. if (isHeadSyncronizingNode)
  1468. {
  1469. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  1470. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  1471. if(isNegation)
  1472. runtimeSet = &EMIT(compiler, SyncToSetAndConsumeInst<true>)->set;
  1473. else
  1474. runtimeSet = &EMIT(compiler, SyncToSetAndConsumeInst<false>)->set;
  1475. consumedChars = 1;
  1476. }
  1477. else
  1478. {
  1479. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  1480. // inside a group for instance, then we won't need to back up but we cannot consume the literal. If we don't need to
  1481. // back up, we still cannot use SyncToSetAndContinue like in MatchCharNode::EmitScan, since SyncToSetAndContinue does not support negation
  1482. // sets.
  1483. if(prevConsumes.IsExact(0))
  1484. {
  1485. if(isNegation)
  1486. runtimeSet = &EMIT(compiler, SyncToSetAndContinueInst<true>)->set;
  1487. else
  1488. runtimeSet = &EMIT(compiler, SyncToSetAndContinueInst<false>)->set;
  1489. }
  1490. else if(isNegation)
  1491. runtimeSet = &EMIT(compiler, SyncToSetAndBackupInst<true>, prevConsumes)->set;
  1492. else
  1493. runtimeSet = &EMIT(compiler, SyncToSetAndBackupInst<false>, prevConsumes)->set;
  1494. consumedChars = 0;
  1495. }
  1496. runtimeSet->CloneFrom(compiler.rtAllocator, set);
  1497. return consumedChars;
  1498. }
  1499. bool MatchSetNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1500. {
  1501. if (isNegation || set.IsEmpty() || !oi->BeginUnions())
  1502. return false;
  1503. Assert(CharSet<Char>::MaxCompact >= TrigramAlphabet::AlphaCount);
  1504. Char entries[CharSet<Char>::MaxCompact];
  1505. int count = set.GetCompactEntries(TrigramAlphabet::AlphaCount, entries);
  1506. if (count < 0)
  1507. // Too many unique characters
  1508. return false;
  1509. for (int i = 0; i < count; i++)
  1510. {
  1511. if (!oi->UnionChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), entries[i])))
  1512. // Too many unique characters
  1513. return false;
  1514. }
  1515. oi->EndUnions(); // this doesn't need to be called if we return false earlier since the OctoquadPattern won't be used
  1516. return true;
  1517. }
  1518. bool MatchSetNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1519. {
  1520. if (isNegation || !set.IsCompact())
  1521. return false;
  1522. const uint count = set.Count();
  1523. if(count == 0)
  1524. return false; // empty set always fails and consumes nothing, and therefore is not a char-trie arm
  1525. accNumAlts *= count;
  1526. if (accNumAlts > maxTrieArmExpansion)
  1527. return false;
  1528. return true;
  1529. }
  1530. bool MatchSetNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1531. {
  1532. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1533. Assert(!isNegation && set.IsCompact());
  1534. Char entries[CharSet<Char>::MaxCompact];
  1535. int count = set.GetCompactEntries(CharSet<Char>::MaxCompact, entries);
  1536. Assert(count > 0);
  1537. for (int i = 0; i < count; i++)
  1538. {
  1539. if (trie->IsAccepting())
  1540. {
  1541. // An earlier literal is a prefix of this literal
  1542. // If isAcceptFirst, can ignore suffix of already recognized literal.
  1543. // Otherwise, must fail.
  1544. return isAcceptFirst;
  1545. }
  1546. CharTrie* tail = trie->Add(compiler.ctAllocator, entries[i]);
  1547. if (trie->Count() > maxTrieArmExpansion)
  1548. return false;
  1549. if (cont == 0)
  1550. {
  1551. if (tail->Count() > 0)
  1552. // This literal is a proper prefix of an earlier literal
  1553. return false;
  1554. tail->SetAccepting();
  1555. }
  1556. else
  1557. {
  1558. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  1559. return false;
  1560. }
  1561. }
  1562. return true;
  1563. }
  1564. #if ENABLE_REGEX_CONFIG_OPTIONS
  1565. void MatchSetNode::Print(DebugWriter* w, const Char* litbuf) const
  1566. {
  1567. w->Print(_u("MatchSet(%s, "), isNegation ? _u("negative") : _u("positive"));
  1568. set.Print(w);
  1569. w->PrintEOL(_u(")"));
  1570. PrintAnnotations(w);
  1571. }
  1572. #endif
  1573. // ----------------------------------------------------------------------
  1574. // ConcatNode
  1575. // ----------------------------------------------------------------------
  1576. CharCount ConcatNode::LiteralLength() const
  1577. {
  1578. return 0;
  1579. }
  1580. bool ConcatNode::IsCharOrPositiveSet() const
  1581. {
  1582. return false;
  1583. }
  1584. CharCount ConcatNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1585. {
  1586. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1587. Assert(tail != 0);
  1588. CharCount n = 0;
  1589. #if DBG
  1590. ConcatNode* prev = 0;
  1591. #endif
  1592. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1593. {
  1594. Assert(curr->head->tag != Concat);
  1595. Assert(prev == 0 || !(prev->head->LiteralLength() > 0 && curr->head->LiteralLength() > 0));
  1596. n = UInt32Math::Add(n, curr->head->TransferPass0(compiler, litbuf));
  1597. #if DBG
  1598. prev = curr;
  1599. #endif
  1600. }
  1601. return n;
  1602. }
  1603. void ConcatNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1604. {
  1605. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1606. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1607. curr->head->TransferPass1(compiler, litbuf);
  1608. }
  1609. bool ConcatNode::IsRefiningAssertion(Compiler& compiler)
  1610. {
  1611. return false;
  1612. }
  1613. void ConcatNode::AnnotatePass0(Compiler& compiler)
  1614. {
  1615. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1616. Node* prev = 0;
  1617. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1618. {
  1619. curr->head->AnnotatePass0(compiler);
  1620. if (prev != 0)
  1621. {
  1622. if (curr->head->tag == WordBoundary && prev->isWord)
  1623. {
  1624. WordBoundaryNode* wb = (WordBoundaryNode*)curr->head;
  1625. wb->mustIncludeLeaving = true;
  1626. }
  1627. else if (prev->tag == WordBoundary && curr->head->isWord)
  1628. {
  1629. WordBoundaryNode* wb = (WordBoundaryNode*)prev;
  1630. wb->mustIncludeEntering = true;
  1631. }
  1632. }
  1633. prev = curr->head;
  1634. }
  1635. }
  1636. void ConcatNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1637. {
  1638. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1639. features = HasConcat;
  1640. isNotInLoop = parentNotInLoop;
  1641. isAtLeastOnce = parentAtLeastOnce;
  1642. isNotSpeculative = parentNotSpeculative;
  1643. isNotNegated = parentNotNegated;
  1644. // Pass 1: Count items
  1645. int n = 0;
  1646. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1647. n++;
  1648. // Pass 2: Annotate each item, accumulate features, consumes, find longest prefix of possibly-empty-accepting items,
  1649. // check if all items are irrefutable
  1650. int emptyPrefix = 0;
  1651. thisConsumes.Exact(0);
  1652. isThisIrrefutable = true;
  1653. isThisWillNotProgress = true;
  1654. isThisWillNotRegress = true;
  1655. int item = 0;
  1656. for (ConcatNode* curr = this; curr != 0; curr = curr->tail, item++)
  1657. {
  1658. curr->head->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, parentNotSpeculative, isNotNegated);
  1659. features |= curr->head->features;
  1660. thisConsumes.Add(curr->head->thisConsumes);
  1661. if (!curr->head->isThisIrrefutable)
  1662. isThisIrrefutable = false;
  1663. if (!curr->head->isThisWillNotProgress)
  1664. isThisWillNotProgress = false;
  1665. if (!curr->head->isThisWillNotRegress)
  1666. isThisWillNotRegress = false;
  1667. if (emptyPrefix == item && curr->head->thisConsumes.CouldMatchEmpty())
  1668. emptyPrefix++;
  1669. }
  1670. if (emptyPrefix == 0)
  1671. {
  1672. firstSet = head->firstSet;
  1673. isFirstExact = head->isFirstExact;
  1674. }
  1675. else
  1676. {
  1677. // Pass 3: Overall first set is union of first's of emptyPrefx
  1678. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1679. isFirstExact = true;
  1680. item = 0;
  1681. for (ConcatNode* curr = this; item <= min(emptyPrefix, n - 1); curr = curr->tail, item++)
  1682. {
  1683. AnalysisAssert(curr);
  1684. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  1685. if (!curr->head->isFirstExact)
  1686. isFirstExact = false;
  1687. }
  1688. }
  1689. }
  1690. void ConcatNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1691. {
  1692. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1693. prevConsumes = accumConsumes;
  1694. isPrevWillNotProgress = accumPrevWillNotProgress;
  1695. isPrevWillNotRegress = accumPrevWillNotRegress;
  1696. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1697. {
  1698. curr->head->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  1699. accumConsumes.Add(curr->head->thisConsumes);
  1700. if (!curr->head->isThisWillNotProgress)
  1701. accumPrevWillNotProgress = false;
  1702. if (!curr->head->isThisWillNotRegress)
  1703. accumPrevWillNotRegress = false;
  1704. }
  1705. }
  1706. void ConcatNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1707. {
  1708. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1709. followConsumes = accumConsumes;
  1710. followSet = accumFollow;
  1711. isFollowIrrefutable = accumFollowIrrefutable;
  1712. isFollowEOL = accumFollowEOL;
  1713. // Pass 1: Count items
  1714. int n = 0;
  1715. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1716. n++;
  1717. // Pass 2: Collect items so we can enumerate backwards
  1718. Node** nodes = AnewArray(compiler.ctAllocator, Node *, n);
  1719. int item = 0;
  1720. for (ConcatNode* curr = this; curr != 0; curr = curr->tail, item++)
  1721. nodes[item] = curr->head;
  1722. // Pass 3: Work backwards propagating follow set, irrefutability and FollowEndLineOrPattern, and adding consumes
  1723. CharSet<Char>* innerFollow = accumFollow;
  1724. for (item = n - 1; item >= 0; item--)
  1725. {
  1726. nodes[item]->AnnotatePass3(compiler, accumConsumes, innerFollow, accumFollowIrrefutable, accumFollowEOL);
  1727. if (!nodes[item]->isThisIrrefutable)
  1728. accumFollowIrrefutable = false;
  1729. if (!nodes[item]->IsEmptyOnly() && (compiler.program->flags & MultilineRegexFlag) == 0)
  1730. accumFollowEOL = nodes[item]->tag == EOL;
  1731. // ConcatNode has hardfail BOI test if any child has hardfail BOI
  1732. hasInitialHardFailBOI = hasInitialHardFailBOI || nodes[item]->hasInitialHardFailBOI;
  1733. if (item > 0)
  1734. {
  1735. CharSet<Char>* nextInnerFollow = Anew(compiler.ctAllocator, UnicodeCharSet);
  1736. if (nodes[item]->thisConsumes.CouldMatchEmpty() && !nodes[item]->IsRefiningAssertion(compiler))
  1737. {
  1738. // Later follows can shine through this item to the previous item
  1739. nextInnerFollow->UnionInPlace(compiler.ctAllocator, *innerFollow);
  1740. }
  1741. nextInnerFollow->UnionInPlace(compiler.ctAllocator, *nodes[item]->firstSet);
  1742. innerFollow = nextInnerFollow;
  1743. accumConsumes.Add(nodes[item]->thisConsumes);
  1744. }
  1745. }
  1746. }
  1747. void ConcatNode::AnnotatePass4(Compiler& compiler)
  1748. {
  1749. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1750. isDeterministic = true;
  1751. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1752. {
  1753. curr->head->AnnotatePass4(compiler);
  1754. if (!curr->head->isDeterministic)
  1755. isDeterministic = false;
  1756. }
  1757. }
  1758. bool ConcatNode::SupportsPrefixSkipping(Compiler& compiler) const
  1759. {
  1760. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1761. int prefix = 0;
  1762. for (const ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1763. {
  1764. if (curr->head->SupportsPrefixSkipping(compiler))
  1765. prefix++;
  1766. else
  1767. break;
  1768. }
  1769. return prefix > 0;
  1770. }
  1771. Node* ConcatNode::HeadSyncronizingNode(Compiler& compiler)
  1772. {
  1773. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1774. return head->HeadSyncronizingNode(compiler);
  1775. }
  1776. void ConcatNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1777. {
  1778. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  1779. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1780. curr->head->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  1781. }
  1782. CharCount ConcatNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1783. {
  1784. return 0;
  1785. }
  1786. void ConcatNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1787. {
  1788. Assert(false);
  1789. }
  1790. void ConcatNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1791. {
  1792. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1793. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1794. curr->head->BestSyncronizingNode(compiler, bestNode);
  1795. }
  1796. void ConcatNode::Emit(Compiler& compiler, CharCount& skipped)
  1797. {
  1798. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1799. //
  1800. // Compilation scheme:
  1801. //
  1802. // <item 1>
  1803. // ...
  1804. // <item n>
  1805. //
  1806. // :-)
  1807. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1808. curr->head->Emit(compiler, skipped);
  1809. }
  1810. CharCount ConcatNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1811. {
  1812. Assert(false);
  1813. return 0;
  1814. }
  1815. bool ConcatNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1816. {
  1817. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1818. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1819. {
  1820. if (!curr->head->IsOctoquad(compiler, oi))
  1821. return false;
  1822. }
  1823. return true;
  1824. }
  1825. bool ConcatNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1826. {
  1827. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1828. for (const ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1829. {
  1830. if (!curr->head->IsCharTrieArm(compiler, accNumAlts))
  1831. return false;
  1832. }
  1833. return true;
  1834. }
  1835. bool ConcatNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1836. {
  1837. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1838. if (cont != 0)
  1839. // We don't want to manage a stack of continuations
  1840. return false;
  1841. // NOTE: This is the only place we use an internal node of a concat sequence as a sub-concat sequence
  1842. return head->BuildCharTrie(compiler, trie, tail, isAcceptFirst);
  1843. }
  1844. #if ENABLE_REGEX_CONFIG_OPTIONS
  1845. void ConcatNode::Print(DebugWriter* w, const Char* litbuf) const
  1846. {
  1847. w->PrintEOL(_u("Concat()"));
  1848. PrintAnnotations(w);
  1849. w->PrintEOL(_u("{"));
  1850. w->Indent();
  1851. for (const ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1852. curr->head->Print(w, litbuf);
  1853. w->Unindent();
  1854. w->PrintEOL(_u("}"));
  1855. }
  1856. #endif
  1857. // ----------------------------------------------------------------------
  1858. // AltNode
  1859. // ----------------------------------------------------------------------
  1860. CharCount AltNode::LiteralLength() const
  1861. {
  1862. return 0;
  1863. }
  1864. bool AltNode::IsCharOrPositiveSet() const
  1865. {
  1866. return false;
  1867. }
  1868. CharCount AltNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1869. {
  1870. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1871. Assert(tail != 0);
  1872. CharCount n = 0;
  1873. #if DBG
  1874. AltNode* prev = 0;
  1875. #endif
  1876. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1877. {
  1878. Assert(curr->head->tag != Alt);
  1879. Assert(prev == 0 || !(prev->head->IsCharOrPositiveSet() && curr->head->IsCharOrPositiveSet()));
  1880. n = UInt32Math::Add(n, curr->head->TransferPass0(compiler, litbuf));
  1881. #if DBG
  1882. prev = curr;
  1883. #endif
  1884. }
  1885. return n;
  1886. }
  1887. void AltNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1888. {
  1889. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1890. for (AltNode *curr = this; curr != 0; curr = curr->tail)
  1891. curr->head->TransferPass1(compiler, litbuf);
  1892. }
  1893. bool AltNode::IsRefiningAssertion(Compiler& compiler)
  1894. {
  1895. return false;
  1896. }
  1897. void AltNode::AnnotatePass0(Compiler& compiler)
  1898. {
  1899. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1900. isWord = true;
  1901. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1902. {
  1903. curr->head->AnnotatePass0(compiler);
  1904. if (!curr->head->isWord)
  1905. isWord = false;
  1906. }
  1907. }
  1908. void AltNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1909. {
  1910. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1911. features = HasAlt;
  1912. isNotInLoop = parentNotInLoop;
  1913. isAtLeastOnce = parentAtLeastOnce;
  1914. isNotSpeculative = parentNotSpeculative;
  1915. isNotNegated = parentNotNegated;
  1916. // Overall alternative:
  1917. // - is irrefutable if at least one item is irrefutable
  1918. // - will not progress(regress) if each item will not progress(regress) and has strictly decreasing(increasing) consumes
  1919. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1920. isThisIrrefutable = false;
  1921. isThisWillNotProgress = true;
  1922. isThisWillNotRegress = true;
  1923. isFirstExact = true;
  1924. CountDomain prevConsumes;
  1925. int item = 0;
  1926. for (AltNode *curr = this; curr != 0; curr = curr->tail, item++)
  1927. {
  1928. curr->head->AnnotatePass1(compiler, parentNotInLoop, false, parentNotSpeculative, isNotNegated);
  1929. features |= curr->head->features;
  1930. if (!curr->head->isThisWillNotProgress)
  1931. isThisWillNotProgress = false;
  1932. if (!curr->head->isThisWillNotRegress)
  1933. isThisWillNotRegress = false;
  1934. if (item == 0)
  1935. prevConsumes = thisConsumes = curr->head->thisConsumes;
  1936. else
  1937. {
  1938. thisConsumes.Lub(curr->head->thisConsumes);
  1939. if (!curr->head->thisConsumes.IsLessThan(prevConsumes))
  1940. isThisWillNotProgress = false;
  1941. if (!curr->head->thisConsumes.IsGreaterThan(prevConsumes))
  1942. isThisWillNotRegress = false;
  1943. prevConsumes = curr->head->thisConsumes;
  1944. }
  1945. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  1946. if (!curr->head->isFirstExact || curr->head->isThisIrrefutable)
  1947. // If any item is irrefutable then later items may never be taken, so first set cannot be exact
  1948. isFirstExact = false;
  1949. if (curr->head->isThisIrrefutable)
  1950. isThisIrrefutable = true;
  1951. }
  1952. }
  1953. void AltNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1954. {
  1955. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1956. prevConsumes = accumConsumes;
  1957. isPrevWillNotProgress = accumPrevWillNotProgress;
  1958. isPrevWillNotRegress = accumPrevWillNotRegress;
  1959. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1960. curr->head->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  1961. }
  1962. void AltNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1963. {
  1964. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1965. followConsumes = accumConsumes;
  1966. followSet = accumFollow;
  1967. isFollowIrrefutable = accumFollowIrrefutable;
  1968. isFollowEOL = accumFollowEOL;
  1969. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1970. {
  1971. curr->head->AnnotatePass3(compiler, accumConsumes, accumFollow, accumFollowIrrefutable, accumFollowEOL);
  1972. // AltNode has hardfail BOI test if all child Nodes have hardfail BOI tests
  1973. hasInitialHardFailBOI = curr->head->hasInitialHardFailBOI && (hasInitialHardFailBOI || (curr == this));
  1974. }
  1975. }
  1976. void AltNode::AnnotatePass4(Compiler& compiler)
  1977. {
  1978. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1979. //
  1980. // Simplification rule
  1981. //
  1982. // If the follow is irrefutable then we can ignore all items after an irrefutable item, since
  1983. // we'll never be able to backtrack into them.
  1984. // E.g.: (a*|b*)c* === a*c*
  1985. //
  1986. bool simplified = false;
  1987. if (isFollowIrrefutable && isThisIrrefutable)
  1988. {
  1989. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1990. {
  1991. if (curr->head->isThisIrrefutable && curr->tail != 0)
  1992. {
  1993. curr->tail = 0;
  1994. simplified = true;
  1995. break;
  1996. }
  1997. }
  1998. }
  1999. if (simplified)
  2000. {
  2001. Assert(!isFirstExact);
  2002. // Recalculate firstSet. Since it can only get smaller, and alternative could not have had an exact
  2003. // first set, this recalculation does not make any decisions already made based on the current firstSet
  2004. // unsound.
  2005. // NOTE: Is it worth recalculating the WillNotProgress/WillNotRegress bools?
  2006. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  2007. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2008. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  2009. }
  2010. //
  2011. // Annotate items
  2012. //
  2013. isDeterministic = true;
  2014. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2015. {
  2016. curr->head->AnnotatePass4(compiler);
  2017. if (!curr->head->isDeterministic)
  2018. isDeterministic = false;
  2019. }
  2020. //
  2021. // Compilation scheme: Switch/Chain/Set, not isOptional
  2022. //
  2023. // If no item can match empty and all items' FIRST sets are pairwise disjoint then we can
  2024. // commit to an item using a 1 char lookahead. We can fall-through to the last
  2025. // item without guarding it since it will fail if the next character cannot match.
  2026. // E.g.: (abc|def)
  2027. //
  2028. {
  2029. // Pass 1: Items cannot match empty, accumulate counts
  2030. bool fires = true;
  2031. bool allCompact = true;
  2032. bool allSimpleOneChar = true;
  2033. int numItems = 0;
  2034. uint totalChars = 0;
  2035. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2036. {
  2037. if (curr->head->thisConsumes.CouldMatchEmpty())
  2038. {
  2039. fires = false;
  2040. break;
  2041. }
  2042. numItems++;
  2043. if (!curr->head->firstSet->IsCompact())
  2044. {
  2045. allCompact = false;
  2046. }
  2047. if (!curr->head->IsSimpleOneChar())
  2048. {
  2049. allSimpleOneChar = false;
  2050. }
  2051. totalChars += curr->head->firstSet->Count();
  2052. }
  2053. if (fires)
  2054. {
  2055. // To go from two to one items requires the first item
  2056. // to be irrefutable, in which case it could match empty and this rule won't fire.
  2057. Assert(numItems > 1);
  2058. // Step 2: Check FIRST sets are disjoint
  2059. if (totalChars == firstSet->Count())
  2060. {
  2061. // **COMMIT**
  2062. if (allSimpleOneChar)
  2063. {
  2064. // This will probably never fire since the parser has already converted alts-of-chars/sets
  2065. // to sets. We include it for symmetry with below.
  2066. scheme = Set;
  2067. }
  2068. else if (allCompact && totalChars <= Switch24Inst::MaxCases)
  2069. {
  2070. // Can use a switch instruction to jump to item
  2071. scheme = Switch;
  2072. switchSize = totalChars;
  2073. }
  2074. else
  2075. {
  2076. // Must use a chain of jump instructions to jump to item
  2077. scheme = Chain;
  2078. }
  2079. isOptional = false;
  2080. return;
  2081. }
  2082. }
  2083. }
  2084. //
  2085. // Compilation scheme: None/Switch/Chain/Set, isOptional
  2086. //
  2087. // Condition (1):
  2088. // If some items are empty-only, the rest (if any) cannot match empty, follow cannot match empty, and
  2089. // all items' FIRST sets are pairwise disjoint and disjoint from the FOLLOW set, then we can commit to
  2090. // either a non-empty item or to the empty item using a 1 char lookahead. In this case we just emit each
  2091. // non-empty item with a guard, and fall-through to follow if no guard fires.
  2092. // E.g.: (abc||def)h
  2093. //
  2094. // Condition (2):
  2095. // If some items are empty-only, the rest (if any) cannot match empty, follow is irrefutable, and all
  2096. // items' FIRST sets are pairwise disjoint, then we can commit to either a non-empty item or to the empty
  2097. // item using a 1 char lookahead, provided each non-empty item obeys the condition:
  2098. // ** the item can't fail if given an arbitrary input starting with a character in its FIRST set **
  2099. // Currently, we can prove that only for IsSimpleOneChar items, though more analysis could widen the class.
  2100. // Again, we emit each non-empty item with a guard, and fall-through to follow if no guard fires.
  2101. // E.g.: ([abc]|)a*
  2102. //
  2103. // Condition (3):
  2104. // If all items are empty-only, we can commit to a single empty-only item
  2105. {
  2106. // Pass 1
  2107. bool fires = false;
  2108. bool allSimpleOneChar = true;
  2109. bool allCompact = true;
  2110. int numNonEmpty = 0;
  2111. uint totalChars = 0;
  2112. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2113. {
  2114. if (curr->head->IsEmptyOnly())
  2115. {
  2116. fires = true;
  2117. }
  2118. else if (curr->head->thisConsumes.CouldMatchEmpty())
  2119. {
  2120. fires = false;
  2121. break;
  2122. }
  2123. else
  2124. {
  2125. numNonEmpty++;
  2126. if (!curr->head->IsSimpleOneChar())
  2127. {
  2128. allSimpleOneChar = false;
  2129. }
  2130. if (!curr->head->firstSet->IsCompact())
  2131. {
  2132. allCompact = false;
  2133. }
  2134. totalChars += curr->head->firstSet->Count();
  2135. }
  2136. }
  2137. if (fires)
  2138. {
  2139. // The firing condition is not strong enough yet.
  2140. fires = false;
  2141. // Check conditions (2) and (3) first because they're faster, then check condition (1).
  2142. if (numNonEmpty == 0 ||
  2143. (isFollowIrrefutable && allSimpleOneChar && totalChars == firstSet->Count()))
  2144. {
  2145. fires = true;
  2146. }
  2147. else if (!followConsumes.CouldMatchEmpty())
  2148. {
  2149. // Check whether all FIRST sets are pairwise disjoint
  2150. // and disjoint from the FOLLOW set.
  2151. CharSet<Char> unionSet;
  2152. unionSet.UnionInPlace(compiler.ctAllocator, *firstSet);
  2153. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  2154. if (totalChars + followSet->Count() == unionSet.Count())
  2155. {
  2156. fires = true;
  2157. }
  2158. }
  2159. if (fires)
  2160. {
  2161. // **COMMIT**
  2162. if (numNonEmpty == 0)
  2163. {
  2164. scheme = None;
  2165. }
  2166. else if (allSimpleOneChar)
  2167. {
  2168. scheme = Set;
  2169. }
  2170. else if (numNonEmpty > 1 && allCompact && totalChars <= Switch24Inst::MaxCases)
  2171. {
  2172. switchSize = totalChars;
  2173. scheme = Switch;
  2174. }
  2175. else
  2176. {
  2177. scheme = Chain;
  2178. }
  2179. isOptional = true;
  2180. return;
  2181. }
  2182. }
  2183. }
  2184. //
  2185. // Compilation scheme: Trie
  2186. //
  2187. // If alt is equivalent to the form:
  2188. // (literal1|...|literaln)
  2189. // (we expand items with embedded character classes such as a[bc]d to (abd|acd)) and either:
  2190. // - follow is irrefutable and no later literal is a proper prefix of an earlier literal
  2191. // (and we may ignore later literals which have an earlier literal as proper prefix)
  2192. // E.g.: (ab|ac|abd)a* === (ab|ac)a*
  2193. // or:
  2194. // - follow is not irrefutable and no literal is a proper prefix of any other literal
  2195. // and the branching factor of the resulting trie is smallish
  2196. // E.g.: (abc|abd|abe)f
  2197. // then we can use a character trie to match the appropriate item.
  2198. //
  2199. {
  2200. // Pass 1: Items must be structurally appropriate and not result in too many alternatives after expansion
  2201. bool fires = true;
  2202. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2203. {
  2204. uint numAlts = 1;
  2205. if (!curr->head->IsCharTrieArm(compiler, numAlts))
  2206. {
  2207. fires = false;
  2208. break;
  2209. }
  2210. if (numAlts > maxTrieArmExpansion)
  2211. {
  2212. fires = false;
  2213. break;
  2214. }
  2215. }
  2216. if (fires)
  2217. {
  2218. // Pass 2: Attempt to construct the trie, checking for prefixes.
  2219. CharTrie trie;
  2220. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2221. {
  2222. if (!curr->head->BuildCharTrie(compiler, &trie, 0, isFollowIrrefutable))
  2223. {
  2224. fires = false;
  2225. break;
  2226. }
  2227. }
  2228. if (fires)
  2229. {
  2230. // **COMMIT**
  2231. // If follow is irrefutable and first item is empty, the trie would be of depth zero.
  2232. // However, in this case, the first simplification rule would have replaced the alt with a
  2233. // single empty item, and the 'None' compilation scheme would have been selected above.
  2234. //
  2235. // Similarly, if all alternations are empty and follow is refutable, the trie would be
  2236. // of depth zero, and the 'None' compilation scheme would have been selected above.
  2237. Assert(!trie.IsDepthZero());
  2238. if (trie.IsDepthOne())
  2239. {
  2240. // This case will fire if follow is irrefutable and all non length one items have an
  2241. // earlier one-character item as prefix. In this case we don't need the trie: the
  2242. // firstSet has all the information.
  2243. isOptional = false;
  2244. scheme = Set;
  2245. }
  2246. else
  2247. {
  2248. // Root of trie will live in compile-time allocator, but body will be in run-time allocator
  2249. runtimeTrie = Anew(compiler.ctAllocator, RuntimeCharTrie);
  2250. runtimeTrie->CloneFrom(compiler.scriptContext, compiler.rtAllocator, trie);
  2251. scheme = Trie;
  2252. }
  2253. return;
  2254. }
  2255. }
  2256. }
  2257. //
  2258. // Compilation scheme: Try
  2259. //
  2260. scheme = Try;
  2261. isDeterministic = false; // NON-DETERMINISTIC
  2262. }
  2263. bool AltNode::SupportsPrefixSkipping(Compiler& compiler) const
  2264. {
  2265. return false;
  2266. }
  2267. Node* AltNode::HeadSyncronizingNode(Compiler& compiler)
  2268. {
  2269. return 0;
  2270. }
  2271. CharCount AltNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  2272. {
  2273. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2274. // Here, we ignore nodes with length 1, which are Char nodes. The way the Alt node synchronization
  2275. // is currently implemented, it expects all nodes to be Literal nodes. It requires quite a bit of
  2276. // refactoring to have Alt nodes support Char nodes for synchronization, so Char nodes are ignored
  2277. // for now.
  2278. int localNumLiterals = numLiterals;
  2279. CharCount minLen = head->MinSyncronizingLiteralLength(compiler, localNumLiterals);
  2280. if (minLen <= 1)
  2281. return 0;
  2282. for (AltNode* curr = tail; curr != 0; curr = curr->tail)
  2283. {
  2284. CharCount thisLen = curr->head->MinSyncronizingLiteralLength(compiler, localNumLiterals);
  2285. if (thisLen <= 1)
  2286. return 0;
  2287. minLen = min(minLen, thisLen);
  2288. }
  2289. numLiterals = localNumLiterals;
  2290. if (minLen <= 1)
  2291. {
  2292. return 0;
  2293. }
  2294. return minLen;
  2295. }
  2296. void AltNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  2297. {
  2298. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2299. for (const AltNode* curr = this; curr != 0; curr = curr->tail)
  2300. curr->head->CollectSyncronizingLiterals(compiler, scanners);
  2301. }
  2302. void AltNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  2303. {
  2304. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2305. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  2306. bestNode = this;
  2307. }
  2308. void AltNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  2309. {
  2310. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  2311. for (AltNode *curr = this; curr != 0; curr = curr->tail)
  2312. curr->head->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  2313. }
  2314. void AltNode::Emit(Compiler& compiler, CharCount& skipped)
  2315. {
  2316. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2317. Assert(skipped == 0);
  2318. switch (scheme)
  2319. {
  2320. case Try:
  2321. {
  2322. //
  2323. // Compilation scheme:
  2324. //
  2325. // Try((If|Match)(Char|Set))? L2
  2326. // <item 1>
  2327. // Jump Lexit
  2328. // L2: Try((If|Match)(Char|Set))? L3
  2329. // <item 2>
  2330. // Jump Lexit
  2331. // L3: <item 3>
  2332. // Lexit:
  2333. //
  2334. Assert(!isOptional);
  2335. int numItems = 0;
  2336. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2337. numItems++;
  2338. Assert(numItems >= 1);
  2339. // Each item other than last needs to jump to exit on success
  2340. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2341. Label lastTryFixup = 0;
  2342. int item = 0;
  2343. for (AltNode* curr = this; curr != 0; curr = curr->tail, item++)
  2344. {
  2345. if (item > 0)
  2346. // Fixup previous Try
  2347. compiler.DoFixup(lastTryFixup, compiler.CurrentLabel());
  2348. CharCount itemSkipped = 0;
  2349. if (item < numItems-1)
  2350. {
  2351. // HEURISTIC: if the first set of the alternative is exact or small, and the
  2352. // alternative does not match empty, then it's probably worth using
  2353. // a Try(If|Match)(Char|Set)
  2354. if (curr->head->firstSet != 0 &&
  2355. !curr->head->thisConsumes.CouldMatchEmpty() &&
  2356. (curr->head->isFirstExact || curr->head->firstSet->Count() <= maxCharsForConditionalTry))
  2357. {
  2358. if (curr->head->SupportsPrefixSkipping(compiler))
  2359. {
  2360. if (curr->head->firstSet->IsSingleton())
  2361. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryMatchCharInst, curr->head->firstSet->Singleton())->failLabel);
  2362. else
  2363. {
  2364. TryMatchSetInst* const i = EMIT(compiler, TryMatchSetInst);
  2365. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2366. lastTryFixup = compiler.GetFixup(&i->failLabel);
  2367. }
  2368. itemSkipped = 1;
  2369. }
  2370. else
  2371. {
  2372. if (curr->head->firstSet->IsSingleton())
  2373. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryIfCharInst, curr->head->firstSet->Singleton())->failLabel);
  2374. else
  2375. {
  2376. TryIfSetInst* const i = EMIT(compiler, TryIfSetInst);
  2377. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2378. lastTryFixup = compiler.GetFixup(&i->failLabel);
  2379. }
  2380. }
  2381. }
  2382. else
  2383. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryInst)->failLabel);
  2384. }
  2385. curr->head->Emit(compiler, itemSkipped);
  2386. if (item < numItems-1)
  2387. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2388. }
  2389. // Fixup jumps
  2390. for (item = 0; item < numItems-1; item++)
  2391. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2392. break;
  2393. }
  2394. case None:
  2395. {
  2396. Assert(isOptional);
  2397. // Nothing to emit
  2398. break;
  2399. }
  2400. case Trie:
  2401. {
  2402. //
  2403. // Compilation scheme:
  2404. //
  2405. // MatchTrie <trie>
  2406. //
  2407. EMIT(compiler, MatchTrieInst)->trie = *runtimeTrie;
  2408. break;
  2409. }
  2410. case Switch:
  2411. {
  2412. //
  2413. // Compilation scheme:
  2414. //
  2415. // Switch(AndConsume)?(2|4|8|16|24)(<dispatch to each arm>)
  2416. // Fail (if non-optional)
  2417. // Jump Lexit (if optional)
  2418. // L1: <item1>
  2419. // Jump Lexit
  2420. // L2: <item2>
  2421. // Jump Lexit
  2422. // L3: <item3>
  2423. // Lexit:
  2424. //
  2425. Assert(switchSize <= Switch24Inst::MaxCases);
  2426. int numItems = 0;
  2427. bool allCanSkip = true;
  2428. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2429. {
  2430. if (curr->head->thisConsumes.CouldMatchEmpty())
  2431. {
  2432. Assert(isOptional);
  2433. }
  2434. else
  2435. {
  2436. numItems++;
  2437. if (!curr->head->SupportsPrefixSkipping(compiler))
  2438. {
  2439. allCanSkip = false;
  2440. }
  2441. }
  2442. }
  2443. Assert(numItems > 1);
  2444. // Each item other than last needs to jump to exit on success
  2445. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2446. // We must remember where each item begins to fixup switch
  2447. Label* caseLabels = AnewArray(compiler.ctAllocator, Label, numItems);
  2448. // We must fixup the switch arms
  2449. Label switchLabel = compiler.CurrentLabel();
  2450. Assert(switchSize <= Switch24Inst::MaxCases);
  2451. if (allCanSkip)
  2452. {
  2453. if (switchSize <= Switch2Inst::MaxCases)
  2454. {
  2455. EMIT(compiler, SwitchAndConsume2Inst);
  2456. }
  2457. else if (switchSize <= Switch4Inst::MaxCases)
  2458. {
  2459. EMIT(compiler, SwitchAndConsume4Inst);
  2460. }
  2461. else if (switchSize <= Switch8Inst::MaxCases)
  2462. {
  2463. EMIT(compiler, SwitchAndConsume8Inst);
  2464. }
  2465. else if (switchSize <= Switch16Inst::MaxCases)
  2466. {
  2467. EMIT(compiler, SwitchAndConsume16Inst);
  2468. }
  2469. else if (switchSize <= Switch24Inst::MaxCases)
  2470. {
  2471. EMIT(compiler, SwitchAndConsume24Inst);
  2472. }
  2473. else
  2474. {
  2475. AssertOrFailFastMsg(false, "It should not be possible to reach here. This implies that we entered the Switch layout with greater than the max allowable cases.");
  2476. }
  2477. }
  2478. else
  2479. {
  2480. if (switchSize <= Switch2Inst::MaxCases)
  2481. {
  2482. EMIT(compiler, Switch2Inst);
  2483. }
  2484. else if (switchSize <= Switch4Inst::MaxCases)
  2485. {
  2486. EMIT(compiler, Switch4Inst);
  2487. }
  2488. else if (switchSize <= Switch8Inst::MaxCases)
  2489. {
  2490. EMIT(compiler, Switch8Inst);
  2491. }
  2492. else if (switchSize <= Switch16Inst::MaxCases)
  2493. {
  2494. EMIT(compiler, Switch16Inst);
  2495. }
  2496. else if (switchSize <= Switch24Inst::MaxCases)
  2497. {
  2498. EMIT(compiler, Switch24Inst);
  2499. }
  2500. else
  2501. {
  2502. AssertOrFailFastMsg(false, "It should not be possible to reach here. This implies that we entered the Switch layout with greater than the max allowable cases.");
  2503. }
  2504. }
  2505. Label defaultJumpFixup = 0;
  2506. if (isOptional)
  2507. {
  2508. // Must fixup default jump to exit
  2509. defaultJumpFixup = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2510. }
  2511. else
  2512. {
  2513. compiler.Emit<FailInst>();
  2514. }
  2515. // Emit each item
  2516. int item = 0;
  2517. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2518. {
  2519. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2520. {
  2521. if (allCanSkip)
  2522. {
  2523. skipped = 1;
  2524. }
  2525. caseLabels[item] = compiler.CurrentLabel();
  2526. curr->head->Emit(compiler, skipped);
  2527. if (item < numItems - 1)
  2528. {
  2529. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2530. }
  2531. item++;
  2532. }
  2533. }
  2534. // Fixup exit labels
  2535. if (isOptional)
  2536. {
  2537. compiler.DoFixup(defaultJumpFixup, compiler.CurrentLabel());
  2538. }
  2539. for (item = 0; item < numItems - 1; item++)
  2540. {
  2541. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2542. }
  2543. // Fixup the switch entries
  2544. item = 0;
  2545. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2546. {
  2547. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2548. {
  2549. Char entries[CharSet<Char>::MaxCompact];
  2550. int count = curr->head->firstSet->GetCompactEntries(CharSet<Char>::MaxCompact, entries);
  2551. Assert(count > 0);
  2552. for (int i = 0; i < count; i++)
  2553. {
  2554. if (allCanSkip)
  2555. {
  2556. if (switchSize <= Switch2Inst::MaxCases)
  2557. {
  2558. compiler.L2I(SwitchAndConsume2, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2559. }
  2560. else if (switchSize <= Switch4Inst::MaxCases)
  2561. {
  2562. compiler.L2I(SwitchAndConsume4, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2563. }
  2564. else if (switchSize <= Switch8Inst::MaxCases)
  2565. {
  2566. compiler.L2I(SwitchAndConsume8, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2567. }
  2568. else if (switchSize <= Switch16Inst::MaxCases)
  2569. {
  2570. compiler.L2I(SwitchAndConsume16, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2571. }
  2572. else if (switchSize <= Switch24Inst::MaxCases)
  2573. {
  2574. compiler.L2I(SwitchAndConsume24, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2575. }
  2576. }
  2577. else
  2578. {
  2579. if (switchSize <= Switch2Inst::MaxCases)
  2580. {
  2581. compiler.L2I(Switch2, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2582. }
  2583. else if (switchSize <= Switch4Inst::MaxCases)
  2584. {
  2585. compiler.L2I(Switch4, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2586. }
  2587. else if (switchSize <= Switch8Inst::MaxCases)
  2588. {
  2589. compiler.L2I(Switch8, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2590. }
  2591. else if (switchSize <= Switch16Inst::MaxCases)
  2592. {
  2593. compiler.L2I(Switch16, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2594. }
  2595. else if (switchSize <= Switch24Inst::MaxCases)
  2596. {
  2597. compiler.L2I(Switch24, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2598. }
  2599. }
  2600. }
  2601. item++;
  2602. }
  2603. }
  2604. break;
  2605. }
  2606. case Chain:
  2607. {
  2608. //
  2609. // Compilation scheme:
  2610. //
  2611. // JumpIfNot(Char|Set) L2
  2612. // <item1>
  2613. // Jump Lexit
  2614. // L2: JumpIfNot(Char|Set) L3
  2615. // <item2>
  2616. // Jump Lexit
  2617. // L3: <item3> (if non-optional)
  2618. // L3: JumpIfNot(Char|Set) Lexit (if optional)
  2619. // <item3> (if optional)
  2620. // Lexit:
  2621. //
  2622. int numItems = 0;
  2623. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2624. {
  2625. if (curr->head->thisConsumes.CouldMatchEmpty())
  2626. {
  2627. Assert(isOptional);
  2628. }
  2629. else
  2630. numItems++;
  2631. }
  2632. Assert(numItems > 0);
  2633. // Each item other than last needs to jump to exit on success
  2634. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2635. Label lastJumpFixup = 0;
  2636. int item = 0;
  2637. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2638. {
  2639. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2640. {
  2641. if (item > 0)
  2642. // Fixup previous Jump
  2643. compiler.DoFixup(lastJumpFixup, compiler.CurrentLabel());
  2644. CharCount itemSkipped = 0;
  2645. if (item < numItems-1 || isOptional)
  2646. {
  2647. if (curr->head->firstSet->IsSingleton())
  2648. {
  2649. if (curr->head->SupportsPrefixSkipping(compiler))
  2650. {
  2651. lastJumpFixup = compiler.GetFixup(&EMIT(compiler, MatchCharOrJumpInst, curr->head->firstSet->Singleton())->targetLabel);
  2652. itemSkipped = 1;
  2653. }
  2654. else
  2655. lastJumpFixup = compiler.GetFixup(&EMIT(compiler, JumpIfNotCharInst, curr->head->firstSet->Singleton())->targetLabel);
  2656. }
  2657. else
  2658. {
  2659. if (curr->head->SupportsPrefixSkipping(compiler))
  2660. {
  2661. MatchSetOrJumpInst* const i = EMIT(compiler, MatchSetOrJumpInst);
  2662. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2663. lastJumpFixup = compiler.GetFixup(&i->targetLabel);
  2664. itemSkipped = 1;
  2665. }
  2666. else
  2667. {
  2668. JumpIfNotSetInst* const i = EMIT(compiler, JumpIfNotSetInst);
  2669. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2670. lastJumpFixup = compiler.GetFixup(&i->targetLabel);
  2671. }
  2672. }
  2673. }
  2674. curr->head->Emit(compiler, itemSkipped);
  2675. if (item < numItems-1)
  2676. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2677. item++;
  2678. }
  2679. }
  2680. // Fixup jumps to exit
  2681. for (item = 0; item < numItems-1; item++)
  2682. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2683. if (isOptional)
  2684. // Fixup last Jump to exit
  2685. compiler.DoFixup(lastJumpFixup, compiler.CurrentLabel());
  2686. break;
  2687. }
  2688. case Set:
  2689. {
  2690. //
  2691. // Compilation scheme:
  2692. //
  2693. // Match(Char|Set) (non optional)
  2694. // OptMatch(Char|Set) (optional)
  2695. //
  2696. if (isOptional)
  2697. {
  2698. if (firstSet->IsSingleton())
  2699. EMIT(compiler, OptMatchCharInst, firstSet->Singleton());
  2700. else
  2701. EMIT(compiler, OptMatchSetInst)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  2702. }
  2703. else
  2704. {
  2705. if (firstSet->IsSingleton())
  2706. EMIT(compiler, MatchCharInst, firstSet->Singleton());
  2707. else
  2708. EMIT(compiler, MatchSetInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  2709. }
  2710. break;
  2711. }
  2712. }
  2713. }
  2714. CharCount AltNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  2715. {
  2716. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2717. Assert(!isHeadSyncronizingNode);
  2718. //
  2719. // Compilation scheme:
  2720. //
  2721. // SyncToLiteralsAndBackup
  2722. //
  2723. SyncToLiteralsAndBackupInst* i =
  2724. EMIT(
  2725. compiler,
  2726. SyncToLiteralsAndBackupInst,
  2727. compiler.GetScriptContext()->GetRecycler(),
  2728. compiler.GetProgram(),
  2729. prevConsumes);
  2730. CollectSyncronizingLiterals(compiler, *i);
  2731. return 0;
  2732. }
  2733. bool AltNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  2734. {
  2735. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2736. if (tail == 0 || tail->tail != 0)
  2737. // Must be exactly two alts
  2738. return false;
  2739. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2740. {
  2741. if (!oi->BeginConcat())
  2742. return false;
  2743. if (!curr->head->IsOctoquad(compiler, oi))
  2744. return false;
  2745. }
  2746. return true;
  2747. }
  2748. bool AltNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  2749. {
  2750. return false;
  2751. }
  2752. bool AltNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  2753. {
  2754. Assert(false);
  2755. return false;
  2756. }
  2757. #if ENABLE_REGEX_CONFIG_OPTIONS
  2758. void AltNode::Print(DebugWriter* w, const Char* litbuf) const
  2759. {
  2760. w->PrintEOL(_u("Alt()"));
  2761. PrintAnnotations(w);
  2762. w->PrintEOL(_u("{"));
  2763. w->Indent();
  2764. for (const AltNode *curr = this; curr != 0; curr = curr->tail)
  2765. curr->head->Print(w, litbuf);
  2766. w->Unindent();
  2767. w->PrintEOL(_u("}"));
  2768. }
  2769. #endif
  2770. // ----------------------------------------------------------------------
  2771. // DefineGroupNode
  2772. // ----------------------------------------------------------------------
  2773. CharCount DefineGroupNode::LiteralLength() const
  2774. {
  2775. return 0;
  2776. }
  2777. bool DefineGroupNode::IsCharOrPositiveSet() const
  2778. {
  2779. return false;
  2780. }
  2781. CharCount DefineGroupNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  2782. {
  2783. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2784. Assert(groupId > 0 && groupId < compiler.program->numGroups);
  2785. return body->TransferPass0(compiler, litbuf);
  2786. }
  2787. void DefineGroupNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  2788. {
  2789. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2790. body->TransferPass1(compiler, litbuf);
  2791. }
  2792. bool DefineGroupNode::IsRefiningAssertion(Compiler& compiler)
  2793. {
  2794. return false;
  2795. }
  2796. void DefineGroupNode::AnnotatePass0(Compiler& compiler)
  2797. {
  2798. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2799. body->AnnotatePass0(compiler);
  2800. isWord = body->isWord;
  2801. }
  2802. void DefineGroupNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  2803. {
  2804. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2805. features = HasDefineGroup;
  2806. body->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, parentNotSpeculative, parentNotNegated);
  2807. features |= body->features;
  2808. thisConsumes = body->thisConsumes;
  2809. firstSet = body->firstSet;
  2810. isFirstExact = body->isFirstExact;
  2811. isThisIrrefutable = body->isThisIrrefutable;
  2812. isThisWillNotProgress = body->isThisWillNotProgress;
  2813. isThisWillNotRegress = body->isThisWillNotRegress;
  2814. isNotInLoop = parentNotInLoop;
  2815. isAtLeastOnce = parentAtLeastOnce;
  2816. isNotSpeculative = parentNotSpeculative;
  2817. isNotNegated = parentNotNegated;
  2818. }
  2819. void DefineGroupNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  2820. {
  2821. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2822. prevConsumes = accumConsumes;
  2823. isPrevWillNotProgress = accumPrevWillNotProgress;
  2824. isPrevWillNotRegress = accumPrevWillNotRegress;
  2825. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  2826. }
  2827. void DefineGroupNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  2828. {
  2829. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2830. followConsumes = accumConsumes;
  2831. followSet = accumFollow;
  2832. isFollowIrrefutable = accumFollowIrrefutable;
  2833. isFollowEOL = accumFollowEOL;
  2834. body->AnnotatePass3(compiler, accumConsumes, accumFollow, accumFollowIrrefutable, accumFollowEOL);
  2835. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  2836. }
  2837. void DefineGroupNode::AnnotatePass4(Compiler& compiler)
  2838. {
  2839. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2840. body->AnnotatePass4(compiler);
  2841. isDeterministic = body->isDeterministic;
  2842. // If the follow is irrefutable and we're not in an assertion, then we are not going to backtrack beyond this point, so
  2843. // we don't need to save the group before updating it
  2844. noNeedToSave = isFollowIrrefutable && isNotSpeculative;
  2845. // Compilation scheme: Chomp
  2846. //
  2847. // Body consists of a loop node with a Chomp compilation scheme.
  2848. if(body->tag == NodeTag::Loop)
  2849. {
  2850. const LoopNode *const loop = static_cast<const LoopNode *>(body);
  2851. if(loop->scheme == LoopNode::CompilationScheme::Chomp && loop->repeats.lower <= 1 && loop->repeats.IsUnbounded())
  2852. {
  2853. // **COMMIT**
  2854. scheme = Chomp;
  2855. return;
  2856. }
  2857. }
  2858. // Compilation scheme: Fixed
  2859. //
  2860. // Body has fixed width, so don't need a Begin instruction to keep track of the input start offset of the group.
  2861. if (body->thisConsumes.IsFixed())
  2862. {
  2863. // **COMMIT**
  2864. scheme = Fixed;
  2865. return;
  2866. }
  2867. // Compilation scheme: BeginEnd
  2868. //
  2869. // If both the body and the follow are irrefutable, we're not in any loops, and we're not in an assertion,
  2870. // then we don't need to save the group before updating it.
  2871. // **COMMIT**
  2872. scheme = BeginEnd;
  2873. }
  2874. bool DefineGroupNode::SupportsPrefixSkipping(Compiler& compiler) const
  2875. {
  2876. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2877. if (scheme != Fixed)
  2878. // We can't skip over part of the match if the BeginDefineGroup must capture it's start
  2879. return false;
  2880. return body->SupportsPrefixSkipping(compiler);
  2881. }
  2882. Node* DefineGroupNode::HeadSyncronizingNode(Compiler& compiler)
  2883. {
  2884. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2885. if (scheme != Fixed)
  2886. // Can't skip BeginDefineGroup
  2887. return 0;
  2888. return body->HeadSyncronizingNode(compiler);
  2889. }
  2890. CharCount DefineGroupNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  2891. {
  2892. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2893. return body->MinSyncronizingLiteralLength(compiler, numLiterals);
  2894. }
  2895. void DefineGroupNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  2896. {
  2897. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2898. body->CollectSyncronizingLiterals(compiler, scanners);
  2899. }
  2900. void DefineGroupNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  2901. {
  2902. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2903. body->BestSyncronizingNode(compiler, bestNode);
  2904. }
  2905. void DefineGroupNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  2906. {
  2907. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  2908. if (groupId < minGroup)
  2909. minGroup = groupId;
  2910. if (groupId > maxGroup)
  2911. maxGroup = groupId;
  2912. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  2913. }
  2914. void DefineGroupNode::Emit(Compiler& compiler, CharCount& skipped)
  2915. {
  2916. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2917. switch (scheme)
  2918. {
  2919. case Chomp:
  2920. {
  2921. // Compilation scheme:
  2922. //
  2923. // Chomp(Char|Set)Group
  2924. Assert(body->tag == NodeTag::Loop);
  2925. const LoopNode *const loop = static_cast<const LoopNode *>(body);
  2926. const CharSet<Char> *const loopBodyFirstSet = loop->body->firstSet;
  2927. const CountDomain &repeats = loop->repeats;
  2928. Assert(repeats.lower <= 1 && repeats.IsUnbounded());
  2929. if(loopBodyFirstSet->IsSingleton())
  2930. {
  2931. const Char c = loopBodyFirstSet->Singleton();
  2932. if(repeats.lower == 0)
  2933. EMIT(compiler, ChompCharGroupInst<ChompMode::Star>, c, groupId, noNeedToSave);
  2934. else
  2935. EMIT(compiler, ChompCharGroupInst<ChompMode::Plus>, c, groupId, noNeedToSave);
  2936. }
  2937. else
  2938. {
  2939. Assert(repeats.lower <= 1 && repeats.IsUnbounded());
  2940. RuntimeCharSet<Char> *runtimeSet;
  2941. if(repeats.lower == 0)
  2942. runtimeSet = &EMIT(compiler, ChompSetGroupInst<ChompMode::Star>, groupId, noNeedToSave)->set;
  2943. else
  2944. runtimeSet = &EMIT(compiler, ChompSetGroupInst<ChompMode::Plus>, groupId, noNeedToSave)->set;
  2945. runtimeSet->CloneFrom(compiler.rtAllocator, *loopBodyFirstSet);
  2946. }
  2947. break;
  2948. }
  2949. case Fixed:
  2950. {
  2951. // Compilation scheme:
  2952. //
  2953. // <body>
  2954. // DefineGroup
  2955. Assert(body->thisConsumes.IsFixed());
  2956. body->Emit(compiler, skipped);
  2957. EMIT(compiler, DefineGroupFixedInst, groupId, body->thisConsumes.lower, noNeedToSave);
  2958. break;
  2959. }
  2960. case BeginEnd:
  2961. {
  2962. // Compilation scheme:
  2963. //
  2964. // BeginDefineGroup
  2965. // <body>
  2966. // EndDefineGroup
  2967. Assert(skipped == 0);
  2968. EMIT(compiler, BeginDefineGroupInst, groupId);
  2969. body->Emit(compiler, skipped);
  2970. EMIT(compiler, EndDefineGroupInst, groupId, noNeedToSave);
  2971. break;
  2972. }
  2973. }
  2974. }
  2975. CharCount DefineGroupNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  2976. {
  2977. Assert(false);
  2978. return 0;
  2979. }
  2980. bool DefineGroupNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  2981. {
  2982. return false;
  2983. }
  2984. bool DefineGroupNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  2985. {
  2986. return false;
  2987. }
  2988. bool DefineGroupNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  2989. {
  2990. Assert(false);
  2991. return false;
  2992. }
  2993. #if ENABLE_REGEX_CONFIG_OPTIONS
  2994. void DefineGroupNode::Print(DebugWriter* w, const Char* litbuf) const
  2995. {
  2996. w->PrintEOL(_u("DefineGroup(%d)"), groupId);
  2997. PrintAnnotations(w);
  2998. w->PrintEOL(_u("{"));
  2999. w->Indent();
  3000. body->Print(w, litbuf);
  3001. w->Unindent();
  3002. w->PrintEOL(_u("}"));
  3003. }
  3004. #endif
  3005. // ----------------------------------------------------------------------
  3006. // MatchGroupNode
  3007. // ----------------------------------------------------------------------
  3008. CharCount MatchGroupNode::LiteralLength() const
  3009. {
  3010. return 0;
  3011. }
  3012. bool MatchGroupNode::IsCharOrPositiveSet() const
  3013. {
  3014. return false;
  3015. }
  3016. CharCount MatchGroupNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  3017. {
  3018. Assert(groupId > 0 && groupId < compiler.program->numGroups);
  3019. return 0;
  3020. }
  3021. void MatchGroupNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  3022. {
  3023. }
  3024. bool MatchGroupNode::IsRefiningAssertion(Compiler& compiler)
  3025. {
  3026. return false;
  3027. }
  3028. void MatchGroupNode::AnnotatePass0(Compiler& compiler)
  3029. {
  3030. isWord = false;
  3031. }
  3032. void MatchGroupNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  3033. {
  3034. features = HasMatchGroup;
  3035. thisConsumes.lower = 0;
  3036. thisConsumes.upper = CharCountFlag;
  3037. firstSet = compiler.standardChars->GetFullSet();
  3038. isFirstExact = false;
  3039. isThisIrrefutable = false;
  3040. isThisWillNotProgress = true;
  3041. isThisWillNotRegress = true;
  3042. isNotInLoop = parentNotInLoop;
  3043. isAtLeastOnce = parentAtLeastOnce;
  3044. isNotSpeculative = parentNotSpeculative;
  3045. isNotNegated = parentNotNegated;
  3046. }
  3047. void MatchGroupNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  3048. {
  3049. prevConsumes = accumConsumes;
  3050. isPrevWillNotProgress = accumPrevWillNotProgress;
  3051. isPrevWillNotRegress = accumPrevWillNotRegress;
  3052. }
  3053. void MatchGroupNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  3054. {
  3055. followConsumes = accumConsumes;
  3056. followSet = accumFollow;
  3057. isFollowIrrefutable = accumFollowIrrefutable;
  3058. isFollowEOL = accumFollowEOL;
  3059. }
  3060. void MatchGroupNode::AnnotatePass4(Compiler& compiler)
  3061. {
  3062. isDeterministic = true;
  3063. }
  3064. bool MatchGroupNode::SupportsPrefixSkipping(Compiler& compiler) const
  3065. {
  3066. return false;
  3067. }
  3068. Node* MatchGroupNode::HeadSyncronizingNode(Compiler& compiler)
  3069. {
  3070. return 0;
  3071. }
  3072. CharCount MatchGroupNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  3073. {
  3074. return 0;
  3075. }
  3076. void MatchGroupNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  3077. {
  3078. Assert(false);
  3079. }
  3080. void MatchGroupNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  3081. {
  3082. }
  3083. void MatchGroupNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  3084. {
  3085. }
  3086. void MatchGroupNode::Emit(Compiler& compiler, CharCount& skipped)
  3087. {
  3088. Assert(skipped == 0);
  3089. //
  3090. // Compilation scheme:
  3091. //
  3092. // MatchGroup
  3093. //
  3094. EMIT(compiler, MatchGroupInst, groupId);
  3095. }
  3096. CharCount MatchGroupNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  3097. {
  3098. Assert(false);
  3099. return 0;
  3100. }
  3101. bool MatchGroupNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  3102. {
  3103. return false;
  3104. }
  3105. bool MatchGroupNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  3106. {
  3107. return false;
  3108. }
  3109. bool MatchGroupNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  3110. {
  3111. Assert(false);
  3112. return false;
  3113. }
  3114. #if ENABLE_REGEX_CONFIG_OPTIONS
  3115. void MatchGroupNode::Print(DebugWriter* w, const Char* litbuf) const
  3116. {
  3117. w->PrintEOL(_u("MatchGroup(%d)"), groupId);
  3118. PrintAnnotations(w);
  3119. }
  3120. #endif
  3121. // ----------------------------------------------------------------------
  3122. // LoopNode
  3123. // ----------------------------------------------------------------------
  3124. CharCount LoopNode::LiteralLength() const
  3125. {
  3126. return 0;
  3127. }
  3128. bool LoopNode::IsCharOrPositiveSet() const
  3129. {
  3130. return false;
  3131. }
  3132. CharCount LoopNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  3133. {
  3134. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3135. Assert(repeats.upper == CharCountFlag || repeats.upper > 0);
  3136. Assert(repeats.upper == CharCountFlag || repeats.upper >= repeats.lower);
  3137. Assert(!(repeats.lower == 1 && repeats.upper == 1));
  3138. return body->TransferPass0(compiler, litbuf);
  3139. }
  3140. void LoopNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  3141. {
  3142. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3143. body->TransferPass1(compiler, litbuf);
  3144. }
  3145. bool LoopNode::IsRefiningAssertion(Compiler& compiler)
  3146. {
  3147. return false;
  3148. }
  3149. void LoopNode::AnnotatePass0(Compiler& compiler)
  3150. {
  3151. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3152. body->AnnotatePass0(compiler);
  3153. isWord = !repeats.CouldMatchEmpty() && body->isWord;
  3154. }
  3155. void LoopNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  3156. {
  3157. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3158. features = HasLoop;
  3159. isNotInLoop = parentNotInLoop;
  3160. isAtLeastOnce = parentAtLeastOnce;
  3161. isNotSpeculative = parentNotSpeculative;
  3162. isNotNegated = parentNotNegated;
  3163. body->AnnotatePass1(compiler, false, parentAtLeastOnce && repeats.lower > 0, parentNotSpeculative, isNotNegated);
  3164. features |= body->features;
  3165. thisConsumes = body->thisConsumes;
  3166. thisConsumes.Mult(repeats);
  3167. firstSet = body->firstSet;
  3168. isFirstExact = repeats.lower > 0 && body->isFirstExact;
  3169. isThisIrrefutable = repeats.CouldMatchEmpty() || body->isThisIrrefutable;
  3170. // Caution: Even if a greedy loop has a 'isThisWillNotProgress' body, if the body has choicepoints then
  3171. // a backtrack could resume execution at an earlier loop iteration, which may then continue to repeat
  3172. // the loop beyond the input offset which triggered the backtrack. Ideally we'd use the body's isDeterministic
  3173. // flag to tell us when that can't happen, but it's not available till pass 4, so we must make do with
  3174. // a simple-minded structural approximation.
  3175. isThisWillNotProgress = (isGreedy || repeats.IsExact(1)) && body->isThisWillNotProgress && body->IsObviouslyDeterministic();
  3176. isThisWillNotRegress = (!isGreedy || repeats.IsExact(1)) && body->isThisWillNotRegress && body->IsObviouslyDeterministic();
  3177. }
  3178. void LoopNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  3179. {
  3180. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3181. prevConsumes = accumConsumes;
  3182. isPrevWillNotProgress = accumPrevWillNotProgress;
  3183. isPrevWillNotRegress = accumPrevWillNotRegress;
  3184. // May have already gone through loop when starting body
  3185. CountDomain bodyConsumes = body->thisConsumes;
  3186. CharCountOrFlag prevMax = repeats.upper;
  3187. if (prevMax != CharCountFlag)
  3188. prevMax--;
  3189. CountDomain prevLoops(0, prevMax);
  3190. bodyConsumes.Mult(prevLoops);
  3191. accumConsumes.Add(bodyConsumes);
  3192. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress && isThisWillNotProgress, accumPrevWillNotRegress && isThisWillNotRegress);
  3193. }
  3194. void LoopNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  3195. {
  3196. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3197. followConsumes = accumConsumes;
  3198. followSet = accumFollow;
  3199. isFollowIrrefutable = accumFollowIrrefutable;
  3200. isFollowEOL = accumFollowEOL;
  3201. // May go through loop again when leaving body
  3202. CountDomain bodyConsumes = body->thisConsumes;
  3203. CharCountOrFlag nextMax = repeats.upper;
  3204. if (nextMax != CharCountFlag)
  3205. nextMax--;
  3206. CountDomain nextLoops(0, nextMax);
  3207. bodyConsumes.Mult(nextLoops);
  3208. accumConsumes.Add(bodyConsumes);
  3209. CharSet<Char>* innerFollow = Anew(compiler.ctAllocator, UnicodeCharSet);
  3210. innerFollow->UnionInPlace(compiler.ctAllocator, *accumFollow);
  3211. innerFollow->UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3212. if (followSet->IsSingleton())
  3213. {
  3214. Assert(followSet->IsCompact());
  3215. followFirst = followSet->GetCompactChar(0);
  3216. }
  3217. /*
  3218. All of the following must be true for the loop body's follow to be irrefutable:
  3219. The loop's follow is irrefutable.
  3220. The loop can complete the required minimum number of iterations of the body without backtracking into a completed
  3221. iteration of the body.
  3222. - If repeats.lower == 0, the required minimum number of iterations is met without executing the body
  3223. - If repeats.lower == 1
  3224. - If the first iteration of the body fails, there is no previous iteration of the body to backtrack into
  3225. - After completing the first iteration of the body, the loop cannot reject the first iteration for not
  3226. making progress because the iteration is required for the loop to succeed
  3227. - If repeats.lower >= 2
  3228. - If the second iteration of the body fails, it will backtrack into the first iteration of the body
  3229. - To prevent this, the body must be irrefutable
  3230. After completing the required minimum number of iterations of the body, the loop cannot reject a subsequent
  3231. completed iteration of the body for not making progress.
  3232. - If !isGreedy || repeats.IsFixed(), there will not be any more iterations of the body, as it will proceed to
  3233. the irrefutable follow
  3234. - If !body->thisConsumes.CouldMatchEmpty(), subsequent iterations of the body cannot complete without making
  3235. progress
  3236. */
  3237. const bool isBodyFollowIrrefutable =
  3238. accumFollowIrrefutable &&
  3239. (repeats.lower <= 1 || body->isThisIrrefutable) &&
  3240. (!isGreedy || !body->thisConsumes.CouldMatchEmpty() || repeats.IsFixed());
  3241. body->AnnotatePass3(compiler, accumConsumes, innerFollow, isBodyFollowIrrefutable, false);
  3242. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  3243. }
  3244. void LoopNode::AnnotatePass4(Compiler& compiler)
  3245. {
  3246. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3247. body->AnnotatePass4(compiler);
  3248. isDeterministic = body->isDeterministic;
  3249. //
  3250. // Loops can be defined by unfolding:
  3251. // r* === (rr*|)
  3252. // r*? === (|rr*?)
  3253. // Thus many of the optimizations for alternatives carry over to loops.
  3254. //
  3255. //
  3256. // Compilation scheme: None
  3257. //
  3258. // If overall loop is empty-only then emit nothing.
  3259. // (Parser has already eliminated loops with upper == 0, so this can only happen if the body is empty-only)
  3260. //
  3261. // If loop is non-greedy with lower 0 and follow is irrefutable, then loop body will never be executed
  3262. // no emit nothing.
  3263. //
  3264. if (body->IsEmptyOnly() ||
  3265. (!isGreedy && repeats.lower == 0 && isFollowIrrefutable))
  3266. {
  3267. // **COMMIT**
  3268. scheme = None;
  3269. return;
  3270. }
  3271. //
  3272. // Compilation scheme: Chain/Try
  3273. //
  3274. // If loop is greedy, with lower 0 and upper 1, then we'd like to treat it as body|<empty> so as to avoid all the loop
  3275. // overhead. However, if the body could match empty, then a match against empty must be treated as an 'iteration' of the
  3276. // loop body which made no progress. So we treat as a general loop in that case. Otherwise, we may inline two of
  3277. // AltNode's compilation schemes:
  3278. // Examples:
  3279. // - /(a*)?/.exec("") must leave group 1 undefined rather than empty.
  3280. // - /(?:a||b)?/.exec("b") chooses the empty alt, then must backtrack due to no progress, and match 'b'.
  3281. // This is not the same as /a||b|/, as picking the first empty alt would result in success.
  3282. //
  3283. // (cf AltNode's None/Switch/Chain/Set, isOptional compilation scheme)
  3284. // If body cannot match empty, follow cannot match empty, and the body FIRST set is disjoint from the FOLLOW
  3285. // set, then we can commit to the body using a 1 char lookahead.
  3286. //
  3287. // If body cannot match empty, and follow is irrefutable, then we can commit to the body using a 1 char
  3288. // lookahead provided:
  3289. // ** the body can't fail if given an arbitrary input starting with a character in its FIRST set **
  3290. //
  3291. // (cf AltNode's Try compilation scheme)
  3292. // Otherwise, protect body by a Try instruction.
  3293. //
  3294. if (isGreedy && repeats.lower == 0 && repeats.upper == 1 && !body->thisConsumes.CouldMatchEmpty())
  3295. {
  3296. // **COMMIT**
  3297. // Note that the FIRST of the loop is already the union of the body FIRST and the loop FOLLOW
  3298. if (!body->thisConsumes.CouldMatchEmpty() &&
  3299. ((!followConsumes.CouldMatchEmpty() && firstSet->Count() == body->firstSet->Count() + followSet->Count()) ||
  3300. (isFollowIrrefutable && body->IsSimpleOneChar())))
  3301. {
  3302. if (body->IsSimpleOneChar())
  3303. scheme = Set;
  3304. else
  3305. scheme = Chain;
  3306. }
  3307. else
  3308. {
  3309. scheme = Try;
  3310. isDeterministic = false; // NON-DETERMINISTIC
  3311. }
  3312. return;
  3313. }
  3314. //
  3315. // Compilation scheme: Chomp/ChompGroupLastChar
  3316. //
  3317. // If the body is a simple-one-char, or a group of a simple-one-char, and either:
  3318. // - follow is non-empty and FIRST and FOLLOW are disjoint
  3319. // - loop is greedy and follow is irrefutable
  3320. // - follow is EOL
  3321. // then consume up to upper number of characters in FIRST and fail if number consumed is not >= lower.
  3322. //
  3323. if (body->IsSimpleOneChar() || (body->tag == DefineGroup && ((DefineGroupNode*)body)->body->IsSimpleOneChar()))
  3324. {
  3325. if (!followConsumes.CouldMatchEmpty())
  3326. {
  3327. CharSet<Char> unionSet;
  3328. CharCount totalChars = 0;
  3329. unionSet.UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3330. totalChars += body->firstSet->Count();
  3331. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  3332. totalChars += followSet->Count();
  3333. if (totalChars == unionSet.Count())
  3334. {
  3335. // **COMMIT**
  3336. if (body->tag == DefineGroup)
  3337. {
  3338. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3339. scheme = ChompGroupLastChar;
  3340. }
  3341. else
  3342. scheme = Chomp;
  3343. return;
  3344. }
  3345. }
  3346. if ((isGreedy && isFollowIrrefutable) || isFollowEOL)
  3347. {
  3348. // **COMMIT**
  3349. if (body->tag == DefineGroup)
  3350. {
  3351. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3352. scheme = ChompGroupLastChar;
  3353. }
  3354. else
  3355. scheme = Chomp;
  3356. return;
  3357. }
  3358. }
  3359. //
  3360. // Compilation scheme: Guarded
  3361. //
  3362. // If body cannot match empty, follow cannot match empty, and FIRST of body and FOLLOW are
  3363. // disjoint then can use 1 char lookahead to decide whether to commit to another loop body.
  3364. // (If the loop body fails then we know the follow will fail even with one more/fewer iterations of the
  3365. // loop body, so we can let that failure propagate without needing to push choicepoints.)
  3366. //
  3367. if (!body->thisConsumes.CouldMatchEmpty() && !followConsumes.CouldMatchEmpty())
  3368. {
  3369. CharSet<Char> unionSet;
  3370. CharCount totalChars = 0;
  3371. unionSet.UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3372. totalChars += body->firstSet->Count();
  3373. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  3374. totalChars += followSet->Count();
  3375. if (totalChars == unionSet.Count())
  3376. {
  3377. // **COMMIT**
  3378. scheme = Guarded;
  3379. return;
  3380. }
  3381. }
  3382. //
  3383. // Compilation scheme: Fixed/FixedSet/FixedGroupLastIteration
  3384. //
  3385. // If loop is greedy, body is deterministic, non-zero fixed width, and either does not define any groups
  3386. // or has one outermost group, then we can keep track of the backtracking information in constant space.
  3387. //
  3388. // If body does have an outer group, we can avoid saving the existing group contents if the follow
  3389. // is irrefutable, we're not in an outer loop, and we're not in an assertion.
  3390. //
  3391. if (isGreedy && body->isDeterministic && !body->thisConsumes.CouldMatchEmpty() && body->thisConsumes.IsFixed())
  3392. {
  3393. if (body->tag == DefineGroup)
  3394. {
  3395. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3396. if (!bodyGroup->body->ContainsDefineGroup())
  3397. {
  3398. // **COMMIT**
  3399. scheme = FixedGroupLastIteration;
  3400. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3401. isDeterministic = false; // NON-DETERMINISTIC;
  3402. return;
  3403. }
  3404. }
  3405. else if (body->IsSimpleOneChar())
  3406. {
  3407. // **COMMIT**
  3408. scheme = FixedSet;
  3409. isDeterministic = false; // NON-DETERMINISTIC
  3410. return;
  3411. }
  3412. else if (!body->ContainsDefineGroup())
  3413. {
  3414. // **COMMIT**
  3415. scheme = Fixed;
  3416. isDeterministic = false; // NON-DETERMINISTIC
  3417. return;
  3418. }
  3419. }
  3420. //
  3421. // Compilation scheme: GreedyNoBacktrack
  3422. //
  3423. // If loop is greedy with lower == 0 and upper == inf, the loop body is deterministic and does not define
  3424. // groups, and follow is irrefutable, then we will never have to try fewer iterations of the loop once
  3425. // entering the follow. Thus we only need one continuation record on the stack to protect against failure
  3426. // for each attempt at the loop body.
  3427. //
  3428. if (isGreedy && repeats.lower == 0 && repeats.upper == CharCountFlag && body->isDeterministic && !body->ContainsDefineGroup() && isFollowIrrefutable)
  3429. {
  3430. // **COMMIT**
  3431. scheme = GreedyNoBacktrack;
  3432. return;
  3433. }
  3434. //
  3435. // Compilation scheme: BeginEnd
  3436. //
  3437. scheme = BeginEnd;
  3438. isDeterministic = false; // NON-DETERMINISTIC
  3439. }
  3440. bool LoopNode::SupportsPrefixSkipping(Compiler& compiler) const
  3441. {
  3442. return false;
  3443. }
  3444. Node* LoopNode::HeadSyncronizingNode(Compiler& compiler)
  3445. {
  3446. return 0;
  3447. }
  3448. CharCount LoopNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  3449. {
  3450. return 0;
  3451. }
  3452. void LoopNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  3453. {
  3454. Assert(false);
  3455. }
  3456. void LoopNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  3457. {
  3458. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3459. if (repeats.lower > 0)
  3460. body->BestSyncronizingNode(compiler, bestNode);
  3461. // else: can't be sure loop will be taken
  3462. }
  3463. void LoopNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  3464. {
  3465. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  3466. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  3467. }
  3468. void LoopNode::Emit(Compiler& compiler, CharCount& skipped)
  3469. {
  3470. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3471. Assert(skipped == 0);
  3472. switch (scheme)
  3473. {
  3474. case BeginEnd:
  3475. {
  3476. //
  3477. // Compilation scheme:
  3478. //
  3479. // Lloop: BeginLoop Lexit
  3480. // <loop body>
  3481. // RepeatLoop Lloop
  3482. // Lexit:
  3483. //
  3484. int minBodyGroupId = compiler.program->numGroups;
  3485. int maxBodyGroupId = -1;
  3486. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3487. Label beginLabel = compiler.CurrentLabel();
  3488. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopInst, compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId, isGreedy)->exitLabel);
  3489. body->Emit(compiler, skipped);
  3490. EMIT(compiler, RepeatLoopInst, beginLabel);
  3491. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3492. break;
  3493. }
  3494. case None:
  3495. {
  3496. // Nothing to emit
  3497. break;
  3498. }
  3499. case Chomp:
  3500. {
  3501. //
  3502. // Compilation scheme:
  3503. //
  3504. // Chomp(Char|Set)(Star|Plus|Bounded)
  3505. //
  3506. if(body->firstSet->IsSingleton())
  3507. {
  3508. if(repeats.lower <= 1 && repeats.IsUnbounded())
  3509. {
  3510. if(repeats.lower == 0)
  3511. EMIT(compiler, ChompCharInst<ChompMode::Star>, body->firstSet->Singleton());
  3512. else
  3513. EMIT(compiler, ChompCharInst<ChompMode::Plus>, body->firstSet->Singleton());
  3514. }
  3515. else
  3516. EMIT(compiler, ChompCharBoundedInst, body->firstSet->Singleton(), repeats);
  3517. }
  3518. else
  3519. {
  3520. if(repeats.lower <= 1 && repeats.IsUnbounded())
  3521. {
  3522. if(repeats.lower == 0)
  3523. EMIT(compiler, ChompSetInst<ChompMode::Star>)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3524. else
  3525. EMIT(compiler, ChompSetInst<ChompMode::Plus>)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3526. }
  3527. else
  3528. EMIT(compiler, ChompSetBoundedInst, repeats)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3529. }
  3530. break;
  3531. }
  3532. case ChompGroupLastChar:
  3533. {
  3534. //
  3535. // Compilation scheme:
  3536. //
  3537. // ChompSetGroup
  3538. //
  3539. Assert(body->tag == DefineGroup);
  3540. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3541. EMIT(compiler, ChompSetBoundedGroupLastCharInst, repeats, bodyGroup->groupId, noNeedToSave)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3542. break;
  3543. }
  3544. case Guarded:
  3545. {
  3546. //
  3547. // Compilation scheme:
  3548. //
  3549. // Lloop: BeginLoopIf(Char|Set) Lexit
  3550. // <loop body>
  3551. // RepeatLoopIf(Char|Set) Lloop
  3552. // Lexit:
  3553. //
  3554. int minBodyGroupId = compiler.program->numGroups;
  3555. int maxBodyGroupId = -1;
  3556. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3557. Label beginLabel = compiler.CurrentLabel();
  3558. Label exitFixup;
  3559. if (body->firstSet->IsSingleton())
  3560. exitFixup = compiler.GetFixup(&EMIT(compiler, BeginLoopIfCharInst, body->firstSet->Singleton(), compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId)->exitLabel);
  3561. else
  3562. {
  3563. BeginLoopIfSetInst* i = EMIT(compiler, BeginLoopIfSetInst, compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId);
  3564. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3565. exitFixup = compiler.GetFixup(&i->exitLabel);
  3566. }
  3567. body->Emit(compiler, skipped);
  3568. if (body->firstSet->IsSingleton())
  3569. EMIT(compiler, RepeatLoopIfCharInst, beginLabel);
  3570. else
  3571. EMIT(compiler, RepeatLoopIfSetInst, beginLabel);
  3572. compiler.DoFixup(exitFixup, compiler.CurrentLabel());
  3573. break;
  3574. }
  3575. case Fixed:
  3576. {
  3577. //
  3578. // Compilation scheme:
  3579. //
  3580. // Lloop: BeginLoopFixed Lexit
  3581. // <loop body>
  3582. // RepeatLoopFixed Lloop
  3583. // Lexit:
  3584. //
  3585. Assert(!body->ContainsDefineGroup());
  3586. Assert(body->thisConsumes.IsFixed());
  3587. Assert(body->thisConsumes.lower > 0);
  3588. Assert(body->isDeterministic);
  3589. Label beginLabel = compiler.CurrentLabel();
  3590. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopFixedInst, compiler.NextLoopId(), repeats, !isNotInLoop, body->thisConsumes.lower)->exitLabel);
  3591. body->Emit(compiler, skipped);
  3592. EMIT(compiler, RepeatLoopFixedInst, beginLabel);
  3593. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3594. break;
  3595. }
  3596. case FixedSet:
  3597. {
  3598. //
  3599. // Compilation scheme:
  3600. //
  3601. // LoopSet
  3602. //
  3603. Assert(body->IsSimpleOneChar());
  3604. if (followFirst == MaxChar || PHASE_OFF1(Js::RegexOptBTPhase))
  3605. {
  3606. EMIT(compiler, LoopSetInst, compiler.NextLoopId(), repeats, !isNotInLoop)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3607. }
  3608. else
  3609. {
  3610. EMIT(compiler, LoopSetWithFollowFirstInst, compiler.NextLoopId(), repeats, !isNotInLoop, followFirst)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3611. }
  3612. break;
  3613. }
  3614. case FixedGroupLastIteration:
  3615. {
  3616. //
  3617. // Compilation scheme:
  3618. //
  3619. // Lloop: BeginLoopFixedGroupLastIteration Lexit
  3620. // <loop body>
  3621. // RepeatLoopFixedGroupLastIteration Lloop
  3622. // Lexit:
  3623. //
  3624. Assert(body->tag == DefineGroup);
  3625. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3626. Assert(body->thisConsumes.IsFixed());
  3627. Assert(body->thisConsumes.lower > 0);
  3628. Assert(body->isDeterministic);
  3629. Label beginLabel = compiler.CurrentLabel();
  3630. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopFixedGroupLastIterationInst, compiler.NextLoopId(), repeats, !isNotInLoop, body->thisConsumes.lower, bodyGroup->groupId, noNeedToSave)->exitLabel);
  3631. bodyGroup->body->Emit(compiler, skipped);
  3632. EMIT(compiler, RepeatLoopFixedGroupLastIterationInst, beginLabel);
  3633. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3634. break;
  3635. }
  3636. case GreedyNoBacktrack:
  3637. {
  3638. //
  3639. // Compilation scheme:
  3640. //
  3641. // Lloop: BeginGreedyLoopNoBacktrack Lexit
  3642. // <loop body>
  3643. // RepeatGreedyLoopNoBacktrack Lloop
  3644. // Lexit:
  3645. //
  3646. Assert(!body->ContainsDefineGroup());
  3647. Assert(isGreedy);
  3648. Assert(repeats.lower == 0);
  3649. Assert(repeats.upper == CharCountFlag);
  3650. Assert(body->isDeterministic);
  3651. Label beginLabel = compiler.CurrentLabel();
  3652. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginGreedyLoopNoBacktrackInst, compiler.NextLoopId())->exitLabel);
  3653. body->Emit(compiler, skipped);
  3654. EMIT(compiler, RepeatGreedyLoopNoBacktrackInst, beginLabel);
  3655. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3656. break;
  3657. }
  3658. case Set:
  3659. {
  3660. //
  3661. // Compilation scheme:
  3662. //
  3663. // OptMatch(Char|Set)
  3664. //
  3665. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3666. if (body->firstSet->IsSingleton())
  3667. EMIT(compiler, OptMatchCharInst, body->firstSet->Singleton());
  3668. else
  3669. EMIT(compiler, OptMatchSetInst)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3670. break;
  3671. }
  3672. case Chain:
  3673. {
  3674. //
  3675. // Compilation scheme:
  3676. //
  3677. // JumpIfNot(Char|Set) Lexit
  3678. // <body>
  3679. // Lexit:
  3680. //
  3681. //
  3682. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3683. Label jumpFixup = 0;
  3684. CharCount bodySkipped = 0;
  3685. if (body->firstSet->IsSingleton())
  3686. {
  3687. if (body->SupportsPrefixSkipping(compiler))
  3688. {
  3689. jumpFixup = compiler.GetFixup(&EMIT(compiler, MatchCharOrJumpInst, body->firstSet->Singleton())->targetLabel);
  3690. bodySkipped = 1;
  3691. }
  3692. else
  3693. jumpFixup = compiler.GetFixup(&EMIT(compiler, JumpIfNotCharInst, body->firstSet->Singleton())->targetLabel);
  3694. }
  3695. else
  3696. {
  3697. if (body->SupportsPrefixSkipping(compiler))
  3698. {
  3699. MatchSetOrJumpInst* const i = EMIT(compiler, MatchSetOrJumpInst);
  3700. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3701. jumpFixup = compiler.GetFixup(&i->targetLabel);
  3702. bodySkipped = 1;
  3703. }
  3704. else
  3705. {
  3706. JumpIfNotSetInst* const i = EMIT(compiler, JumpIfNotSetInst);
  3707. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3708. jumpFixup = compiler.GetFixup(&i->targetLabel);
  3709. }
  3710. }
  3711. body->Emit(compiler, bodySkipped);
  3712. compiler.DoFixup(jumpFixup, compiler.CurrentLabel());
  3713. break;
  3714. }
  3715. case Try:
  3716. {
  3717. //
  3718. // Compilation scheme:
  3719. //
  3720. // Try((If|Match)(Char|Set))? Lexit
  3721. // <loop body>
  3722. // Lexit:
  3723. //
  3724. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3725. Label tryFixup = 0;
  3726. CharCount bodySkipped = 0;
  3727. // HEURISTIC: if the first set of the body is exact or small, and the
  3728. // body does not match empty, then it's probably worth using
  3729. // a Try(If|Match)(Char|Set)
  3730. if (!body->thisConsumes.CouldMatchEmpty() &&
  3731. (body->isFirstExact || body->firstSet->Count() <= maxCharsForConditionalTry))
  3732. {
  3733. if (body->SupportsPrefixSkipping(compiler))
  3734. {
  3735. if (body->firstSet->IsSingleton())
  3736. tryFixup = compiler.GetFixup(&EMIT(compiler, TryMatchCharInst, body->firstSet->Singleton())->failLabel);
  3737. else
  3738. {
  3739. TryMatchSetInst* const i = EMIT(compiler, TryMatchSetInst);
  3740. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3741. tryFixup = compiler.GetFixup(&i->failLabel);
  3742. }
  3743. bodySkipped = 1;
  3744. }
  3745. else
  3746. {
  3747. if(body->firstSet->IsSingleton())
  3748. tryFixup = compiler.GetFixup(&EMIT(compiler, TryIfCharInst, body->firstSet->Singleton())->failLabel);
  3749. else
  3750. {
  3751. TryIfSetInst* const i = EMIT(compiler, TryIfSetInst);
  3752. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3753. tryFixup = compiler.GetFixup(&i->failLabel);
  3754. }
  3755. }
  3756. }
  3757. else
  3758. tryFixup = compiler.GetFixup(&EMIT(compiler, TryInst)->failLabel);
  3759. body->Emit(compiler, bodySkipped);
  3760. // Fixup Try
  3761. compiler.DoFixup(tryFixup, compiler.CurrentLabel());
  3762. break;
  3763. }
  3764. }
  3765. }
  3766. CharCount LoopNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  3767. {
  3768. Assert(false);
  3769. return 0;
  3770. }
  3771. bool LoopNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  3772. {
  3773. return false;
  3774. }
  3775. bool LoopNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  3776. {
  3777. return false;
  3778. }
  3779. bool LoopNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  3780. {
  3781. Assert(false);
  3782. return false;
  3783. }
  3784. #if ENABLE_REGEX_CONFIG_OPTIONS
  3785. void LoopNode::Print(DebugWriter* w, const Char* litbuf) const
  3786. {
  3787. w->Print(_u("Loop("));
  3788. repeats.Print(w);
  3789. w->PrintEOL(_u(", %s)"), isGreedy ? _u("greedy") : _u("non-greedy"));
  3790. PrintAnnotations(w);
  3791. w->PrintEOL(_u("{"));
  3792. w->Indent();
  3793. body->Print(w, litbuf);
  3794. w->Unindent();
  3795. w->PrintEOL(_u("}"));
  3796. }
  3797. #endif
  3798. // ----------------------------------------------------------------------
  3799. // AssertionNode
  3800. // ----------------------------------------------------------------------
  3801. CharCount AssertionNode::LiteralLength() const
  3802. {
  3803. return 0;
  3804. }
  3805. bool AssertionNode::IsCharOrPositiveSet() const
  3806. {
  3807. return false;
  3808. }
  3809. CharCount AssertionNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  3810. {
  3811. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3812. return body->TransferPass0(compiler, litbuf);
  3813. }
  3814. void AssertionNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  3815. {
  3816. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3817. body->TransferPass1(compiler, litbuf);
  3818. }
  3819. bool AssertionNode::IsRefiningAssertion(Compiler& compiler)
  3820. {
  3821. return !isNegation;
  3822. }
  3823. void AssertionNode::AnnotatePass0(Compiler& compiler)
  3824. {
  3825. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3826. isWord = false;
  3827. body->AnnotatePass0(compiler);
  3828. }
  3829. void AssertionNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  3830. {
  3831. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3832. features = HasAssertion;
  3833. body->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, false, parentNotNegated && !isNegation);
  3834. features |= body->features;
  3835. thisConsumes.Exact(0);
  3836. if (isNegation)
  3837. firstSet = compiler.standardChars->GetFullSet();
  3838. else
  3839. firstSet = body->firstSet;
  3840. isFirstExact = false;
  3841. if (isNegation)
  3842. // This will always fail
  3843. isThisIrrefutable = false;
  3844. else
  3845. // If body is irrefutable overall assertion is irrefutable
  3846. isThisIrrefutable = body->isThisIrrefutable;
  3847. isThisWillNotProgress = true;
  3848. isThisWillNotRegress = true;
  3849. isNotInLoop = parentNotInLoop;
  3850. isAtLeastOnce = parentAtLeastOnce;
  3851. isNotSpeculative = parentNotSpeculative;
  3852. isNotNegated = parentNotNegated;
  3853. }
  3854. void AssertionNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  3855. {
  3856. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3857. prevConsumes = accumConsumes;
  3858. isPrevWillNotProgress = accumPrevWillNotProgress;
  3859. isPrevWillNotRegress = accumPrevWillNotRegress;
  3860. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  3861. }
  3862. void AssertionNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  3863. {
  3864. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3865. followConsumes = accumConsumes;
  3866. followSet = accumFollow;
  3867. isFollowIrrefutable = accumFollowIrrefutable;
  3868. isFollowEOL = accumFollowEOL;
  3869. // Can't say anything about what the assertion body will see at its end
  3870. CountDomain innerConsumes;
  3871. CharSet<Char>* innerFollow = compiler.standardChars->GetFullSet();
  3872. // We can never backtrack into the body of an assertion (the continuation stack is cut)
  3873. body->AnnotatePass3(compiler, innerConsumes, innerFollow, true, false);
  3874. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  3875. }
  3876. void AssertionNode::AnnotatePass4(Compiler& compiler)
  3877. {
  3878. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3879. body->AnnotatePass4(compiler);
  3880. // Even if body is non-deterministic we cut the choicepoints on exit from the assertion,
  3881. // so overall assertion is deterministic.
  3882. isDeterministic = true;
  3883. //
  3884. // Compilation scheme: Fail
  3885. //
  3886. // If body is irrefutable, assertion will always fail (and will leave groups empty).
  3887. //
  3888. if (isNegation && body->isThisIrrefutable)
  3889. {
  3890. // ***COMMIT***
  3891. scheme = Fail;
  3892. return;
  3893. }
  3894. //
  3895. // Compilation scheme: Succ
  3896. //
  3897. // If body is irrefutable, assertion will always succeed. If it does not define groups
  3898. // we can eliminate it altogether.
  3899. //
  3900. if (!isNegation && body->isThisIrrefutable && !body->ContainsDefineGroup())
  3901. {
  3902. // **COMMIT**
  3903. scheme = Succ;
  3904. return;
  3905. }
  3906. //
  3907. // Compilation scheme: BeginEnd
  3908. //
  3909. scheme = BeginEnd;
  3910. }
  3911. bool AssertionNode::SupportsPrefixSkipping(Compiler& compiler) const
  3912. {
  3913. return false;
  3914. }
  3915. Node* AssertionNode::HeadSyncronizingNode(Compiler& compiler)
  3916. {
  3917. return 0;
  3918. }
  3919. CharCount AssertionNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  3920. {
  3921. return 0;
  3922. }
  3923. void AssertionNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  3924. {
  3925. Assert(false);
  3926. }
  3927. void AssertionNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  3928. {
  3929. }
  3930. void AssertionNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  3931. {
  3932. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  3933. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  3934. }
  3935. void AssertionNode::Emit(Compiler& compiler, CharCount& skipped)
  3936. {
  3937. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3938. Assert(skipped == 0);
  3939. switch (scheme)
  3940. {
  3941. case BeginEnd:
  3942. {
  3943. //
  3944. // Compilation scheme:
  3945. //
  3946. // BeginAssertion Lexit
  3947. // <body>
  3948. // EndAssertion
  3949. // Lexit:
  3950. //
  3951. int minBodyGroupId = compiler.program->numGroups;
  3952. int maxBodyGroupId = -1;
  3953. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3954. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginAssertionInst, isNegation, minBodyGroupId, maxBodyGroupId)->nextLabel);
  3955. body->Emit(compiler, skipped);
  3956. EMIT(compiler, EndAssertionInst);
  3957. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3958. break;
  3959. }
  3960. case Succ:
  3961. {
  3962. // Nothing to emit
  3963. break;
  3964. }
  3965. case Fail:
  3966. {
  3967. //
  3968. // Compilation scheme:
  3969. //
  3970. // Fail
  3971. //
  3972. EMIT(compiler, FailInst);
  3973. break;
  3974. }
  3975. }
  3976. }
  3977. CharCount AssertionNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  3978. {
  3979. Assert(false);
  3980. return 0;
  3981. }
  3982. bool AssertionNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  3983. {
  3984. return false;
  3985. }
  3986. bool AssertionNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  3987. {
  3988. return false;
  3989. }
  3990. bool AssertionNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  3991. {
  3992. Assert(false);
  3993. return false;
  3994. }
  3995. #if ENABLE_REGEX_CONFIG_OPTIONS
  3996. void AssertionNode::Print(DebugWriter* w, const Char* litbuf) const
  3997. {
  3998. w->PrintEOL(_u("Assertion(%s)"), isNegation ? _u("negative") : _u("positive"));
  3999. PrintAnnotations(w);
  4000. w->PrintEOL(_u("{"));
  4001. w->Indent();
  4002. body->Print(w, litbuf);
  4003. w->Unindent();
  4004. w->PrintEOL(_u("}"));
  4005. }
  4006. #endif
  4007. // ----------------------------------------------------------------------
  4008. // Compiler
  4009. // ----------------------------------------------------------------------
  4010. Compiler::Compiler
  4011. ( Js::ScriptContext* scriptContext
  4012. , ArenaAllocator* ctAllocator
  4013. , ArenaAllocator* rtAllocator
  4014. , StandardChars<Char>* standardChars
  4015. , Program* program
  4016. #if ENABLE_REGEX_CONFIG_OPTIONS
  4017. , DebugWriter* w
  4018. , RegexStats* stats
  4019. #endif
  4020. )
  4021. : scriptContext(scriptContext)
  4022. , ctAllocator(ctAllocator)
  4023. , rtAllocator(rtAllocator)
  4024. , standardChars(standardChars)
  4025. #if ENABLE_REGEX_CONFIG_OPTIONS
  4026. , w(w)
  4027. , stats(stats)
  4028. #endif
  4029. , program(program)
  4030. , instBuf(0)
  4031. , instLen(0)
  4032. , instNext(0)
  4033. , nextLoopId(0)
  4034. {}
  4035. void Compiler::CaptureNoLiterals(Program* program)
  4036. {
  4037. program->rep.insts.litbuf = nullptr;
  4038. program->rep.insts.litbufLen = 0;
  4039. }
  4040. void Compiler::CaptureLiterals(Node* root, const Char* litbuf)
  4041. {
  4042. // Program will own literal buffer. Prepare buffer and nodes for case-invariant matching if necessary.
  4043. CharCount finalLen = root->TransferPass0(*this, litbuf);
  4044. if (finalLen < root->LiteralLength()) // overflowed
  4045. {
  4046. Js::Throw::OutOfMemory();
  4047. }
  4048. program->rep.insts.litbuf = finalLen == 0 ? 0 : RecyclerNewArrayLeaf(scriptContext->GetRecycler(), Char, finalLen);
  4049. program->rep.insts.litbufLen = 0;
  4050. root->TransferPass1(*this, litbuf);
  4051. Assert(program->rep.insts.litbufLen == finalLen);
  4052. }
  4053. void Compiler::EmitAndCaptureSuccInst(Recycler* recycler, Program* program)
  4054. {
  4055. program->rep.insts.insts = (uint8*)RecyclerNewLeaf(recycler, SuccInst);
  4056. program->rep.insts.instsLen = sizeof(SuccInst);
  4057. program->numLoops = 0;
  4058. }
  4059. void Compiler::CaptureInsts()
  4060. {
  4061. program->rep.insts.insts = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), uint8, instNext);
  4062. program->rep.insts.instsLen = instNext;
  4063. memcpy_s(program->rep.insts.insts, instNext, instBuf, instNext);
  4064. program->numLoops = nextLoopId;
  4065. }
  4066. void Compiler::FreeBody()
  4067. {
  4068. if (instBuf != 0)
  4069. {
  4070. ctAllocator->Free(instBuf, instLen);
  4071. instBuf = 0;
  4072. instLen = 0;
  4073. instNext = 0;
  4074. }
  4075. }
  4076. void Compiler::CompileEmptyRegex
  4077. ( Program* program
  4078. , RegexPattern* pattern
  4079. #if ENABLE_REGEX_CONFIG_OPTIONS
  4080. , DebugWriter* w
  4081. , RegexStats* stats
  4082. #endif
  4083. )
  4084. {
  4085. program->tag = Program::ProgramTag::InstructionsTag;
  4086. CaptureNoLiterals(program);
  4087. EmitAndCaptureSuccInst(pattern->GetScriptContext()->GetRecycler(), program);
  4088. }
  4089. void Compiler::Compile
  4090. ( Js::ScriptContext* scriptContext
  4091. , ArenaAllocator* ctAllocator
  4092. , ArenaAllocator* rtAllocator
  4093. , StandardChars<Char>* standardChars
  4094. , Program *program
  4095. , Node* root
  4096. , const Char* litbuf
  4097. , RegexPattern* pattern
  4098. #if ENABLE_REGEX_CONFIG_OPTIONS
  4099. , DebugWriter* w
  4100. , RegexStats* stats
  4101. #endif
  4102. )
  4103. {
  4104. #if ENABLE_REGEX_CONFIG_OPTIONS
  4105. if (w != 0 && REGEX_CONFIG_FLAG(RegexDebugAST))
  4106. {
  4107. w->PrintEOL(_u("REGEX AST /%s/ {"), PointerValue(program->source));
  4108. w->Indent();
  4109. root->Print(w, litbuf);
  4110. w->Unindent();
  4111. w->PrintEOL(_u("}"));
  4112. w->Flush();
  4113. }
  4114. #endif
  4115. Compiler compiler
  4116. ( scriptContext
  4117. , ctAllocator
  4118. , rtAllocator
  4119. , standardChars
  4120. , program
  4121. #if ENABLE_REGEX_CONFIG_OPTIONS
  4122. , w
  4123. , stats
  4124. #endif
  4125. );
  4126. bool compiled = false;
  4127. if (REGEX_CONFIG_FLAG(RegexOptimize))
  4128. {
  4129. // SPECIAL CASE: Octoquad/trigrams
  4130. // (must handle before converting to case-insensitive form since the later interferes with octoquad pattern recognizer)
  4131. if (OctoquadIdentifier::Qualifies(program))
  4132. {
  4133. int numCodes;
  4134. char localCodeToChar[TrigramAlphabet::AlphaCount];
  4135. char localCharToCode[TrigramAlphabet::AsciiTableSize];
  4136. char (*codeToChar)[TrigramAlphabet::AlphaCount];
  4137. char (*charToCode)[TrigramAlphabet::AsciiTableSize];
  4138. TrigramAlphabet *trigramAlphabet = scriptContext->GetTrigramAlphabet();
  4139. if(trigramAlphabet)
  4140. {
  4141. numCodes = TrigramAlphabet::AlphaCount;
  4142. codeToChar = &trigramAlphabet->alpha;
  4143. charToCode = &trigramAlphabet->alphaBits;
  4144. }
  4145. else
  4146. {
  4147. numCodes = 0;
  4148. codeToChar = &localCodeToChar;
  4149. charToCode = &localCharToCode;
  4150. }
  4151. OctoquadIdentifier oi(numCodes, *codeToChar, *charToCode);
  4152. // We haven't captured literals yet: temporarily set the program's litbuf to be the parser's litbuf
  4153. Assert(program->rep.insts.litbuf == nullptr);
  4154. program->rep.insts.litbuf = (Char*)litbuf;
  4155. if (root->IsOctoquad(compiler, &oi) && oi.IsOctoquad())
  4156. {
  4157. program->rep.insts.litbuf = nullptr;
  4158. oi.InitializeTrigramInfo(scriptContext, pattern);
  4159. program->tag = Program::ProgramTag::OctoquadTag;
  4160. program->rep.octoquad.matcher = OctoquadMatcher::New(scriptContext->GetRecycler(), standardChars, program->GetCaseMappingSource(), &oi);
  4161. compiled = true;
  4162. }
  4163. else
  4164. program->rep.insts.litbuf = nullptr;
  4165. }
  4166. }
  4167. if (!compiled)
  4168. {
  4169. if (REGEX_CONFIG_FLAG(RegexOptimize))
  4170. {
  4171. Char c = 0;
  4172. if (root->IsSingleChar(compiler, c))
  4173. {
  4174. // SPECIAL CASE: c
  4175. program->tag = Program::ProgramTag::SingleCharTag;
  4176. program->rep.singleChar.c = c;
  4177. }
  4178. else if (root->IsBoundedWord(compiler))
  4179. {
  4180. // SPECIAL CASE: \b\w+\b
  4181. program->tag = Program::ProgramTag::BoundedWordTag;
  4182. }
  4183. else if (root->IsLeadingTrailingSpaces(compiler,
  4184. program->rep.leadingTrailingSpaces.beginMinMatch,
  4185. program->rep.leadingTrailingSpaces.endMinMatch))
  4186. {
  4187. // SPECIAL CASE: ^\s*|\s*$
  4188. program->tag = Program::ProgramTag::LeadingTrailingSpacesTag;
  4189. }
  4190. else if (root->IsBOILiteral2(compiler))
  4191. {
  4192. program->tag = Program::ProgramTag::BOILiteral2Tag;
  4193. program->rep.boiLiteral2.literal = *(DWORD *)litbuf;
  4194. }
  4195. else
  4196. {
  4197. program->tag = Program::ProgramTag::InstructionsTag;
  4198. compiler.CaptureLiterals(root, litbuf);
  4199. root->AnnotatePass0(compiler);
  4200. root->AnnotatePass1(compiler, true, true, true, true);
  4201. // Nothing comes before or after overall pattern
  4202. CountDomain consumes(0);
  4203. // Match could progress from lhs (since we try successive start positions), but can never regress
  4204. root->AnnotatePass2(compiler, consumes, false, true);
  4205. // Anything could follow an end of pattern match
  4206. CharSet<Char>* follow = standardChars->GetFullSet();
  4207. root->AnnotatePass3(compiler, consumes, follow, true, false);
  4208. root->AnnotatePass4(compiler);
  4209. #if ENABLE_REGEX_CONFIG_OPTIONS
  4210. if (w != 0 && REGEX_CONFIG_FLAG(RegexDebugAST) && REGEX_CONFIG_FLAG(RegexDebugAnnotatedAST))
  4211. {
  4212. w->PrintEOL(_u("REGEX ANNOTATED AST /%s/ {"), PointerValue(program->source));
  4213. w->Indent();
  4214. root->Print(w, program->rep.insts.litbuf);
  4215. w->Unindent();
  4216. w->PrintEOL(_u("}"));
  4217. w->Flush();
  4218. }
  4219. #endif
  4220. CharCount skipped = 0;
  4221. // If the root Node has a hard fail BOI, we should not emit any synchronize Nodes
  4222. // since we can easily just search from the beginning.
  4223. if (root->hasInitialHardFailBOI == false)
  4224. {
  4225. // If the root Node doesn't have hard fail BOI but sticky flag is present don't synchronize Nodes
  4226. // since we can easily just search from the beginning. Instead set to special InstructionTag
  4227. if ((program->flags & StickyRegexFlag) != 0)
  4228. {
  4229. compiler.SetBOIInstructionsProgramForStickyFlagTag();
  4230. }
  4231. else
  4232. {
  4233. Node* bestSyncronizingNode = 0;
  4234. root->BestSyncronizingNode(compiler, bestSyncronizingNode);
  4235. Node* headSyncronizingNode = root->HeadSyncronizingNode(compiler);
  4236. if ((bestSyncronizingNode == 0 && headSyncronizingNode != 0) ||
  4237. (bestSyncronizingNode != 0 && headSyncronizingNode == bestSyncronizingNode))
  4238. {
  4239. // Scan and consume the head, continue with rest assuming head has been consumed
  4240. skipped = headSyncronizingNode->EmitScan(compiler, true);
  4241. }
  4242. else if (bestSyncronizingNode != 0)
  4243. {
  4244. // Scan for the synchronizing node, then backup ready for entire pattern
  4245. skipped = bestSyncronizingNode->EmitScan(compiler, false);
  4246. Assert(skipped == 0);
  4247. // We're synchronizing to a non-head node; if we have to back up, then try to synchronize to a character
  4248. // in the first set before running the remaining instructions
  4249. if (!bestSyncronizingNode->prevConsumes.CouldMatchEmpty()) // must back up at least one character
  4250. skipped = root->EmitScanFirstSet(compiler);
  4251. }
  4252. else
  4253. {
  4254. // Optionally scan for a character in the overall pattern's FIRST set, possibly consume it,
  4255. // then match all or remainder of pattern
  4256. skipped = root->EmitScanFirstSet(compiler);
  4257. }
  4258. }
  4259. }
  4260. root->Emit(compiler, skipped);
  4261. compiler.Emit<SuccInst>();
  4262. compiler.CaptureInsts();
  4263. }
  4264. }
  4265. else
  4266. {
  4267. program->tag = Program::ProgramTag::InstructionsTag;
  4268. compiler.CaptureLiterals(root, litbuf);
  4269. CharCount skipped = 0;
  4270. root->Emit(compiler, skipped);
  4271. compiler.Emit<SuccInst>();
  4272. compiler.CaptureInsts();
  4273. }
  4274. }
  4275. #if ENABLE_REGEX_CONFIG_OPTIONS
  4276. if (w != 0)
  4277. {
  4278. w->PrintEOL(_u("REGEX PROGRAM /%s/"), PointerValue(program->source));
  4279. program->Print(w);
  4280. w->Flush();
  4281. }
  4282. #endif
  4283. }
  4284. }