RegexCompileTime.cpp 171 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. namespace UnifiedRegex
  7. {
  8. // ----------------------------------------------------------------------
  9. // Compiler (inlines etc)
  10. // ----------------------------------------------------------------------
  11. uint8* Compiler::Emit(size_t size)
  12. {
  13. Assert(size <= UINT32_MAX);
  14. if (instLen - instNext < size)
  15. {
  16. CharCount newLen = max(instLen, initInstBufSize);
  17. CharCount instLenPlus = (CharCount)(instLen + size - 1);
  18. // check for overflow
  19. if (instLenPlus < instLen || instLenPlus * 2 < instLenPlus)
  20. {
  21. Js::Throw::OutOfMemory();
  22. }
  23. while (newLen <= instLenPlus)
  24. {
  25. newLen *= 2;
  26. }
  27. instBuf = (uint8*)ctAllocator->Realloc(instBuf, instLen, newLen);
  28. instLen = newLen;
  29. }
  30. uint8* inst = instBuf + instNext;
  31. instNext += (CharCount)size;
  32. return inst;
  33. }
  34. template <typename T>
  35. T* Compiler::Emit()
  36. {
  37. return new(Emit(sizeof(T))) T;
  38. }
  39. #define EMIT(compiler, T, ...) (new (compiler.Emit(sizeof(T))) T(__VA_ARGS__))
  40. #define L2I(O, label) LabelToInstPointer<O##Inst>(Inst::O, label)
  41. // Remember: The machine address of an instruction is no longer valid after a subsequent emit,
  42. // so all label fixups must be done using Compiler::GetFixup / Compiler::DoFixup
  43. // ----------------------------------------------------------------------
  44. // Node
  45. // ----------------------------------------------------------------------
  46. void Node::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  47. {
  48. Assert(false);
  49. }
  50. CharCount Node::EmitScanFirstSet(Compiler& compiler)
  51. {
  52. Assert(prevConsumes.IsExact(0));
  53. if (thisConsumes.CouldMatchEmpty())
  54. // Can't be sure of consuming something in FIRST
  55. return 0;
  56. if (firstSet->Count() > maxSyncToSetSize)
  57. // HEURISTIC: If FIRST is large we'll get too many false positives
  58. return 0;
  59. //
  60. // Compilation scheme:
  61. //
  62. // SyncTo(Char|Char2|Set)And(Consume|Continue)
  63. //
  64. Char entries[CharSet<Char>::MaxCompact];
  65. int count = firstSet->GetCompactEntries(2, entries);
  66. if (SupportsPrefixSkipping(compiler))
  67. {
  68. if (count == 1)
  69. EMIT(compiler, SyncToCharAndConsumeInst, entries[0]);
  70. else if (count == 2)
  71. EMIT(compiler, SyncToChar2SetAndConsumeInst, entries[0], entries[1]);
  72. else
  73. EMIT(compiler, SyncToSetAndConsumeInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  74. return 1;
  75. }
  76. else
  77. {
  78. if (count == 1)
  79. EMIT(compiler, SyncToCharAndContinueInst, entries[0]);
  80. else if (count == 2)
  81. EMIT(compiler, SyncToChar2SetAndContinueInst, entries[0], entries[1]);
  82. else
  83. EMIT(compiler, SyncToSetAndContinueInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  84. return 0;
  85. }
  86. }
  87. bool Node::IsBetterSyncronizingNode(Compiler& compiler, Node* curr, Node* proposed)
  88. {
  89. int proposedNumLiterals = 0;
  90. CharCount proposedLength = proposed->MinSyncronizingLiteralLength(compiler, proposedNumLiterals);
  91. if (proposedLength == 0 || proposedNumLiterals > maxNumSyncLiterals)
  92. // Not a synchronizable node or too many literals.
  93. return false;
  94. if (curr == nullptr)
  95. // We'll take whatever we can get
  96. return true;
  97. int currNumLiterals = 0;
  98. CharCount currLength = curr->MinSyncronizingLiteralLength(compiler, currNumLiterals);
  99. // Lexicographic ordering based on
  100. // - whether literal length is above a threshold (above is better)
  101. // - number of literals (smaller is better)
  102. // - upper bound on backup (finite is better)
  103. // - minimum literal length (longer is better)
  104. // - actual backup upper bound (shorter is better)
  105. if (proposedLength >= preferredMinSyncToLiteralLength
  106. && currLength < preferredMinSyncToLiteralLength)
  107. {
  108. return true;
  109. }
  110. if (proposedLength < preferredMinSyncToLiteralLength
  111. && currLength >= preferredMinSyncToLiteralLength)
  112. {
  113. return false;
  114. }
  115. if (proposedNumLiterals < currNumLiterals)
  116. return true;
  117. if (proposedNumLiterals > currNumLiterals)
  118. return false;
  119. if (!proposed->prevConsumes.IsUnbounded() && curr->prevConsumes.IsUnbounded())
  120. return true;
  121. if (proposed->prevConsumes.IsUnbounded() && !curr->prevConsumes.IsUnbounded())
  122. return false;
  123. if (proposedLength > currLength)
  124. return true;
  125. if (proposedLength < currLength)
  126. return false;
  127. return proposed->prevConsumes.upper < curr->prevConsumes.upper;
  128. }
  129. bool Node::IsSingleChar(Compiler& compiler, Char& outChar) const
  130. {
  131. if (tag != Node::MatchChar)
  132. return false;
  133. const MatchCharNode* node = (const MatchCharNode*)this;
  134. if (node->isEquivClass)
  135. return false;
  136. outChar = node->cs[0];
  137. return true;
  138. }
  139. bool Node::IsBoundedWord(Compiler& compiler) const
  140. {
  141. if (tag != Node::Concat)
  142. return false;
  143. const ConcatNode* concatNode = (const ConcatNode *)this;
  144. if (concatNode->head->tag != Node::WordBoundary ||
  145. concatNode->tail == 0 ||
  146. concatNode->tail->head->tag != Node::Loop ||
  147. concatNode->tail->tail == 0 ||
  148. concatNode->tail->tail->head->tag != Node::WordBoundary ||
  149. concatNode->tail->tail->tail != 0)
  150. return false;
  151. const WordBoundaryNode* enter = (const WordBoundaryNode*)concatNode->head;
  152. const LoopNode* loop = (const LoopNode*)concatNode->tail->head;
  153. const WordBoundaryNode* leave = (const WordBoundaryNode*)concatNode->tail->tail->head;
  154. if (enter->isNegation ||
  155. !loop->isGreedy ||
  156. loop->repeats.lower != 1 ||
  157. loop->repeats.upper != CharCountFlag ||
  158. loop->body->tag != Node::MatchSet ||
  159. leave->isNegation)
  160. return false;
  161. const MatchSetNode* wordSet = (const MatchSetNode*)loop->body;
  162. if (wordSet->isNegation)
  163. return false;
  164. return wordSet->set.IsEqualTo(*compiler.standardChars->GetWordSet());
  165. }
  166. bool Node::IsBOILiteral2(Compiler& compiler) const
  167. {
  168. if (tag != Node::Concat)
  169. return false;
  170. const ConcatNode* concatNode = (const ConcatNode *)this;
  171. if ((compiler.program->flags & (IgnoreCaseRegexFlag | MultilineRegexFlag)) != 0 ||
  172. concatNode->head->tag != Node::BOL ||
  173. concatNode->tail == nullptr ||
  174. concatNode->tail->head->tag != Node::MatchLiteral ||
  175. concatNode->tail->tail != nullptr ||
  176. ((MatchLiteralNode *)concatNode->tail->head)->isEquivClass ||
  177. ((MatchLiteralNode *)concatNode->tail->head)->length != 2)
  178. {
  179. return false;
  180. }
  181. return true;
  182. }
  183. bool Node::IsLeadingTrailingSpaces(Compiler& compiler, CharCount& leftMinMatch, CharCount& rightMinMatch) const
  184. {
  185. if (tag != Node::Alt)
  186. return false;
  187. if (compiler.program->flags & MultilineRegexFlag)
  188. return false;
  189. const AltNode* altNode = (const AltNode*)this;
  190. if (altNode->head->tag != Node::Concat ||
  191. altNode->tail == 0 ||
  192. altNode->tail->head->tag != Node::Concat ||
  193. altNode->tail->tail != 0)
  194. return false;
  195. const ConcatNode* left = (const ConcatNode*)altNode->head;
  196. const ConcatNode* right = (const ConcatNode*)altNode->tail->head;
  197. if (left->head->tag != Node::BOL ||
  198. left->tail == 0 ||
  199. left->tail->head->tag != Node::Loop ||
  200. left->tail->tail != 0)
  201. return false;
  202. if (right->head->tag != Node::Loop ||
  203. right->tail == 0 ||
  204. right->tail->head->tag != Node::EOL ||
  205. right->tail->tail != 0)
  206. return false;
  207. const LoopNode* leftLoop = (const LoopNode*)left->tail->head;
  208. const LoopNode* rightLoop = (const LoopNode*)right->head;
  209. if (!leftLoop->isGreedy ||
  210. leftLoop->repeats.upper != CharCountFlag ||
  211. leftLoop->body->tag != Node::MatchSet ||
  212. !rightLoop->isGreedy ||
  213. rightLoop->repeats.upper != CharCountFlag ||
  214. rightLoop->body->tag != Node::MatchSet)
  215. return false;
  216. const MatchSetNode* leftSet = (const MatchSetNode*)leftLoop->body;
  217. const MatchSetNode* rightSet = (const MatchSetNode*)rightLoop->body;
  218. if (leftSet->isNegation ||
  219. rightSet->isNegation)
  220. return false;
  221. leftMinMatch = leftLoop->repeats.lower;
  222. rightMinMatch = rightLoop->repeats.lower;
  223. return
  224. leftSet->set.IsEqualTo(*compiler.standardChars->GetWhitespaceSet()) &&
  225. rightSet->set.IsEqualTo(*compiler.standardChars->GetWhitespaceSet());
  226. }
  227. #if ENABLE_REGEX_CONFIG_OPTIONS
  228. void Node::PrintAnnotations(DebugWriter* w) const
  229. {
  230. if (firstSet != 0)
  231. {
  232. w->PrintEOL(_u("<"));
  233. w->Indent();
  234. w->Print(_u("features: {"));
  235. bool first = true;
  236. for (uint i = Empty; i <= Assertion; i++)
  237. {
  238. if ((features & (1 << i)) != 0)
  239. {
  240. if (first)
  241. first = false;
  242. else
  243. w->Print(_u(","));
  244. switch (i)
  245. {
  246. case Empty: w->Print(_u("Empty")); break;
  247. case BOL: w->Print(_u("BOL")); break;
  248. case EOL: w->Print(_u("EOL")); break;
  249. case WordBoundary: w->Print(_u("WordBoundary")); break;
  250. case MatchLiteral: w->Print(_u("MatchLiteral")); break;
  251. case MatchChar: w->Print(_u("MatchChar")); break;
  252. case Concat: w->Print(_u("Concat")); break;
  253. case Alt: w->Print(_u("Alt")); break;
  254. case DefineGroup: w->Print(_u("DefineGroup")); break;
  255. case MatchGroup: w->Print(_u("MatchGroup")); break;
  256. case Loop: w->Print(_u("Loop")); break;
  257. case MatchSet: w->Print(_u("MatchSet")); break;
  258. case Assertion: w->Print(_u("Assertion")); break;
  259. }
  260. }
  261. }
  262. w->PrintEOL(_u("}"));
  263. w->Print(_u("firstSet: "));
  264. firstSet->Print(w);
  265. if (isFirstExact)
  266. w->Print(_u(" (exact)"));
  267. w->EOL();
  268. w->Print(_u("followSet: "));
  269. followSet->Print(w);
  270. w->EOL();
  271. w->Print(_u("prevConsumes: "));
  272. prevConsumes.Print(w);
  273. w->EOL();
  274. w->Print(_u("thisConsumes: "));
  275. thisConsumes.Print(w);
  276. w->EOL();
  277. w->Print(_u("followConsumes: "));
  278. followConsumes.Print(w);
  279. w->EOL();
  280. w->PrintEOL(_u("isThisIrrefutable: %s"), isThisIrrefutable ? _u("true") : _u("false"));
  281. w->PrintEOL(_u("isFollowIrrefutable: %s"), isFollowIrrefutable ? _u("true") : _u("false"));
  282. w->PrintEOL(_u("isWord: %s"), isWord ? _u("true") : _u("false"));
  283. w->PrintEOL(_u("isThisWillNotProgress: %s"), isThisWillNotProgress ? _u("true") : _u("false"));
  284. w->PrintEOL(_u("isThisWillNotRegress: %s"), isThisWillNotRegress ? _u("true") : _u("false"));
  285. w->PrintEOL(_u("isPrevWillNotProgress: %s"), isPrevWillNotProgress ? _u("true") : _u("false"));
  286. w->PrintEOL(_u("isPrevWillNotRegress: %s"), isPrevWillNotRegress ? _u("true") : _u("false"));
  287. w->PrintEOL(_u("isDeterministic: %s"), isDeterministic ? _u("true") : _u("false"));
  288. w->PrintEOL(_u("isNotInLoop: %s"), isNotInLoop ? _u("true") : _u("false"));
  289. w->PrintEOL(_u("isNotNegated: %s"), isNotNegated ? _u("true") : _u("false"));
  290. w->PrintEOL(_u("isAtLeastOnce: %s"), isAtLeastOnce ? _u("true") : _u("false"));
  291. w->PrintEOL(_u("hasInitialHardFailBOI: %s"), hasInitialHardFailBOI ? _u("true") : _u("false"));
  292. w->Unindent();
  293. w->PrintEOL(_u(">"));
  294. }
  295. }
  296. #endif
  297. // ----------------------------------------------------------------------
  298. // SimpleNode
  299. // ----------------------------------------------------------------------
  300. CharCount SimpleNode::LiteralLength() const
  301. {
  302. return 0;
  303. }
  304. bool SimpleNode::IsCharOrPositiveSet() const
  305. {
  306. return false;
  307. }
  308. CharCount SimpleNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  309. {
  310. return 0;
  311. }
  312. void SimpleNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  313. {
  314. }
  315. bool SimpleNode::IsRefiningAssertion(Compiler& compiler)
  316. {
  317. return tag == EOL && (compiler.program->flags & MultilineRegexFlag) != 0;
  318. }
  319. void SimpleNode::AnnotatePass0(Compiler& compiler)
  320. {
  321. isWord = false;
  322. }
  323. void SimpleNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  324. {
  325. isFirstExact = false;
  326. thisConsumes.Exact(0);
  327. isThisWillNotProgress = true;
  328. isThisWillNotRegress = true;
  329. isNotInLoop = parentNotInLoop;
  330. isAtLeastOnce = parentAtLeastOnce;
  331. isNotSpeculative = parentNotSpeculative;
  332. isNotNegated = parentNotNegated;
  333. switch (tag)
  334. {
  335. case Empty:
  336. features = HasEmpty;
  337. firstSet = compiler.standardChars->GetEmptySet();
  338. isThisIrrefutable = true;
  339. break;
  340. case BOL:
  341. features = HasBOL;
  342. firstSet = compiler.standardChars->GetFullSet();
  343. isThisIrrefutable = false;
  344. break;
  345. case EOL:
  346. features = HasEOL;
  347. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  348. firstSet = compiler.standardChars->GetNewlineSet();
  349. else
  350. firstSet = compiler.standardChars->GetEmptySet();
  351. isThisIrrefutable = false;
  352. break;
  353. default:
  354. Assert(false);
  355. }
  356. }
  357. void SimpleNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  358. {
  359. prevConsumes = accumConsumes;
  360. isPrevWillNotProgress = accumPrevWillNotProgress;
  361. isPrevWillNotRegress = accumPrevWillNotRegress;
  362. }
  363. void SimpleNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  364. {
  365. followConsumes = accumConsumes;
  366. followSet = accumFollow;
  367. isFollowIrrefutable = accumFollowIrrefutable;
  368. isFollowEOL = accumFollowEOL;
  369. hasInitialHardFailBOI = ((tag == BOL) &&
  370. prevConsumes.IsExact(0) &&
  371. (compiler.program->flags & MultilineRegexFlag) == 0 &&
  372. isAtLeastOnce &&
  373. isNotNegated &&
  374. isPrevWillNotRegress);
  375. }
  376. void SimpleNode::AnnotatePass4(Compiler& compiler)
  377. {
  378. isDeterministic = true;
  379. }
  380. bool SimpleNode::SupportsPrefixSkipping(Compiler& compiler) const
  381. {
  382. return false;
  383. }
  384. Node* SimpleNode::HeadSyncronizingNode(Compiler& compiler)
  385. {
  386. return 0;
  387. }
  388. CharCount SimpleNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  389. {
  390. return 0;
  391. }
  392. void SimpleNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  393. {
  394. Assert(false);
  395. }
  396. void SimpleNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  397. {
  398. }
  399. void SimpleNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  400. {
  401. }
  402. void SimpleNode::Emit(Compiler& compiler, CharCount& skipped)
  403. {
  404. Assert(skipped == 0);
  405. switch (tag)
  406. {
  407. case Empty:
  408. // Nothing
  409. break;
  410. case BOL:
  411. {
  412. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  413. {
  414. //
  415. // Compilation scheme:
  416. //
  417. // BOLTest
  418. //
  419. EMIT(compiler, BOLTestInst);
  420. }
  421. else
  422. {
  423. if (compiler.CurrentLabel() == 0)
  424. {
  425. // The first instruction is BOI, change the tag and only execute it once
  426. // without looping every start position
  427. compiler.SetBOIInstructionsProgramTag();
  428. }
  429. else
  430. {
  431. //
  432. // Compilation scheme:
  433. //
  434. // BOITest
  435. //
  436. // Obviously starting later in the string won't help, so can hard fail if:
  437. // - this pattern must always be matched
  438. // - not in a negative assertion
  439. // - backtracking could never rewind the input pointer
  440. //
  441. EMIT(compiler, BOITestInst, isAtLeastOnce && isNotNegated && isPrevWillNotRegress);
  442. }
  443. }
  444. break;
  445. }
  446. case EOL:
  447. {
  448. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  449. //
  450. // Compilation scheme:
  451. //
  452. // EOLTest
  453. //
  454. EMIT(compiler, EOLTestInst);
  455. else
  456. //
  457. // Compilation scheme:
  458. //
  459. // EOITest
  460. //
  461. // Can hard fail if
  462. // - this pattern must always be matched
  463. // - not in a negative assertion
  464. // - backtracking could never advance the input pointer
  465. //
  466. EMIT(compiler, EOITestInst, isAtLeastOnce && isNotNegated && isPrevWillNotProgress);
  467. break;
  468. }
  469. default:
  470. Assert(false);
  471. }
  472. }
  473. CharCount SimpleNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  474. {
  475. Assert(false);
  476. return 0;
  477. }
  478. bool SimpleNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  479. {
  480. return false;
  481. }
  482. bool SimpleNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  483. {
  484. return tag == Empty;
  485. }
  486. bool SimpleNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  487. {
  488. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  489. Assert(tag == Empty);
  490. if (cont == 0)
  491. {
  492. if (trie->Count() > 0)
  493. // This literal is a proper prefix of an earlier literal
  494. return false;
  495. trie->SetAccepting();
  496. return true;
  497. }
  498. return cont->BuildCharTrie(compiler, trie, 0, isAcceptFirst);
  499. }
  500. #if ENABLE_REGEX_CONFIG_OPTIONS
  501. void SimpleNode::Print(DebugWriter* w, const Char* litbuf) const
  502. {
  503. switch (tag)
  504. {
  505. case Empty:
  506. w->Print(_u("Empty")); break;
  507. case BOL:
  508. w->Print(_u("BOL")); break;
  509. case EOL:
  510. w->Print(_u("EOL")); break;
  511. default:
  512. Assert(false);
  513. }
  514. w->PrintEOL(_u("()"));
  515. PrintAnnotations(w);
  516. }
  517. #endif
  518. // ----------------------------------------------------------------------
  519. // WordBoundaryNode
  520. // ----------------------------------------------------------------------
  521. CharCount WordBoundaryNode::LiteralLength() const
  522. {
  523. return 0;
  524. }
  525. bool WordBoundaryNode::IsCharOrPositiveSet() const
  526. {
  527. return false;
  528. }
  529. CharCount WordBoundaryNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  530. {
  531. return 0;
  532. }
  533. void WordBoundaryNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  534. {
  535. // WordChars and NonWordChars sets are already case invariant
  536. }
  537. bool WordBoundaryNode::IsRefiningAssertion(Compiler& compiler)
  538. {
  539. return mustIncludeEntering != mustIncludeLeaving;
  540. }
  541. void WordBoundaryNode::AnnotatePass0(Compiler& compiler)
  542. {
  543. isWord = false;
  544. }
  545. void WordBoundaryNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  546. {
  547. features = HasWordBoundary;
  548. thisConsumes.Exact(0);
  549. isFirstExact = false;
  550. isThisIrrefutable = false;
  551. isThisWillNotProgress = true;
  552. isThisWillNotRegress = true;
  553. isNotInLoop = parentNotInLoop;
  554. isAtLeastOnce = parentAtLeastOnce;
  555. isNotSpeculative = parentNotSpeculative;
  556. isNotNegated = parentNotNegated;
  557. if (isNegation)
  558. firstSet = compiler.standardChars->GetFullSet();
  559. else
  560. {
  561. if (mustIncludeEntering && !mustIncludeLeaving)
  562. firstSet = compiler.standardChars->GetWordSet();
  563. else if (mustIncludeLeaving && !mustIncludeEntering)
  564. firstSet = compiler.standardChars->GetNonWordSet();
  565. else
  566. firstSet = compiler.standardChars->GetFullSet();
  567. }
  568. }
  569. void WordBoundaryNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  570. {
  571. prevConsumes = accumConsumes;
  572. isPrevWillNotProgress = accumPrevWillNotProgress;
  573. isPrevWillNotRegress = accumPrevWillNotRegress;
  574. }
  575. void WordBoundaryNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  576. {
  577. followConsumes = accumConsumes;
  578. followSet = accumFollow;
  579. isFollowIrrefutable = accumFollowIrrefutable;
  580. isFollowEOL = accumFollowEOL;
  581. }
  582. void WordBoundaryNode::AnnotatePass4(Compiler& compiler)
  583. {
  584. isDeterministic = true;
  585. }
  586. bool WordBoundaryNode::SupportsPrefixSkipping(Compiler& compiler) const
  587. {
  588. return false;
  589. }
  590. Node* WordBoundaryNode::HeadSyncronizingNode(Compiler& compiler)
  591. {
  592. return 0;
  593. }
  594. CharCount WordBoundaryNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  595. {
  596. return 0;
  597. }
  598. void WordBoundaryNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  599. {
  600. Assert(false);
  601. }
  602. void WordBoundaryNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  603. {
  604. }
  605. void WordBoundaryNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  606. {
  607. }
  608. void WordBoundaryNode::Emit(Compiler& compiler, CharCount& skipped)
  609. {
  610. Assert(skipped == 0);
  611. //
  612. // Compilation scheme:
  613. //
  614. // WordBoundaryTest
  615. //
  616. EMIT(compiler, WordBoundaryTestInst, isNegation);
  617. }
  618. CharCount WordBoundaryNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  619. {
  620. Assert(false);
  621. return 0;
  622. }
  623. bool WordBoundaryNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  624. {
  625. return false;
  626. }
  627. bool WordBoundaryNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  628. {
  629. return false;
  630. }
  631. bool WordBoundaryNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  632. {
  633. Assert(false);
  634. return false;
  635. }
  636. #if ENABLE_REGEX_CONFIG_OPTIONS
  637. void WordBoundaryNode::Print(DebugWriter* w, const Char* litbuf) const
  638. {
  639. w->PrintEOL(_u("WordBoundary(%s, %s, %s)"), isNegation ? _u("negative") : _u("positive"), mustIncludeEntering ? _u("entering") : _u("-"), mustIncludeLeaving ? _u("leaving") : _u("-"));
  640. PrintAnnotations(w);
  641. }
  642. #endif
  643. // ----------------------------------------------------------------------
  644. // MatchLiteralNode
  645. // ----------------------------------------------------------------------
  646. CharCount MatchLiteralNode::LiteralLength() const
  647. {
  648. return length;
  649. }
  650. void MatchLiteralNode::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  651. {
  652. // Called during parsing only, so literal always in original form
  653. Assert(!isEquivClass);
  654. Assert(litbufNext + length <= litbufLen && offset + length <= litbufLen);
  655. #pragma prefast(suppress:26000, "The error said that offset + length >= litbufLen + 1, which is incorrect due to if statement below.")
  656. if (litbufNext + length <= litbufLen && offset + length <= litbufLen) // for prefast
  657. {
  658. js_wmemcpy_s(litbuf + litbufNext, litbufLen - litbufNext, litbuf + offset, length);
  659. }
  660. litbufNext += length;
  661. }
  662. bool MatchLiteralNode::IsCharOrPositiveSet() const
  663. {
  664. return false;
  665. }
  666. CharCount MatchLiteralNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  667. {
  668. Assert(length > 1);
  669. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0
  670. && !compiler.standardChars->IsTrivialString(compiler.program->GetCaseMappingSource(), litbuf + offset, length))
  671. {
  672. // We'll need to expand each character of literal into its equivalence class
  673. isEquivClass = true;
  674. return length * CaseInsensitive::EquivClassSize;
  675. }
  676. else
  677. return length;
  678. }
  679. void MatchLiteralNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  680. {
  681. CharCount nextLit = compiler.program->rep.insts.litbufLen;
  682. if (isEquivClass)
  683. {
  684. Assert((compiler.program->flags & IgnoreCaseRegexFlag) != 0);
  685. // Expand literal according to character equivalence classes
  686. for (CharCount i = 0; i < length; i++)
  687. {
  688. compiler.standardChars->ToEquivs(
  689. compiler.program->GetCaseMappingSource(),
  690. litbuf[offset + i],
  691. compiler.program->rep.insts.litbuf + nextLit + i * CaseInsensitive::EquivClassSize);
  692. }
  693. compiler.program->rep.insts.litbufLen += length * CaseInsensitive::EquivClassSize;
  694. }
  695. else
  696. {
  697. for (CharCount i = 0; i < length; i++)
  698. compiler.program->rep.insts.litbuf[nextLit + i] = litbuf[offset + i];
  699. compiler.program->rep.insts.litbufLen += length;
  700. }
  701. offset = nextLit;
  702. }
  703. void MatchLiteralNode::AnnotatePass0(Compiler& compiler)
  704. {
  705. const Char* litbuf = compiler.program->rep.insts.litbuf;
  706. for (CharCount i = offset; i < offset + length; i++)
  707. {
  708. if (!compiler.standardChars->IsWord(litbuf[i]))
  709. {
  710. isWord = false;
  711. return;
  712. }
  713. }
  714. isWord = true;
  715. }
  716. bool MatchLiteralNode::IsRefiningAssertion(Compiler& compiler)
  717. {
  718. return false;
  719. }
  720. void MatchLiteralNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  721. {
  722. features = HasMatchLiteral;
  723. thisConsumes.Exact(length);
  724. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  725. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  726. firstSet->Set(compiler.ctAllocator, compiler.program->rep.insts.litbuf[offset + i]);
  727. isFirstExact = true;
  728. isThisIrrefutable = false;
  729. isThisWillNotProgress = true;
  730. isThisWillNotRegress = true;
  731. isNotInLoop = parentNotInLoop;
  732. isAtLeastOnce = parentAtLeastOnce;
  733. isNotSpeculative = parentNotSpeculative;
  734. isNotNegated = parentNotNegated;
  735. }
  736. void MatchLiteralNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  737. {
  738. prevConsumes = accumConsumes;
  739. isPrevWillNotProgress = accumPrevWillNotProgress;
  740. isPrevWillNotRegress = accumPrevWillNotRegress;
  741. }
  742. void MatchLiteralNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  743. {
  744. followConsumes = accumConsumes;
  745. followSet = accumFollow;
  746. isFollowIrrefutable = accumFollowIrrefutable;
  747. isFollowEOL = accumFollowEOL;
  748. }
  749. void MatchLiteralNode::AnnotatePass4(Compiler& compiler)
  750. {
  751. isDeterministic = true;
  752. }
  753. bool MatchLiteralNode::SupportsPrefixSkipping(Compiler& compiler) const
  754. {
  755. return true;
  756. }
  757. Node* MatchLiteralNode::HeadSyncronizingNode(Compiler& compiler)
  758. {
  759. return this;
  760. }
  761. CharCount MatchLiteralNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  762. {
  763. numLiterals++;
  764. return length;
  765. }
  766. void MatchLiteralNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  767. {
  768. ScannerMixin* scanner =
  769. scanners.Add(compiler.GetScriptContext()->GetRecycler(), compiler.GetProgram(), offset, length, isEquivClass);
  770. scanner->scanner.Setup(compiler.rtAllocator, compiler.program->rep.insts.litbuf + offset, length, isEquivClass ? CaseInsensitive::EquivClassSize : 1);
  771. }
  772. void MatchLiteralNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  773. {
  774. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  775. bestNode = this;
  776. }
  777. void MatchLiteralNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  778. {
  779. }
  780. void MatchLiteralNode::Emit(Compiler& compiler, CharCount& skipped)
  781. {
  782. if (skipped >= length)
  783. {
  784. // Asking to skip entire literal
  785. skipped -= length;
  786. return;
  787. }
  788. //
  789. // Compilation scheme:
  790. //
  791. // Match(Char|Char4|Literal|LiteralEquiv)Inst
  792. //
  793. CharCount effectiveOffset = offset + skipped * (isEquivClass ? CaseInsensitive::EquivClassSize : 1);
  794. CharCount effectiveLength = length - skipped;
  795. skipped -= min(skipped, length);
  796. if (effectiveLength == 1)
  797. {
  798. Char* cs = compiler.program->rep.insts.litbuf + effectiveOffset;
  799. MatchCharNode::Emit(compiler, cs, isEquivClass);
  800. }
  801. else
  802. {
  803. if (isEquivClass)
  804. EMIT(compiler, MatchLiteralEquivInst, effectiveOffset, effectiveLength);
  805. else
  806. EMIT(compiler, MatchLiteralInst, effectiveOffset, effectiveLength);
  807. }
  808. }
  809. CompileAssert(CaseInsensitive::EquivClassSize == 4);
  810. CharCount MatchLiteralNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  811. {
  812. //
  813. // Compilation scheme:
  814. //
  815. // SyncTo(Literal|LiteralEquiv|LinearLiteral)And(Continue|Consume|Backup)
  816. //
  817. Char * litptr = compiler.program->rep.insts.litbuf + offset;
  818. if (isHeadSyncronizingNode)
  819. {
  820. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  821. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  822. if (isEquivClass)
  823. {
  824. const uint lastPatCharIndex = length - 1;
  825. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  826. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  827. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  828. {
  829. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  830. }
  831. else
  832. {
  833. EMIT(compiler, SyncToLiteralEquivAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  834. }
  835. }
  836. else if (length == 1)
  837. EMIT(compiler, SyncToCharAndConsumeInst, litptr[0]);
  838. else if (length == 2)
  839. EMIT(compiler, SyncToChar2LiteralAndConsumeInst, litptr[0], litptr[1]);
  840. else
  841. {
  842. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  843. switch (setup.GetScheme())
  844. {
  845. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  846. EMIT(compiler, SyncToLinearLiteralAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  847. break;
  848. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  849. EMIT(compiler, SyncToLiteralAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  850. break;
  851. };
  852. }
  853. return length;
  854. }
  855. else
  856. {
  857. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  858. // inside a group for instance, then we won't need to back up but we cannot consume the literal.
  859. if (prevConsumes.IsExact(0))
  860. {
  861. if (isEquivClass)
  862. {
  863. const uint lastPatCharIndex = length - 1;
  864. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  865. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  866. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  867. {
  868. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  869. }
  870. else
  871. {
  872. EMIT(compiler, SyncToLiteralEquivAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  873. }
  874. }
  875. else if (length == 1)
  876. EMIT(compiler, SyncToCharAndContinueInst, litptr[0]);
  877. else if (length == 2)
  878. EMIT(compiler, SyncToChar2LiteralAndContinueInst, litptr[0], litptr[1]);
  879. else
  880. {
  881. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  882. switch (setup.GetScheme())
  883. {
  884. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  885. EMIT(compiler, SyncToLinearLiteralAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  886. break;
  887. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  888. EMIT(compiler, SyncToLiteralAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  889. break;
  890. };
  891. }
  892. }
  893. else
  894. {
  895. if (isEquivClass)
  896. {
  897. const uint lastPatCharIndex = length - 1;
  898. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  899. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  900. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  901. {
  902. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  903. }
  904. else
  905. {
  906. EMIT(compiler, SyncToLiteralEquivAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  907. }
  908. }
  909. else if (length == 1)
  910. EMIT(compiler, SyncToCharAndBackupInst, litptr[0], prevConsumes);
  911. else if (length == 2)
  912. EMIT(compiler, SyncToChar2LiteralAndBackupInst, litptr[0], litptr[1], prevConsumes);
  913. else
  914. {
  915. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  916. switch (setup.GetScheme())
  917. {
  918. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  919. EMIT(compiler, SyncToLinearLiteralAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, setup);
  920. break;
  921. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  922. EMIT(compiler, SyncToLiteralAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, setup);
  923. break;
  924. };
  925. }
  926. }
  927. return 0;
  928. }
  929. }
  930. bool MatchLiteralNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  931. {
  932. // We look for octoquad patterns before converting for case-insensitivity
  933. Assert(!isEquivClass);
  934. if (!oi->CouldAppend(length))
  935. return false;
  936. for (CharCount i = 0; i < length; i++)
  937. {
  938. if (!oi->AppendChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), compiler.program->rep.insts.litbuf[offset + i])))
  939. return false;
  940. }
  941. return true;
  942. }
  943. bool MatchLiteralNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  944. {
  945. if (isEquivClass)
  946. // The literal would expand into length^3 alternatives
  947. return false;
  948. return true;
  949. }
  950. bool MatchLiteralNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  951. {
  952. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  953. Assert(!isEquivClass);
  954. CharTrie* tail = trie;
  955. for (CharCount i = 0; i < length; i++)
  956. {
  957. if (tail->IsAccepting())
  958. {
  959. // An earlier literal is a prefix of this literal
  960. // If isAcceptFirst, can ignore suffix of already recognized literal.
  961. // Otherwise, must fail.
  962. return isAcceptFirst;
  963. }
  964. CharTrie* newTail = tail->Add(compiler.ctAllocator, compiler.program->rep.insts.litbuf[offset + i]);
  965. if (tail->Count() > maxTrieArmExpansion)
  966. return false;
  967. tail = newTail;
  968. }
  969. if (cont == 0)
  970. {
  971. if (tail->Count() > 0)
  972. // This literal is a proper prefix of an earlier literal
  973. return false;
  974. tail->SetAccepting();
  975. }
  976. else
  977. {
  978. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  979. return false;
  980. }
  981. return true;
  982. }
  983. #if ENABLE_REGEX_CONFIG_OPTIONS
  984. void MatchLiteralNode::Print(DebugWriter* w, const Char* litbuf) const
  985. {
  986. w->Print(_u("MatchLiteral("));
  987. int skip = isEquivClass ? CaseInsensitive::EquivClassSize : 1;
  988. for (int i = 0; i < skip; i++)
  989. {
  990. if (i > 0)
  991. w->Print(_u(", "));
  992. w->Print(_u("\""));
  993. for (CharCount j = 0; j < length; j++)
  994. w->PrintEscapedChar(litbuf[offset + j * skip + i]);
  995. w->Print(_u("\""));
  996. }
  997. w->PrintEOL(_u(")"));
  998. PrintAnnotations(w);
  999. }
  1000. #endif
  1001. // ----------------------------------------------------------------------
  1002. // MatchCharNode
  1003. // ----------------------------------------------------------------------
  1004. CharCount MatchCharNode::LiteralLength() const
  1005. {
  1006. return 1;
  1007. }
  1008. void MatchCharNode::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  1009. {
  1010. Assert(!isEquivClass);
  1011. Assert(litbufNext + 1 <= litbufLen);
  1012. if (litbufNext + 1 <= litbufLen) // for prefast
  1013. litbuf[litbufNext++] = cs[0];
  1014. }
  1015. bool MatchCharNode::IsCharOrPositiveSet() const
  1016. {
  1017. return true;
  1018. }
  1019. CharCount MatchCharNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1020. {
  1021. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0)
  1022. {
  1023. Char equivs[CaseInsensitive::EquivClassSize];
  1024. bool isNonTrivial = compiler.standardChars->ToEquivs(compiler.program->GetCaseMappingSource(), cs[0], equivs);
  1025. if (isNonTrivial)
  1026. {
  1027. isEquivClass = true;
  1028. for (int i = 0; i < CaseInsensitive::EquivClassSize; i++)
  1029. {
  1030. cs[i] = equivs[i];
  1031. }
  1032. }
  1033. }
  1034. return 0;
  1035. }
  1036. void MatchCharNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1037. {
  1038. }
  1039. bool MatchCharNode::IsRefiningAssertion(Compiler& compiler)
  1040. {
  1041. return false;
  1042. }
  1043. void MatchCharNode::AnnotatePass0(Compiler& compiler)
  1044. {
  1045. // If c is a word char then all characters equivalent to c are word chars
  1046. isWord = compiler.standardChars->IsWord(cs[0]);
  1047. }
  1048. void MatchCharNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1049. {
  1050. features = HasMatchChar;
  1051. thisConsumes.Exact(1);
  1052. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1053. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1054. firstSet->Set(compiler.ctAllocator, cs[i]);
  1055. isFirstExact = true;
  1056. isThisIrrefutable = false;
  1057. isThisWillNotProgress = true;
  1058. isThisWillNotRegress = true;
  1059. isNotInLoop = parentNotInLoop;
  1060. isAtLeastOnce = parentAtLeastOnce;
  1061. isNotSpeculative = parentNotSpeculative;
  1062. isNotNegated = parentNotNegated;
  1063. }
  1064. void MatchCharNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1065. {
  1066. prevConsumes = accumConsumes;
  1067. isPrevWillNotProgress = accumPrevWillNotProgress;
  1068. isPrevWillNotRegress = accumPrevWillNotRegress;
  1069. }
  1070. void MatchCharNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1071. {
  1072. followConsumes = accumConsumes;
  1073. followSet = accumFollow;
  1074. isFollowIrrefutable = accumFollowIrrefutable;
  1075. isFollowEOL = accumFollowEOL;
  1076. }
  1077. void MatchCharNode::AnnotatePass4(Compiler& compiler)
  1078. {
  1079. isDeterministic = true;
  1080. }
  1081. bool MatchCharNode::SupportsPrefixSkipping(Compiler& compiler) const
  1082. {
  1083. return true;
  1084. }
  1085. Node* MatchCharNode::HeadSyncronizingNode(Compiler& compiler)
  1086. {
  1087. return this;
  1088. }
  1089. CharCount MatchCharNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1090. {
  1091. numLiterals++;
  1092. return 1;
  1093. }
  1094. void MatchCharNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1095. {
  1096. Assert(false);
  1097. }
  1098. void MatchCharNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1099. {
  1100. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1101. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  1102. {
  1103. bestNode = this;
  1104. }
  1105. }
  1106. void MatchCharNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1107. {
  1108. }
  1109. CompileAssert(CaseInsensitive::EquivClassSize == 4);
  1110. void MatchCharNode::Emit(Compiler& compiler, __in_ecount(4) Char * cs, bool isEquivClass)
  1111. {
  1112. if (isEquivClass)
  1113. {
  1114. Char uniqueEquivs[CaseInsensitive::EquivClassSize];
  1115. CharCount uniqueEquivCount = FindUniqueEquivs(cs, uniqueEquivs);
  1116. switch (uniqueEquivCount)
  1117. {
  1118. case 2:
  1119. EMIT(compiler, MatchChar2Inst, uniqueEquivs[0], uniqueEquivs[1]);
  1120. break;
  1121. case 3:
  1122. EMIT(compiler, MatchChar3Inst, uniqueEquivs[0], uniqueEquivs[1], uniqueEquivs[2]);
  1123. break;
  1124. default:
  1125. EMIT(compiler, MatchChar4Inst, uniqueEquivs[0], uniqueEquivs[1], uniqueEquivs[2], uniqueEquivs[3]);
  1126. break;
  1127. }
  1128. }
  1129. else
  1130. EMIT(compiler, MatchCharInst, cs[0]);
  1131. }
  1132. CharCount MatchCharNode::FindUniqueEquivs(const Char equivs[CaseInsensitive::EquivClassSize], __out_ecount(4) Char uniqueEquivs[CaseInsensitive::EquivClassSize])
  1133. {
  1134. uniqueEquivs[0] = equivs[0];
  1135. CharCount uniqueCount = 1;
  1136. for (CharCount equivIndex = 1; equivIndex < CaseInsensitive::EquivClassSize; ++equivIndex)
  1137. {
  1138. bool alreadyHave = false;
  1139. for (CharCount uniqueIndex = 0; uniqueIndex < uniqueCount; ++uniqueIndex)
  1140. {
  1141. if (uniqueEquivs[uniqueIndex] == equivs[equivIndex])
  1142. {
  1143. alreadyHave = true;
  1144. break;
  1145. }
  1146. }
  1147. if (!alreadyHave)
  1148. {
  1149. uniqueEquivs[uniqueCount] = equivs[equivIndex];
  1150. uniqueCount += 1;
  1151. }
  1152. }
  1153. return uniqueCount;
  1154. }
  1155. void MatchCharNode::Emit(Compiler& compiler, CharCount& skipped)
  1156. {
  1157. if (skipped >= 1)
  1158. {
  1159. // Asking to skip entire char
  1160. skipped--;
  1161. return;
  1162. }
  1163. //
  1164. // Compilation scheme:
  1165. //
  1166. // MatchChar(2|3|4)?
  1167. //
  1168. skipped -= min(skipped, static_cast<CharCount>(1));
  1169. Emit(compiler, cs, isEquivClass);
  1170. }
  1171. CharCount MatchCharNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1172. {
  1173. //
  1174. // Compilation scheme:
  1175. //
  1176. // SyncTo(Char|Char2Set|Set)And(Consume|Continue|Backup)
  1177. //
  1178. if (isHeadSyncronizingNode)
  1179. {
  1180. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  1181. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  1182. if (firstSet->IsSingleton())
  1183. EMIT(compiler, SyncToCharAndConsumeInst, firstSet->Singleton());
  1184. else
  1185. EMIT(compiler, SyncToSetAndConsumeInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1186. return 1;
  1187. }
  1188. else
  1189. {
  1190. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  1191. // inside a group for instance, then we won't need to back up but we cannot consume the literal. If we don't need to
  1192. // back up, we can use SyncToCharAndContinue instead.
  1193. if (prevConsumes.IsExact(0))
  1194. {
  1195. Char entries[CharSet<Char>::MaxCompact];
  1196. int count = firstSet->GetCompactEntries(2, entries);
  1197. if (count == 1)
  1198. EMIT(compiler, SyncToCharAndContinueInst, entries[0]);
  1199. else if (count == 2)
  1200. EMIT(compiler, SyncToChar2SetAndContinueInst, entries[0], entries[1]);
  1201. else
  1202. EMIT(compiler, SyncToSetAndContinueInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1203. }
  1204. else
  1205. {
  1206. if (firstSet->IsSingleton())
  1207. EMIT(compiler, SyncToCharAndBackupInst, firstSet->Singleton(), prevConsumes);
  1208. else
  1209. EMIT(compiler, SyncToSetAndBackupInst<false>, prevConsumes)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1210. }
  1211. return 0;
  1212. }
  1213. }
  1214. bool MatchCharNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1215. {
  1216. // We look for octoquad patterns before converting for case-insensitivity
  1217. Assert(!isEquivClass);
  1218. return oi->AppendChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), cs[0]));
  1219. }
  1220. bool MatchCharNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1221. {
  1222. if (isEquivClass)
  1223. {
  1224. accNumAlts *= CaseInsensitive::EquivClassSize;
  1225. if (accNumAlts > maxTrieArmExpansion)
  1226. return false;
  1227. }
  1228. return true;
  1229. }
  1230. bool MatchCharNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1231. {
  1232. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1233. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1234. {
  1235. if (trie->IsAccepting())
  1236. {
  1237. // An earlier literal is a prefix of this literal
  1238. // If isAcceptFirst, can ignore suffix of already recognized literal.
  1239. // Otherwise, must fail.
  1240. return isAcceptFirst;
  1241. }
  1242. CharTrie* tail = trie->Add(compiler.ctAllocator, cs[i]);
  1243. if (trie->Count() > maxTrieArmExpansion)
  1244. return false;
  1245. if (cont == 0)
  1246. {
  1247. if (tail->Count() > 0)
  1248. // This literal is a proper prefix of an earlier literal
  1249. return false;
  1250. tail->SetAccepting();
  1251. }
  1252. else
  1253. {
  1254. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  1255. return false;
  1256. }
  1257. }
  1258. return true;
  1259. }
  1260. #if ENABLE_REGEX_CONFIG_OPTIONS
  1261. void MatchCharNode::Print(DebugWriter* w, const Char* litbuf) const
  1262. {
  1263. w->Print(_u("MatchChar("));
  1264. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1265. {
  1266. if (i > 0)
  1267. w->Print(_u(", "));
  1268. w->PrintQuotedChar(cs[i]);
  1269. }
  1270. w->PrintEOL(_u(")"));
  1271. PrintAnnotations(w);
  1272. }
  1273. #endif
  1274. // ----------------------------------------------------------------------
  1275. // MatchSetNode
  1276. // ----------------------------------------------------------------------
  1277. CharCount MatchSetNode::LiteralLength() const
  1278. {
  1279. return 0;
  1280. }
  1281. bool MatchSetNode::IsCharOrPositiveSet() const
  1282. {
  1283. return !isNegation;
  1284. }
  1285. CharCount MatchSetNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1286. {
  1287. return 0;
  1288. }
  1289. void MatchSetNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1290. {
  1291. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0 && this->needsEquivClass)
  1292. {
  1293. // If the set is negated, then at this point we could:
  1294. // (1) take each char in the set to its equivalence class and complement it
  1295. // (2) complement the set and take each char to its equivalence class
  1296. // Thankfully the spec demands (1), so we don't need to actually calculate any complement, just
  1297. // retain the isNegated flag
  1298. CharSet<Char> newSet;
  1299. // First include all the existing characters in the result set so that large ranges such as \x80-\uffff
  1300. // don't temporarily turn into a large number of small ranges corresponding to the various equivalent
  1301. // characters
  1302. newSet.UnionInPlace(compiler.rtAllocator, set);
  1303. set.ToEquivClass(compiler.rtAllocator, newSet);
  1304. set = newSet;
  1305. }
  1306. }
  1307. bool MatchSetNode::IsRefiningAssertion(Compiler& compiler)
  1308. {
  1309. return false;
  1310. }
  1311. void MatchSetNode::AnnotatePass0(Compiler& compiler)
  1312. {
  1313. if (isNegation || set.IsEmpty())
  1314. isWord = false;
  1315. else
  1316. isWord = set.IsSubsetOf(*compiler.standardChars->GetWordSet());
  1317. }
  1318. void MatchSetNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1319. {
  1320. features = HasMatchSet;
  1321. if (isNegation)
  1322. {
  1323. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1324. set.ToComplement(compiler.ctAllocator, *firstSet);
  1325. }
  1326. else
  1327. {
  1328. // CAUTION:
  1329. // Be careful not to use firstSet after deleting the node.
  1330. firstSet = &set;
  1331. }
  1332. isFirstExact = true;
  1333. // If firstSet is empty then pattern will always fail
  1334. thisConsumes.Exact(firstSet->IsEmpty() ? 0 : 1);
  1335. isThisIrrefutable = false;
  1336. isThisWillNotProgress = true;
  1337. isThisWillNotRegress = true;
  1338. isNotInLoop = parentNotInLoop;
  1339. isAtLeastOnce = parentAtLeastOnce;
  1340. isNotSpeculative = parentNotSpeculative;
  1341. isNotNegated = parentNotNegated;
  1342. }
  1343. void MatchSetNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1344. {
  1345. prevConsumes = accumConsumes;
  1346. isPrevWillNotProgress = accumPrevWillNotProgress;
  1347. isPrevWillNotRegress = accumPrevWillNotRegress;
  1348. }
  1349. void MatchSetNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1350. {
  1351. followConsumes = accumConsumes;
  1352. followSet = accumFollow;
  1353. isFollowIrrefutable = accumFollowIrrefutable;
  1354. isFollowEOL = accumFollowEOL;
  1355. }
  1356. void MatchSetNode::AnnotatePass4(Compiler& compiler)
  1357. {
  1358. isDeterministic = true;
  1359. }
  1360. bool MatchSetNode::SupportsPrefixSkipping(Compiler& compiler) const
  1361. {
  1362. return true;
  1363. }
  1364. Node* MatchSetNode::HeadSyncronizingNode(Compiler& compiler)
  1365. {
  1366. return this;
  1367. }
  1368. CharCount MatchSetNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1369. {
  1370. return 0;
  1371. }
  1372. void MatchSetNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1373. {
  1374. Assert(false);
  1375. }
  1376. void MatchSetNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1377. {
  1378. }
  1379. void MatchSetNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1380. {
  1381. }
  1382. void MatchSetNode::Emit(Compiler& compiler, CharCount& skipped)
  1383. {
  1384. if (skipped >= 1)
  1385. {
  1386. // Asking to skip entire set
  1387. skipped--;
  1388. return;
  1389. }
  1390. //
  1391. // Compilation scheme:
  1392. //
  1393. // MatchSet
  1394. //
  1395. skipped -= min(skipped, static_cast<CharCount>(1));
  1396. RuntimeCharSet<Char> *runtimeSet;
  1397. if(isNegation)
  1398. runtimeSet = &EMIT(compiler, MatchSetInst<true>)->set;
  1399. else
  1400. runtimeSet = &EMIT(compiler, MatchSetInst<false>)->set;
  1401. runtimeSet->CloneFrom(compiler.rtAllocator, set);
  1402. }
  1403. CharCount MatchSetNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1404. {
  1405. //
  1406. // Compilation scheme:
  1407. //
  1408. // SyncToSetAnd(Consume|Continue|Backup)
  1409. //
  1410. RuntimeCharSet<Char> *runtimeSet;
  1411. CharCount consumedChars;
  1412. if (isHeadSyncronizingNode)
  1413. {
  1414. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  1415. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  1416. if(isNegation)
  1417. runtimeSet = &EMIT(compiler, SyncToSetAndConsumeInst<true>)->set;
  1418. else
  1419. runtimeSet = &EMIT(compiler, SyncToSetAndConsumeInst<false>)->set;
  1420. consumedChars = 1;
  1421. }
  1422. else
  1423. {
  1424. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  1425. // inside a group for instance, then we won't need to back up but we cannot consume the literal. If we don't need to
  1426. // back up, we still cannot use SyncToSetAndContinue like in MatchCharNode::EmitScan, since SyncToSetAndContinue does not support negation
  1427. // sets.
  1428. if(prevConsumes.IsExact(0))
  1429. {
  1430. if(isNegation)
  1431. runtimeSet = &EMIT(compiler, SyncToSetAndContinueInst<true>)->set;
  1432. else
  1433. runtimeSet = &EMIT(compiler, SyncToSetAndContinueInst<false>)->set;
  1434. }
  1435. else if(isNegation)
  1436. runtimeSet = &EMIT(compiler, SyncToSetAndBackupInst<true>, prevConsumes)->set;
  1437. else
  1438. runtimeSet = &EMIT(compiler, SyncToSetAndBackupInst<false>, prevConsumes)->set;
  1439. consumedChars = 0;
  1440. }
  1441. runtimeSet->CloneFrom(compiler.rtAllocator, set);
  1442. return consumedChars;
  1443. }
  1444. bool MatchSetNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1445. {
  1446. if (isNegation || set.IsEmpty() || !oi->BeginUnions())
  1447. return false;
  1448. Assert(CharSet<Char>::MaxCompact >= TrigramAlphabet::AlphaCount);
  1449. Char entries[CharSet<Char>::MaxCompact];
  1450. int count = set.GetCompactEntries(TrigramAlphabet::AlphaCount, entries);
  1451. if (count < 0)
  1452. // Too many unique characters
  1453. return false;
  1454. for (int i = 0; i < count; i++)
  1455. {
  1456. if (!oi->UnionChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), entries[i])))
  1457. // Too many unique characters
  1458. return false;
  1459. }
  1460. oi->EndUnions(); // this doesn't need to be called if we return false earlier since the OctoquadPattern won't be used
  1461. return true;
  1462. }
  1463. bool MatchSetNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1464. {
  1465. if (isNegation || !set.IsCompact())
  1466. return false;
  1467. const uint count = set.Count();
  1468. if(count == 0)
  1469. return false; // empty set always fails and consumes nothing, and therefore is not a char-trie arm
  1470. accNumAlts *= count;
  1471. if (accNumAlts > maxTrieArmExpansion)
  1472. return false;
  1473. return true;
  1474. }
  1475. bool MatchSetNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1476. {
  1477. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1478. Assert(!isNegation && set.IsCompact());
  1479. Char entries[CharSet<Char>::MaxCompact];
  1480. int count = set.GetCompactEntries(CharSet<Char>::MaxCompact, entries);
  1481. Assert(count > 0);
  1482. for (int i = 0; i < count; i++)
  1483. {
  1484. if (trie->IsAccepting())
  1485. {
  1486. // An earlier literal is a prefix of this literal
  1487. // If isAcceptFirst, can ignore suffix of already recognized literal.
  1488. // Otherwise, must fail.
  1489. return isAcceptFirst;
  1490. }
  1491. CharTrie* tail = trie->Add(compiler.ctAllocator, entries[i]);
  1492. if (trie->Count() > maxTrieArmExpansion)
  1493. return false;
  1494. if (cont == 0)
  1495. {
  1496. if (tail->Count() > 0)
  1497. // This literal is a proper prefix of an earlier literal
  1498. return false;
  1499. tail->SetAccepting();
  1500. }
  1501. else
  1502. {
  1503. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  1504. return false;
  1505. }
  1506. }
  1507. return true;
  1508. }
  1509. #if ENABLE_REGEX_CONFIG_OPTIONS
  1510. void MatchSetNode::Print(DebugWriter* w, const Char* litbuf) const
  1511. {
  1512. w->Print(_u("MatchSet(%s, "), isNegation ? _u("negative") : _u("positive"));
  1513. set.Print(w);
  1514. w->PrintEOL(_u(")"));
  1515. PrintAnnotations(w);
  1516. }
  1517. #endif
  1518. // ----------------------------------------------------------------------
  1519. // ConcatNode
  1520. // ----------------------------------------------------------------------
  1521. CharCount ConcatNode::LiteralLength() const
  1522. {
  1523. return 0;
  1524. }
  1525. bool ConcatNode::IsCharOrPositiveSet() const
  1526. {
  1527. return false;
  1528. }
  1529. CharCount ConcatNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1530. {
  1531. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1532. Assert(tail != 0);
  1533. CharCount n = 0;
  1534. #if DBG
  1535. ConcatNode* prev = 0;
  1536. #endif
  1537. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1538. {
  1539. Assert(curr->head->tag != Concat);
  1540. Assert(prev == 0 || !(prev->head->LiteralLength() > 0 && curr->head->LiteralLength() > 0));
  1541. n += curr->head->TransferPass0(compiler, litbuf);
  1542. #if DBG
  1543. prev = curr;
  1544. #endif
  1545. }
  1546. return n;
  1547. }
  1548. void ConcatNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1549. {
  1550. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1551. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1552. curr->head->TransferPass1(compiler, litbuf);
  1553. }
  1554. bool ConcatNode::IsRefiningAssertion(Compiler& compiler)
  1555. {
  1556. return false;
  1557. }
  1558. void ConcatNode::AnnotatePass0(Compiler& compiler)
  1559. {
  1560. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1561. Node* prev = 0;
  1562. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1563. {
  1564. curr->head->AnnotatePass0(compiler);
  1565. if (prev != 0)
  1566. {
  1567. if (curr->head->tag == WordBoundary && prev->isWord)
  1568. {
  1569. WordBoundaryNode* wb = (WordBoundaryNode*)curr->head;
  1570. wb->mustIncludeLeaving = true;
  1571. }
  1572. else if (prev->tag == WordBoundary && curr->head->isWord)
  1573. {
  1574. WordBoundaryNode* wb = (WordBoundaryNode*)prev;
  1575. wb->mustIncludeEntering = true;
  1576. }
  1577. }
  1578. prev = curr->head;
  1579. }
  1580. }
  1581. void ConcatNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1582. {
  1583. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1584. features = HasConcat;
  1585. isNotInLoop = parentNotInLoop;
  1586. isAtLeastOnce = parentAtLeastOnce;
  1587. isNotSpeculative = parentNotSpeculative;
  1588. isNotNegated = parentNotNegated;
  1589. // Pass 1: Count items
  1590. int n = 0;
  1591. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1592. n++;
  1593. // Pass 2: Annotate each item, accumulate features, consumes, find longest prefix of possibly-empty-accepting items,
  1594. // check if all items are irrefutable
  1595. int emptyPrefix = 0;
  1596. thisConsumes.Exact(0);
  1597. isThisIrrefutable = true;
  1598. isThisWillNotProgress = true;
  1599. isThisWillNotRegress = true;
  1600. int item = 0;
  1601. for (ConcatNode* curr = this; curr != 0; curr = curr->tail, item++)
  1602. {
  1603. curr->head->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, parentNotSpeculative, isNotNegated);
  1604. features |= curr->head->features;
  1605. thisConsumes.Add(curr->head->thisConsumes);
  1606. if (!curr->head->isThisIrrefutable)
  1607. isThisIrrefutable = false;
  1608. if (!curr->head->isThisWillNotProgress)
  1609. isThisWillNotProgress = false;
  1610. if (!curr->head->isThisWillNotRegress)
  1611. isThisWillNotRegress = false;
  1612. if (emptyPrefix == item && curr->head->thisConsumes.CouldMatchEmpty())
  1613. emptyPrefix++;
  1614. }
  1615. if (emptyPrefix == 0)
  1616. {
  1617. firstSet = head->firstSet;
  1618. isFirstExact = head->isFirstExact;
  1619. }
  1620. else
  1621. {
  1622. // Pass 3: Overall first set is union of first's of emptyPrefx
  1623. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1624. isFirstExact = true;
  1625. item = 0;
  1626. for (ConcatNode* curr = this; item <= min(emptyPrefix, n - 1); curr = curr->tail, item++)
  1627. {
  1628. AnalysisAssert(curr);
  1629. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  1630. if (!curr->head->isFirstExact)
  1631. isFirstExact = false;
  1632. }
  1633. }
  1634. }
  1635. void ConcatNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1636. {
  1637. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1638. prevConsumes = accumConsumes;
  1639. isPrevWillNotProgress = accumPrevWillNotProgress;
  1640. isPrevWillNotRegress = accumPrevWillNotRegress;
  1641. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1642. {
  1643. curr->head->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  1644. accumConsumes.Add(curr->head->thisConsumes);
  1645. if (!curr->head->isThisWillNotProgress)
  1646. accumPrevWillNotProgress = false;
  1647. if (!curr->head->isThisWillNotRegress)
  1648. accumPrevWillNotRegress = false;
  1649. }
  1650. }
  1651. void ConcatNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1652. {
  1653. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1654. followConsumes = accumConsumes;
  1655. followSet = accumFollow;
  1656. isFollowIrrefutable = accumFollowIrrefutable;
  1657. isFollowEOL = accumFollowEOL;
  1658. // Pass 1: Count items
  1659. int n = 0;
  1660. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1661. n++;
  1662. // Pass 2: Collect items so we can enumerate backwards
  1663. Node** nodes = AnewArray(compiler.ctAllocator, Node *, n);
  1664. int item = 0;
  1665. for (ConcatNode* curr = this; curr != 0; curr = curr->tail, item++)
  1666. nodes[item] = curr->head;
  1667. // Pass 3: Work backwards propagating follow set, irrefutability and FollowEndLineOrPattern, and adding consumes
  1668. CharSet<Char>* innerFollow = accumFollow;
  1669. for (item = n - 1; item >= 0; item--)
  1670. {
  1671. nodes[item]->AnnotatePass3(compiler, accumConsumes, innerFollow, accumFollowIrrefutable, accumFollowEOL);
  1672. if (!nodes[item]->isThisIrrefutable)
  1673. accumFollowIrrefutable = false;
  1674. if (!nodes[item]->IsEmptyOnly() && (compiler.program->flags & MultilineRegexFlag) == 0)
  1675. accumFollowEOL = nodes[item]->tag == EOL;
  1676. // ConcatNode has hardfail BOI test if any child has hardfail BOI
  1677. hasInitialHardFailBOI = hasInitialHardFailBOI || nodes[item]->hasInitialHardFailBOI;
  1678. if (item > 0)
  1679. {
  1680. CharSet<Char>* nextInnerFollow = Anew(compiler.ctAllocator, UnicodeCharSet);
  1681. if (nodes[item]->thisConsumes.CouldMatchEmpty() && !nodes[item]->IsRefiningAssertion(compiler))
  1682. {
  1683. // Later follows can shine through this item to the previous item
  1684. nextInnerFollow->UnionInPlace(compiler.ctAllocator, *innerFollow);
  1685. }
  1686. nextInnerFollow->UnionInPlace(compiler.ctAllocator, *nodes[item]->firstSet);
  1687. innerFollow = nextInnerFollow;
  1688. accumConsumes.Add(nodes[item]->thisConsumes);
  1689. }
  1690. }
  1691. }
  1692. void ConcatNode::AnnotatePass4(Compiler& compiler)
  1693. {
  1694. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1695. isDeterministic = true;
  1696. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1697. {
  1698. curr->head->AnnotatePass4(compiler);
  1699. if (!curr->head->isDeterministic)
  1700. isDeterministic = false;
  1701. }
  1702. }
  1703. bool ConcatNode::SupportsPrefixSkipping(Compiler& compiler) const
  1704. {
  1705. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1706. int prefix = 0;
  1707. for (const ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1708. {
  1709. if (curr->head->SupportsPrefixSkipping(compiler))
  1710. prefix++;
  1711. else
  1712. break;
  1713. }
  1714. return prefix > 0;
  1715. }
  1716. Node* ConcatNode::HeadSyncronizingNode(Compiler& compiler)
  1717. {
  1718. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1719. return head->HeadSyncronizingNode(compiler);
  1720. }
  1721. void ConcatNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1722. {
  1723. PROBE_STACK(scriptContext, Js::Constants::MinStackRegex);
  1724. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1725. curr->head->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  1726. }
  1727. CharCount ConcatNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1728. {
  1729. return 0;
  1730. }
  1731. void ConcatNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1732. {
  1733. Assert(false);
  1734. }
  1735. void ConcatNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1736. {
  1737. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1738. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1739. curr->head->BestSyncronizingNode(compiler, bestNode);
  1740. }
  1741. void ConcatNode::Emit(Compiler& compiler, CharCount& skipped)
  1742. {
  1743. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1744. //
  1745. // Compilation scheme:
  1746. //
  1747. // <item 1>
  1748. // ...
  1749. // <item n>
  1750. //
  1751. // :-)
  1752. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1753. curr->head->Emit(compiler, skipped);
  1754. }
  1755. CharCount ConcatNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1756. {
  1757. Assert(false);
  1758. return 0;
  1759. }
  1760. bool ConcatNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1761. {
  1762. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1763. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1764. {
  1765. if (!curr->head->IsOctoquad(compiler, oi))
  1766. return false;
  1767. }
  1768. return true;
  1769. }
  1770. bool ConcatNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1771. {
  1772. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1773. for (const ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1774. {
  1775. if (!curr->head->IsCharTrieArm(compiler, accNumAlts))
  1776. return false;
  1777. }
  1778. return true;
  1779. }
  1780. bool ConcatNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1781. {
  1782. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1783. if (cont != 0)
  1784. // We don't want to manage a stack of continuations
  1785. return false;
  1786. // NOTE: This is the only place we use an internal node of a concat sequence as a sub-concat sequence
  1787. return head->BuildCharTrie(compiler, trie, tail, isAcceptFirst);
  1788. }
  1789. #if ENABLE_REGEX_CONFIG_OPTIONS
  1790. void ConcatNode::Print(DebugWriter* w, const Char* litbuf) const
  1791. {
  1792. w->PrintEOL(_u("Concat()"));
  1793. PrintAnnotations(w);
  1794. w->PrintEOL(_u("{"));
  1795. w->Indent();
  1796. for (const ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1797. curr->head->Print(w, litbuf);
  1798. w->Unindent();
  1799. w->PrintEOL(_u("}"));
  1800. }
  1801. #endif
  1802. // ----------------------------------------------------------------------
  1803. // AltNode
  1804. // ----------------------------------------------------------------------
  1805. CharCount AltNode::LiteralLength() const
  1806. {
  1807. return 0;
  1808. }
  1809. bool AltNode::IsCharOrPositiveSet() const
  1810. {
  1811. return false;
  1812. }
  1813. CharCount AltNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1814. {
  1815. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1816. Assert(tail != 0);
  1817. CharCount n = 0;
  1818. #if DBG
  1819. AltNode* prev = 0;
  1820. #endif
  1821. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1822. {
  1823. Assert(curr->head->tag != Alt);
  1824. Assert(prev == 0 || !(prev->head->IsCharOrPositiveSet() && curr->head->IsCharOrPositiveSet()));
  1825. n += curr->head->TransferPass0(compiler, litbuf);
  1826. #if DBG
  1827. prev = curr;
  1828. #endif
  1829. }
  1830. return n;
  1831. }
  1832. void AltNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1833. {
  1834. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1835. for (AltNode *curr = this; curr != 0; curr = curr->tail)
  1836. curr->head->TransferPass1(compiler, litbuf);
  1837. }
  1838. bool AltNode::IsRefiningAssertion(Compiler& compiler)
  1839. {
  1840. return false;
  1841. }
  1842. void AltNode::AnnotatePass0(Compiler& compiler)
  1843. {
  1844. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1845. isWord = true;
  1846. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1847. {
  1848. curr->head->AnnotatePass0(compiler);
  1849. if (!curr->head->isWord)
  1850. isWord = false;
  1851. }
  1852. }
  1853. void AltNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1854. {
  1855. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1856. features = HasAlt;
  1857. isNotInLoop = parentNotInLoop;
  1858. isAtLeastOnce = parentAtLeastOnce;
  1859. isNotSpeculative = parentNotSpeculative;
  1860. isNotNegated = parentNotNegated;
  1861. // Overall alternative:
  1862. // - is irrefutable if at least one item is irrefutable
  1863. // - will not progress(regress) if each item will not progress(regress) and has strictly decreasing(increasing) consumes
  1864. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1865. isThisIrrefutable = false;
  1866. isThisWillNotProgress = true;
  1867. isThisWillNotRegress = true;
  1868. isFirstExact = true;
  1869. CountDomain prevConsumes;
  1870. int item = 0;
  1871. for (AltNode *curr = this; curr != 0; curr = curr->tail, item++)
  1872. {
  1873. curr->head->AnnotatePass1(compiler, parentNotInLoop, false, parentNotSpeculative, isNotNegated);
  1874. features |= curr->head->features;
  1875. if (!curr->head->isThisWillNotProgress)
  1876. isThisWillNotProgress = false;
  1877. if (!curr->head->isThisWillNotRegress)
  1878. isThisWillNotRegress = false;
  1879. if (item == 0)
  1880. prevConsumes = thisConsumes = curr->head->thisConsumes;
  1881. else
  1882. {
  1883. thisConsumes.Lub(curr->head->thisConsumes);
  1884. if (!curr->head->thisConsumes.IsLessThan(prevConsumes))
  1885. isThisWillNotProgress = false;
  1886. if (!curr->head->thisConsumes.IsGreaterThan(prevConsumes))
  1887. isThisWillNotRegress = false;
  1888. prevConsumes = curr->head->thisConsumes;
  1889. }
  1890. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  1891. if (!curr->head->isFirstExact || curr->head->isThisIrrefutable)
  1892. // If any item is irrefutable then later items may never be taken, so first set cannot be exact
  1893. isFirstExact = false;
  1894. if (curr->head->isThisIrrefutable)
  1895. isThisIrrefutable = true;
  1896. }
  1897. }
  1898. void AltNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1899. {
  1900. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1901. prevConsumes = accumConsumes;
  1902. isPrevWillNotProgress = accumPrevWillNotProgress;
  1903. isPrevWillNotRegress = accumPrevWillNotRegress;
  1904. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1905. curr->head->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  1906. }
  1907. void AltNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1908. {
  1909. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1910. followConsumes = accumConsumes;
  1911. followSet = accumFollow;
  1912. isFollowIrrefutable = accumFollowIrrefutable;
  1913. isFollowEOL = accumFollowEOL;
  1914. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1915. {
  1916. curr->head->AnnotatePass3(compiler, accumConsumes, accumFollow, accumFollowIrrefutable, accumFollowEOL);
  1917. // AltNode has hardfail BOI test if all child Nodes have hardfail BOI tests
  1918. hasInitialHardFailBOI = curr->head->hasInitialHardFailBOI && (hasInitialHardFailBOI || (curr == this));
  1919. }
  1920. }
  1921. void AltNode::AnnotatePass4(Compiler& compiler)
  1922. {
  1923. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  1924. //
  1925. // Simplification rule
  1926. //
  1927. // If the follow is irrefutable then we can ignore all items after an irrefutable item, since
  1928. // we'll never be able to backtrack into them.
  1929. // E.g.: (a*|b*)c* === a*c*
  1930. //
  1931. bool simplified = false;
  1932. if (isFollowIrrefutable && isThisIrrefutable)
  1933. {
  1934. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1935. {
  1936. if (curr->head->isThisIrrefutable && curr->tail != 0)
  1937. {
  1938. curr->tail = 0;
  1939. simplified = true;
  1940. break;
  1941. }
  1942. }
  1943. }
  1944. if (simplified)
  1945. {
  1946. Assert(!isFirstExact);
  1947. // Recalculate firstSet. Since it can only get smaller, and alternative could not have had an exact
  1948. // first set, this recalculation does not make any decisions already made based on the current firstSet
  1949. // unsound.
  1950. // NOTE: Is it worth recalculating the WillNotProgress/WillNotRegress bools?
  1951. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1952. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1953. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  1954. }
  1955. //
  1956. // Annotate items
  1957. //
  1958. isDeterministic = true;
  1959. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1960. {
  1961. curr->head->AnnotatePass4(compiler);
  1962. if (!curr->head->isDeterministic)
  1963. isDeterministic = false;
  1964. }
  1965. //
  1966. // Compilation scheme: Switch/Chain/Set, not isOptional
  1967. //
  1968. // If no item can match empty and all items' FIRST sets are pairwise disjoint then we can
  1969. // commit to an item using a 1 char lookahead. We can fall-through to the last
  1970. // item without guarding it since it will fail if the next character cannot match.
  1971. // E.g.: (abc|def)
  1972. //
  1973. {
  1974. // Pass 1: Items cannot match empty, accumulate counts
  1975. bool fires = true;
  1976. bool allCompact = true;
  1977. bool allSimpleOneChar = true;
  1978. int numItems = 0;
  1979. uint totalChars = 0;
  1980. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1981. {
  1982. if (curr->head->thisConsumes.CouldMatchEmpty())
  1983. {
  1984. fires = false;
  1985. break;
  1986. }
  1987. numItems++;
  1988. if (!curr->head->firstSet->IsCompact())
  1989. allCompact = false;
  1990. if (!curr->head->IsSimpleOneChar())
  1991. allSimpleOneChar = false;
  1992. totalChars += curr->head->firstSet->Count();
  1993. }
  1994. if (fires)
  1995. {
  1996. // To go from two to one items requires the first item
  1997. // to be irrefutable, in which case it could match empty and this rule won't fire.
  1998. Assert(numItems > 1);
  1999. // Step 2: Check FIRST sets are disjoint
  2000. if (totalChars == firstSet->Count())
  2001. {
  2002. // **COMMIT**
  2003. if (allSimpleOneChar)
  2004. // This will probably never fire since the parser has already converted alts-of-chars/sets
  2005. // to sets. We include it for symmetry with below.
  2006. scheme = Set;
  2007. else if (allCompact && totalChars <= Switch20Inst::MaxCases)
  2008. {
  2009. // Can use a switch instruction to jump to item
  2010. scheme = Switch;
  2011. switchSize = totalChars;
  2012. }
  2013. else
  2014. // Must use a chain of jump instructions to jump to item
  2015. scheme = Chain;
  2016. isOptional = false;
  2017. return;
  2018. }
  2019. }
  2020. }
  2021. //
  2022. // Compilation scheme: None/Switch/Chain/Set, isOptional
  2023. //
  2024. // Condition (1):
  2025. // If some items are empty-only, the rest (if any) cannot match empty, follow cannot match empty, and
  2026. // all items' FIRST sets are pairwise disjoint and disjoint from the FOLLOW set, then we can commit to
  2027. // either a non-empty item or to the empty item using a 1 char lookahead. In this case we just emit each
  2028. // non-empty item with a guard, and fall-through to follow if no guard fires.
  2029. // E.g.: (abc||def)h
  2030. //
  2031. // Condition (2):
  2032. // If some items are empty-only, the rest (if any) cannot match empty, follow is irrefutable, and all
  2033. // items' FIRST sets are pairwise disjoint, then we can commit to either a non-empty item or to the empty
  2034. // item using a 1 char lookahead, provided each non-empty item obeys the condition:
  2035. // ** the item can't fail if given an arbitrary input starting with a character in its FIRST set **
  2036. // Currently, we can prove that only for IsSimpleOneChar items, though more analysis could widen the class.
  2037. // Again, we emit each non-empty item with a guard, and fall-through to follow if no guard fires.
  2038. // E.g.: ([abc]|)a*
  2039. //
  2040. // Condition (3):
  2041. // If all items are empty-only, we can commit to a single empty-only item
  2042. {
  2043. // Pass 1
  2044. bool fires = false;
  2045. bool allSimpleOneChar = true;
  2046. bool allCompact = true;
  2047. int numNonEmpty = 0;
  2048. uint totalChars = 0;
  2049. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2050. {
  2051. if (curr->head->IsEmptyOnly())
  2052. fires = true;
  2053. else if (curr->head->thisConsumes.CouldMatchEmpty())
  2054. {
  2055. fires = false;
  2056. break;
  2057. }
  2058. else
  2059. {
  2060. numNonEmpty++;
  2061. if (!curr->head->IsSimpleOneChar())
  2062. allSimpleOneChar = false;
  2063. if (!curr->head->firstSet->IsCompact())
  2064. allCompact = false;
  2065. totalChars += curr->head->firstSet->Count();
  2066. }
  2067. }
  2068. if (fires)
  2069. {
  2070. // The firing condition is not strong enough yet.
  2071. fires = false;
  2072. // Check conditions (2) and (3) first because they're faster, then check condition (1).
  2073. if (numNonEmpty == 0 || isFollowIrrefutable && allSimpleOneChar && totalChars == firstSet->Count())
  2074. {
  2075. fires = true;
  2076. }
  2077. else if (!followConsumes.CouldMatchEmpty())
  2078. {
  2079. // Check whether all FIRST sets are pairwise disjoint
  2080. // and disjoint from the FOLLOW set.
  2081. CharSet<Char> unionSet;
  2082. unionSet.UnionInPlace(compiler.ctAllocator, *firstSet);
  2083. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  2084. if (totalChars + followSet->Count() == unionSet.Count())
  2085. fires = true;
  2086. }
  2087. if (fires)
  2088. {
  2089. // **COMMIT**
  2090. if (numNonEmpty == 0)
  2091. scheme = None;
  2092. else if (allSimpleOneChar)
  2093. scheme = Set;
  2094. else if (numNonEmpty > 1 && allCompact && totalChars <= Switch20Inst::MaxCases)
  2095. {
  2096. switchSize = totalChars;
  2097. scheme = Switch;
  2098. }
  2099. else
  2100. scheme = Chain;
  2101. isOptional = true;
  2102. return;
  2103. }
  2104. }
  2105. }
  2106. //
  2107. // Compilation scheme: Trie
  2108. //
  2109. // If alt is equivalent to the form:
  2110. // (literal1|...|literaln)
  2111. // (we expand items with embedded character classes such as a[bc]d to (abd|acd)) and either:
  2112. // - follow is irrefutable and no later literal is a proper prefix of an earlier literal
  2113. // (and we may ignore later literals which have an earlier literal as proper prefix)
  2114. // E.g.: (ab|ac|abd)a* === (ab|ac)a*
  2115. // or:
  2116. // - follow is not irrefutable and no literal is a proper prefix of any other literal
  2117. // and the branching factor of the resulting trie is smallish
  2118. // E.g.: (abc|abd|abe)f
  2119. // then we can use a character trie to match the appropriate item.
  2120. //
  2121. {
  2122. // Pass 1: Items must be structurally appropriate and not result in too many alternatives after expansion
  2123. bool fires = true;
  2124. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2125. {
  2126. uint numAlts = 1;
  2127. if (!curr->head->IsCharTrieArm(compiler, numAlts))
  2128. {
  2129. fires = false;
  2130. break;
  2131. }
  2132. if (numAlts > maxTrieArmExpansion)
  2133. {
  2134. fires = false;
  2135. break;
  2136. }
  2137. }
  2138. if (fires)
  2139. {
  2140. // Pass 2: Attempt to construct the trie, checking for prefixes.
  2141. CharTrie trie;
  2142. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2143. {
  2144. if (!curr->head->BuildCharTrie(compiler, &trie, 0, isFollowIrrefutable))
  2145. {
  2146. fires = false;
  2147. break;
  2148. }
  2149. }
  2150. if (fires)
  2151. {
  2152. // **COMMIT**
  2153. // If follow is irrefutable and first item is empty, the trie would be of depth zero.
  2154. // However, in this case, the first simplification rule would have replaced the alt with a
  2155. // single empty item, and the 'None' compilation scheme would have been selected above.
  2156. //
  2157. // Similarly, if all alternations are empty and follow is refutable, the trie would be
  2158. // of depth zero, and the 'None' compilation scheme would have been selected above.
  2159. Assert(!trie.IsDepthZero());
  2160. if (trie.IsDepthOne())
  2161. {
  2162. // This case will fire if follow is irrefutable and all non length one items have an
  2163. // earlier one-character item as prefix. In this case we don't need the trie: the
  2164. // firstSet has all the information.
  2165. isOptional = false;
  2166. scheme = Set;
  2167. }
  2168. else
  2169. {
  2170. // Root of trie will live in compile-time allocator, but body will be in run-time allocator
  2171. runtimeTrie = Anew(compiler.ctAllocator, RuntimeCharTrie);
  2172. runtimeTrie->CloneFrom(compiler.rtAllocator, trie);
  2173. scheme = Trie;
  2174. }
  2175. return;
  2176. }
  2177. }
  2178. }
  2179. //
  2180. // Compilation scheme: Try
  2181. //
  2182. scheme = Try;
  2183. isDeterministic = false; // NON-DETERMINISTIC
  2184. }
  2185. bool AltNode::SupportsPrefixSkipping(Compiler& compiler) const
  2186. {
  2187. return false;
  2188. }
  2189. Node* AltNode::HeadSyncronizingNode(Compiler& compiler)
  2190. {
  2191. return 0;
  2192. }
  2193. CharCount AltNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  2194. {
  2195. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2196. // Here, we ignore nodes with length 1, which are Char nodes. The way the Alt node synchronization
  2197. // is currently implemented, it expects all nodes to be Literal nodes. It requires quite a bit of
  2198. // refactoring to have Alt nodes support Char nodes for synchronization, so Char nodes are ignored
  2199. // for now.
  2200. int localNumLiterals = numLiterals;
  2201. CharCount minLen = head->MinSyncronizingLiteralLength(compiler, localNumLiterals);
  2202. if (minLen <= 1)
  2203. return 0;
  2204. for (AltNode* curr = tail; curr != 0; curr = curr->tail)
  2205. {
  2206. CharCount thisLen = curr->head->MinSyncronizingLiteralLength(compiler, localNumLiterals);
  2207. if (thisLen <= 1)
  2208. return 0;
  2209. minLen = min(minLen, thisLen);
  2210. }
  2211. numLiterals = localNumLiterals;
  2212. if (minLen <= 1)
  2213. {
  2214. return 0;
  2215. }
  2216. return minLen;
  2217. }
  2218. void AltNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  2219. {
  2220. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2221. for (const AltNode* curr = this; curr != 0; curr = curr->tail)
  2222. curr->head->CollectSyncronizingLiterals(compiler, scanners);
  2223. }
  2224. void AltNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  2225. {
  2226. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2227. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  2228. bestNode = this;
  2229. }
  2230. void AltNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  2231. {
  2232. PROBE_STACK(scriptContext, Js::Constants::MinStackRegex);
  2233. for (AltNode *curr = this; curr != 0; curr = curr->tail)
  2234. curr->head->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  2235. }
  2236. void AltNode::Emit(Compiler& compiler, CharCount& skipped)
  2237. {
  2238. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2239. Assert(skipped == 0);
  2240. switch (scheme)
  2241. {
  2242. case Try:
  2243. {
  2244. //
  2245. // Compilation scheme:
  2246. //
  2247. // Try((If|Match)(Char|Set))? L2
  2248. // <item 1>
  2249. // Jump Lexit
  2250. // L2: Try((If|Match)(Char|Set))? L3
  2251. // <item 2>
  2252. // Jump Lexit
  2253. // L3: <item 3>
  2254. // Lexit:
  2255. //
  2256. Assert(!isOptional);
  2257. int numItems = 0;
  2258. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2259. numItems++;
  2260. Assert(numItems >= 1);
  2261. // Each item other than last needs to jump to exit on success
  2262. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2263. Label lastTryFixup = 0;
  2264. int item = 0;
  2265. for (AltNode* curr = this; curr != 0; curr = curr->tail, item++)
  2266. {
  2267. if (item > 0)
  2268. // Fixup previous Try
  2269. compiler.DoFixup(lastTryFixup, compiler.CurrentLabel());
  2270. CharCount itemSkipped = 0;
  2271. if (item < numItems-1)
  2272. {
  2273. // HEURISTIC: if the first set of the alternative is exact or small, and the
  2274. // alternative does not match empty, then it's probably worth using
  2275. // a Try(If|Match)(Char|Set)
  2276. if (curr->head->firstSet != 0 &&
  2277. !curr->head->thisConsumes.CouldMatchEmpty() &&
  2278. (curr->head->isFirstExact || curr->head->firstSet->Count() <= maxCharsForConditionalTry))
  2279. {
  2280. if (curr->head->SupportsPrefixSkipping(compiler))
  2281. {
  2282. if (curr->head->firstSet->IsSingleton())
  2283. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryMatchCharInst, curr->head->firstSet->Singleton())->failLabel);
  2284. else
  2285. {
  2286. TryMatchSetInst* const i = EMIT(compiler, TryMatchSetInst);
  2287. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2288. lastTryFixup = compiler.GetFixup(&i->failLabel);
  2289. }
  2290. itemSkipped = 1;
  2291. }
  2292. else
  2293. {
  2294. if (curr->head->firstSet->IsSingleton())
  2295. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryIfCharInst, curr->head->firstSet->Singleton())->failLabel);
  2296. else
  2297. {
  2298. TryIfSetInst* const i = EMIT(compiler, TryIfSetInst);
  2299. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2300. lastTryFixup = compiler.GetFixup(&i->failLabel);
  2301. }
  2302. }
  2303. }
  2304. else
  2305. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryInst)->failLabel);
  2306. }
  2307. curr->head->Emit(compiler, itemSkipped);
  2308. if (item < numItems-1)
  2309. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2310. }
  2311. // Fixup jumps
  2312. for (item = 0; item < numItems-1; item++)
  2313. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2314. break;
  2315. }
  2316. case None:
  2317. {
  2318. Assert(isOptional);
  2319. // Nothing to emit
  2320. break;
  2321. }
  2322. case Trie:
  2323. {
  2324. //
  2325. // Compilation scheme:
  2326. //
  2327. // MatchTrie <trie>
  2328. //
  2329. EMIT(compiler, MatchTrieInst)->trie = *runtimeTrie;
  2330. break;
  2331. }
  2332. case Switch:
  2333. {
  2334. //
  2335. // Compilation scheme:
  2336. //
  2337. // Switch(AndConsume)?(10|20)(<dispatch to each arm>)
  2338. // Fail (if non-optional)
  2339. // Jump Lexit (if optional)
  2340. // L1: <item1>
  2341. // Jump Lexit
  2342. // L2: <item2>
  2343. // Jump Lexit
  2344. // L3: <item3>
  2345. // Lexit:
  2346. //
  2347. Assert(switchSize <= Switch20Inst::MaxCases);
  2348. int numItems = 0;
  2349. bool allCanSkip = true;
  2350. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2351. {
  2352. if (curr->head->thisConsumes.CouldMatchEmpty())
  2353. {
  2354. Assert(isOptional);
  2355. }
  2356. else
  2357. {
  2358. numItems++;
  2359. if (!curr->head->SupportsPrefixSkipping(compiler))
  2360. allCanSkip = false;
  2361. }
  2362. }
  2363. Assert(numItems > 1);
  2364. // Each item other than last needs to jump to exit on success
  2365. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2366. // We must remember where each item begins to fixup switch
  2367. Label* caseLabels = AnewArray(compiler.ctAllocator, Label, numItems);
  2368. // We must fixup the switch arms
  2369. Label switchLabel = compiler.CurrentLabel();
  2370. Assert(switchSize <= Switch20Inst::MaxCases);
  2371. if (allCanSkip)
  2372. {
  2373. if (switchSize > Switch10Inst::MaxCases)
  2374. EMIT(compiler, SwitchAndConsume20Inst);
  2375. else
  2376. EMIT(compiler, SwitchAndConsume10Inst);
  2377. }
  2378. else
  2379. {
  2380. if (switchSize > Switch10Inst::MaxCases)
  2381. EMIT(compiler, Switch20Inst);
  2382. else
  2383. EMIT(compiler, Switch10Inst);
  2384. }
  2385. Label defaultJumpFixup = 0;
  2386. if (isOptional)
  2387. // Must fixup default jump to exit
  2388. defaultJumpFixup = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2389. else
  2390. compiler.Emit<FailInst>();
  2391. // Emit each item
  2392. int item = 0;
  2393. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2394. {
  2395. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2396. {
  2397. if (allCanSkip)
  2398. skipped = 1;
  2399. caseLabels[item] = compiler.CurrentLabel();
  2400. curr->head->Emit(compiler, skipped);
  2401. if (item < numItems - 1)
  2402. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2403. item++;
  2404. }
  2405. }
  2406. // Fixup exit labels
  2407. if (isOptional)
  2408. compiler.DoFixup(defaultJumpFixup, compiler.CurrentLabel());
  2409. for (item = 0; item < numItems - 1; item++)
  2410. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2411. // Fixup the switch entries
  2412. item = 0;
  2413. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2414. {
  2415. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2416. {
  2417. Char entries[CharSet<Char>::MaxCompact];
  2418. int count = curr->head->firstSet->GetCompactEntries(CharSet<Char>::MaxCompact, entries);
  2419. Assert(count > 0);
  2420. for (int i = 0; i < count; i++)
  2421. {
  2422. if (allCanSkip)
  2423. {
  2424. if (switchSize > Switch10Inst::MaxCases)
  2425. compiler.L2I(SwitchAndConsume20, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2426. else
  2427. compiler.L2I(SwitchAndConsume10, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2428. }
  2429. else
  2430. {
  2431. if (switchSize > Switch10Inst::MaxCases)
  2432. compiler.L2I(Switch20, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2433. else
  2434. compiler.L2I(Switch10, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2435. }
  2436. }
  2437. item++;
  2438. }
  2439. }
  2440. break;
  2441. }
  2442. case Chain:
  2443. {
  2444. //
  2445. // Compilation scheme:
  2446. //
  2447. // JumpIfNot(Char|Set) L2
  2448. // <item1>
  2449. // Jump Lexit
  2450. // L2: JumpIfNot(Char|Set) L3
  2451. // <item2>
  2452. // Jump Lexit
  2453. // L3: <item3> (if non-optional)
  2454. // L3: JumpIfNot(Char|Set) Lexit (if optional)
  2455. // <item3> (if optional)
  2456. // Lexit:
  2457. //
  2458. int numItems = 0;
  2459. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2460. {
  2461. if (curr->head->thisConsumes.CouldMatchEmpty())
  2462. {
  2463. Assert(isOptional);
  2464. }
  2465. else
  2466. numItems++;
  2467. }
  2468. Assert(numItems > 0);
  2469. // Each item other than last needs to jump to exit on success
  2470. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2471. Label lastJumpFixup = 0;
  2472. int item = 0;
  2473. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2474. {
  2475. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2476. {
  2477. if (item > 0)
  2478. // Fixup previous Jump
  2479. compiler.DoFixup(lastJumpFixup, compiler.CurrentLabel());
  2480. CharCount itemSkipped = 0;
  2481. if (item < numItems-1 || isOptional)
  2482. {
  2483. if (curr->head->firstSet->IsSingleton())
  2484. {
  2485. if (curr->head->SupportsPrefixSkipping(compiler))
  2486. {
  2487. lastJumpFixup = compiler.GetFixup(&EMIT(compiler, MatchCharOrJumpInst, curr->head->firstSet->Singleton())->targetLabel);
  2488. itemSkipped = 1;
  2489. }
  2490. else
  2491. lastJumpFixup = compiler.GetFixup(&EMIT(compiler, JumpIfNotCharInst, curr->head->firstSet->Singleton())->targetLabel);
  2492. }
  2493. else
  2494. {
  2495. if (curr->head->SupportsPrefixSkipping(compiler))
  2496. {
  2497. MatchSetOrJumpInst* const i = EMIT(compiler, MatchSetOrJumpInst);
  2498. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2499. lastJumpFixup = compiler.GetFixup(&i->targetLabel);
  2500. itemSkipped = 1;
  2501. }
  2502. else
  2503. {
  2504. JumpIfNotSetInst* const i = EMIT(compiler, JumpIfNotSetInst);
  2505. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2506. lastJumpFixup = compiler.GetFixup(&i->targetLabel);
  2507. }
  2508. }
  2509. }
  2510. curr->head->Emit(compiler, itemSkipped);
  2511. if (item < numItems-1)
  2512. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2513. item++;
  2514. }
  2515. }
  2516. // Fixup jumps to exit
  2517. for (item = 0; item < numItems-1; item++)
  2518. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2519. if (isOptional)
  2520. // Fixup last Jump to exit
  2521. compiler.DoFixup(lastJumpFixup, compiler.CurrentLabel());
  2522. break;
  2523. }
  2524. case Set:
  2525. {
  2526. //
  2527. // Compilation scheme:
  2528. //
  2529. // Match(Char|Set) (non optional)
  2530. // OptMatch(Char|Set) (optional)
  2531. //
  2532. if (isOptional)
  2533. {
  2534. if (firstSet->IsSingleton())
  2535. EMIT(compiler, OptMatchCharInst, firstSet->Singleton());
  2536. else
  2537. EMIT(compiler, OptMatchSetInst)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  2538. }
  2539. else
  2540. {
  2541. if (firstSet->IsSingleton())
  2542. EMIT(compiler, MatchCharInst, firstSet->Singleton());
  2543. else
  2544. EMIT(compiler, MatchSetInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  2545. }
  2546. break;
  2547. }
  2548. }
  2549. }
  2550. CharCount AltNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  2551. {
  2552. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2553. Assert(!isHeadSyncronizingNode);
  2554. //
  2555. // Compilation scheme:
  2556. //
  2557. // SyncToLiteralsAndBackup
  2558. //
  2559. SyncToLiteralsAndBackupInst* i =
  2560. EMIT(
  2561. compiler,
  2562. SyncToLiteralsAndBackupInst,
  2563. compiler.GetScriptContext()->GetRecycler(),
  2564. compiler.GetProgram(),
  2565. prevConsumes);
  2566. CollectSyncronizingLiterals(compiler, *i);
  2567. return 0;
  2568. }
  2569. bool AltNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  2570. {
  2571. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2572. if (tail == 0 || tail->tail != 0)
  2573. // Must be exactly two alts
  2574. return false;
  2575. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2576. {
  2577. if (!oi->BeginConcat())
  2578. return false;
  2579. if (!curr->head->IsOctoquad(compiler, oi))
  2580. return false;
  2581. }
  2582. return true;
  2583. }
  2584. bool AltNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  2585. {
  2586. return false;
  2587. }
  2588. bool AltNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  2589. {
  2590. Assert(false);
  2591. return false;
  2592. }
  2593. #if ENABLE_REGEX_CONFIG_OPTIONS
  2594. void AltNode::Print(DebugWriter* w, const Char* litbuf) const
  2595. {
  2596. w->PrintEOL(_u("Alt()"));
  2597. PrintAnnotations(w);
  2598. w->PrintEOL(_u("{"));
  2599. w->Indent();
  2600. for (const AltNode *curr = this; curr != 0; curr = curr->tail)
  2601. curr->head->Print(w, litbuf);
  2602. w->Unindent();
  2603. w->PrintEOL(_u("}"));
  2604. }
  2605. #endif
  2606. // ----------------------------------------------------------------------
  2607. // DefineGroupNode
  2608. // ----------------------------------------------------------------------
  2609. CharCount DefineGroupNode::LiteralLength() const
  2610. {
  2611. return 0;
  2612. }
  2613. bool DefineGroupNode::IsCharOrPositiveSet() const
  2614. {
  2615. return false;
  2616. }
  2617. CharCount DefineGroupNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  2618. {
  2619. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2620. Assert(groupId > 0 && groupId < compiler.program->numGroups);
  2621. return body->TransferPass0(compiler, litbuf);
  2622. }
  2623. void DefineGroupNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  2624. {
  2625. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2626. body->TransferPass1(compiler, litbuf);
  2627. }
  2628. bool DefineGroupNode::IsRefiningAssertion(Compiler& compiler)
  2629. {
  2630. return false;
  2631. }
  2632. void DefineGroupNode::AnnotatePass0(Compiler& compiler)
  2633. {
  2634. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2635. body->AnnotatePass0(compiler);
  2636. isWord = body->isWord;
  2637. }
  2638. void DefineGroupNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  2639. {
  2640. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2641. features = HasDefineGroup;
  2642. body->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, parentNotSpeculative, parentNotNegated);
  2643. features |= body->features;
  2644. thisConsumes = body->thisConsumes;
  2645. firstSet = body->firstSet;
  2646. isFirstExact = body->isFirstExact;
  2647. isThisIrrefutable = body->isThisIrrefutable;
  2648. isThisWillNotProgress = body->isThisWillNotProgress;
  2649. isThisWillNotRegress = body->isThisWillNotRegress;
  2650. isNotInLoop = parentNotInLoop;
  2651. isAtLeastOnce = parentAtLeastOnce;
  2652. isNotSpeculative = parentNotSpeculative;
  2653. isNotNegated = parentNotNegated;
  2654. }
  2655. void DefineGroupNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  2656. {
  2657. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2658. prevConsumes = accumConsumes;
  2659. isPrevWillNotProgress = accumPrevWillNotProgress;
  2660. isPrevWillNotRegress = accumPrevWillNotRegress;
  2661. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  2662. }
  2663. void DefineGroupNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  2664. {
  2665. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2666. followConsumes = accumConsumes;
  2667. followSet = accumFollow;
  2668. isFollowIrrefutable = accumFollowIrrefutable;
  2669. isFollowEOL = accumFollowEOL;
  2670. body->AnnotatePass3(compiler, accumConsumes, accumFollow, accumFollowIrrefutable, accumFollowEOL);
  2671. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  2672. }
  2673. void DefineGroupNode::AnnotatePass4(Compiler& compiler)
  2674. {
  2675. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2676. body->AnnotatePass4(compiler);
  2677. isDeterministic = body->isDeterministic;
  2678. // If the follow is irrefutable and we're not in an assertion, then we are not going to backtrack beyond this point, so
  2679. // we don't need to save the group before updating it
  2680. noNeedToSave = isFollowIrrefutable && isNotSpeculative;
  2681. // Compilation scheme: Chomp
  2682. //
  2683. // Body consists of a loop node with a Chomp compilation scheme.
  2684. if(body->tag == NodeTag::Loop)
  2685. {
  2686. const LoopNode *const loop = static_cast<const LoopNode *>(body);
  2687. if(loop->scheme == LoopNode::CompilationScheme::Chomp && loop->repeats.lower <= 1 && loop->repeats.IsUnbounded())
  2688. {
  2689. // **COMMIT**
  2690. scheme = Chomp;
  2691. return;
  2692. }
  2693. }
  2694. // Compilation scheme: Fixed
  2695. //
  2696. // Body has fixed width, so don't need a Begin instruction to keep track of the input start offset of the group.
  2697. if (body->thisConsumes.IsFixed())
  2698. {
  2699. // **COMMIT**
  2700. scheme = Fixed;
  2701. return;
  2702. }
  2703. // Compilation scheme: BeginEnd
  2704. //
  2705. // If both the body and the follow are irrefutable, we're not in any loops, and we're not in an assertion,
  2706. // then we don't need to save the group before updating it.
  2707. // **COMMIT**
  2708. scheme = BeginEnd;
  2709. }
  2710. bool DefineGroupNode::SupportsPrefixSkipping(Compiler& compiler) const
  2711. {
  2712. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2713. if (scheme != Fixed)
  2714. // We can't skip over part of the match if the BeginDefineGroup must capture it's start
  2715. return false;
  2716. return body->SupportsPrefixSkipping(compiler);
  2717. }
  2718. Node* DefineGroupNode::HeadSyncronizingNode(Compiler& compiler)
  2719. {
  2720. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2721. if (scheme != Fixed)
  2722. // Can't skip BeginDefineGroup
  2723. return 0;
  2724. return body->HeadSyncronizingNode(compiler);
  2725. }
  2726. CharCount DefineGroupNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  2727. {
  2728. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2729. return body->MinSyncronizingLiteralLength(compiler, numLiterals);
  2730. }
  2731. void DefineGroupNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  2732. {
  2733. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2734. body->CollectSyncronizingLiterals(compiler, scanners);
  2735. }
  2736. void DefineGroupNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  2737. {
  2738. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2739. body->BestSyncronizingNode(compiler, bestNode);
  2740. }
  2741. void DefineGroupNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  2742. {
  2743. PROBE_STACK(scriptContext, Js::Constants::MinStackRegex);
  2744. if (groupId < minGroup)
  2745. minGroup = groupId;
  2746. if (groupId > maxGroup)
  2747. maxGroup = groupId;
  2748. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  2749. }
  2750. void DefineGroupNode::Emit(Compiler& compiler, CharCount& skipped)
  2751. {
  2752. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2753. switch (scheme)
  2754. {
  2755. case Chomp:
  2756. {
  2757. // Compilation scheme:
  2758. //
  2759. // Chomp(Char|Set)Group
  2760. Assert(body->tag == NodeTag::Loop);
  2761. const LoopNode *const loop = static_cast<const LoopNode *>(body);
  2762. const CharSet<Char> *const loopBodyFirstSet = loop->body->firstSet;
  2763. const CountDomain &repeats = loop->repeats;
  2764. Assert(repeats.lower <= 1 && repeats.IsUnbounded());
  2765. if(loopBodyFirstSet->IsSingleton())
  2766. {
  2767. const Char c = loopBodyFirstSet->Singleton();
  2768. if(repeats.lower == 0)
  2769. EMIT(compiler, ChompCharGroupInst<ChompMode::Star>, c, groupId, noNeedToSave);
  2770. else
  2771. EMIT(compiler, ChompCharGroupInst<ChompMode::Plus>, c, groupId, noNeedToSave);
  2772. }
  2773. else
  2774. {
  2775. Assert(repeats.lower <= 1 && repeats.IsUnbounded());
  2776. RuntimeCharSet<Char> *runtimeSet;
  2777. if(repeats.lower == 0)
  2778. runtimeSet = &EMIT(compiler, ChompSetGroupInst<ChompMode::Star>, groupId, noNeedToSave)->set;
  2779. else
  2780. runtimeSet = &EMIT(compiler, ChompSetGroupInst<ChompMode::Plus>, groupId, noNeedToSave)->set;
  2781. runtimeSet->CloneFrom(compiler.rtAllocator, *loopBodyFirstSet);
  2782. }
  2783. break;
  2784. }
  2785. case Fixed:
  2786. {
  2787. // Compilation scheme:
  2788. //
  2789. // <body>
  2790. // DefineGroup
  2791. Assert(body->thisConsumes.IsFixed());
  2792. body->Emit(compiler, skipped);
  2793. EMIT(compiler, DefineGroupFixedInst, groupId, body->thisConsumes.lower, noNeedToSave);
  2794. break;
  2795. }
  2796. case BeginEnd:
  2797. {
  2798. // Compilation scheme:
  2799. //
  2800. // BeginDefineGroup
  2801. // <body>
  2802. // EndDefineGroup
  2803. Assert(skipped == 0);
  2804. EMIT(compiler, BeginDefineGroupInst, groupId);
  2805. body->Emit(compiler, skipped);
  2806. EMIT(compiler, EndDefineGroupInst, groupId, noNeedToSave);
  2807. break;
  2808. }
  2809. }
  2810. }
  2811. CharCount DefineGroupNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  2812. {
  2813. Assert(false);
  2814. return 0;
  2815. }
  2816. bool DefineGroupNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  2817. {
  2818. return false;
  2819. }
  2820. bool DefineGroupNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  2821. {
  2822. return false;
  2823. }
  2824. bool DefineGroupNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  2825. {
  2826. Assert(false);
  2827. return false;
  2828. }
  2829. #if ENABLE_REGEX_CONFIG_OPTIONS
  2830. void DefineGroupNode::Print(DebugWriter* w, const Char* litbuf) const
  2831. {
  2832. w->PrintEOL(_u("DefineGroup(%d)"), groupId);
  2833. PrintAnnotations(w);
  2834. w->PrintEOL(_u("{"));
  2835. w->Indent();
  2836. body->Print(w, litbuf);
  2837. w->Unindent();
  2838. w->PrintEOL(_u("}"));
  2839. }
  2840. #endif
  2841. // ----------------------------------------------------------------------
  2842. // MatchGroupNode
  2843. // ----------------------------------------------------------------------
  2844. CharCount MatchGroupNode::LiteralLength() const
  2845. {
  2846. return 0;
  2847. }
  2848. bool MatchGroupNode::IsCharOrPositiveSet() const
  2849. {
  2850. return false;
  2851. }
  2852. CharCount MatchGroupNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  2853. {
  2854. Assert(groupId > 0 && groupId < compiler.program->numGroups);
  2855. return 0;
  2856. }
  2857. void MatchGroupNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  2858. {
  2859. }
  2860. bool MatchGroupNode::IsRefiningAssertion(Compiler& compiler)
  2861. {
  2862. return false;
  2863. }
  2864. void MatchGroupNode::AnnotatePass0(Compiler& compiler)
  2865. {
  2866. isWord = false;
  2867. }
  2868. void MatchGroupNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  2869. {
  2870. features = HasMatchGroup;
  2871. thisConsumes.lower = 0;
  2872. thisConsumes.upper = CharCountFlag;
  2873. firstSet = compiler.standardChars->GetFullSet();
  2874. isFirstExact = false;
  2875. isThisIrrefutable = false;
  2876. isThisWillNotProgress = true;
  2877. isThisWillNotRegress = true;
  2878. isNotInLoop = parentNotInLoop;
  2879. isAtLeastOnce = parentAtLeastOnce;
  2880. isNotSpeculative = parentNotSpeculative;
  2881. isNotNegated = parentNotNegated;
  2882. }
  2883. void MatchGroupNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  2884. {
  2885. prevConsumes = accumConsumes;
  2886. isPrevWillNotProgress = accumPrevWillNotProgress;
  2887. isPrevWillNotRegress = accumPrevWillNotRegress;
  2888. }
  2889. void MatchGroupNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  2890. {
  2891. followConsumes = accumConsumes;
  2892. followSet = accumFollow;
  2893. isFollowIrrefutable = accumFollowIrrefutable;
  2894. isFollowEOL = accumFollowEOL;
  2895. }
  2896. void MatchGroupNode::AnnotatePass4(Compiler& compiler)
  2897. {
  2898. isDeterministic = true;
  2899. }
  2900. bool MatchGroupNode::SupportsPrefixSkipping(Compiler& compiler) const
  2901. {
  2902. return false;
  2903. }
  2904. Node* MatchGroupNode::HeadSyncronizingNode(Compiler& compiler)
  2905. {
  2906. return 0;
  2907. }
  2908. CharCount MatchGroupNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  2909. {
  2910. return 0;
  2911. }
  2912. void MatchGroupNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  2913. {
  2914. Assert(false);
  2915. }
  2916. void MatchGroupNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  2917. {
  2918. }
  2919. void MatchGroupNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  2920. {
  2921. }
  2922. void MatchGroupNode::Emit(Compiler& compiler, CharCount& skipped)
  2923. {
  2924. Assert(skipped == 0);
  2925. //
  2926. // Compilation scheme:
  2927. //
  2928. // MatchGroup
  2929. //
  2930. EMIT(compiler, MatchGroupInst, groupId);
  2931. }
  2932. CharCount MatchGroupNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  2933. {
  2934. Assert(false);
  2935. return 0;
  2936. }
  2937. bool MatchGroupNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  2938. {
  2939. return false;
  2940. }
  2941. bool MatchGroupNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  2942. {
  2943. return false;
  2944. }
  2945. bool MatchGroupNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  2946. {
  2947. Assert(false);
  2948. return false;
  2949. }
  2950. #if ENABLE_REGEX_CONFIG_OPTIONS
  2951. void MatchGroupNode::Print(DebugWriter* w, const Char* litbuf) const
  2952. {
  2953. w->PrintEOL(_u("MatchGroup(%d)"), groupId);
  2954. PrintAnnotations(w);
  2955. }
  2956. #endif
  2957. // ----------------------------------------------------------------------
  2958. // LoopNode
  2959. // ----------------------------------------------------------------------
  2960. CharCount LoopNode::LiteralLength() const
  2961. {
  2962. return 0;
  2963. }
  2964. bool LoopNode::IsCharOrPositiveSet() const
  2965. {
  2966. return false;
  2967. }
  2968. CharCount LoopNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  2969. {
  2970. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2971. Assert(repeats.upper == CharCountFlag || repeats.upper > 0);
  2972. Assert(repeats.upper == CharCountFlag || repeats.upper >= repeats.lower);
  2973. Assert(!(repeats.lower == 1 && repeats.upper == 1));
  2974. return body->TransferPass0(compiler, litbuf);
  2975. }
  2976. void LoopNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  2977. {
  2978. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2979. body->TransferPass1(compiler, litbuf);
  2980. }
  2981. bool LoopNode::IsRefiningAssertion(Compiler& compiler)
  2982. {
  2983. return false;
  2984. }
  2985. void LoopNode::AnnotatePass0(Compiler& compiler)
  2986. {
  2987. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2988. body->AnnotatePass0(compiler);
  2989. isWord = !repeats.CouldMatchEmpty() && body->isWord;
  2990. }
  2991. void LoopNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  2992. {
  2993. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  2994. features = HasLoop;
  2995. isNotInLoop = parentNotInLoop;
  2996. isAtLeastOnce = parentAtLeastOnce;
  2997. isNotSpeculative = parentNotSpeculative;
  2998. isNotNegated = parentNotNegated;
  2999. body->AnnotatePass1(compiler, false, parentAtLeastOnce && repeats.lower > 0, parentNotSpeculative, isNotNegated);
  3000. features |= body->features;
  3001. thisConsumes = body->thisConsumes;
  3002. thisConsumes.Mult(repeats);
  3003. firstSet = body->firstSet;
  3004. isFirstExact = repeats.lower > 0 && body->isFirstExact;
  3005. isThisIrrefutable = repeats.CouldMatchEmpty() || body->isThisIrrefutable;
  3006. // Caution: Even if a greedy loop has a 'isThisWillNotProgress' body, if the body has choicepoints then
  3007. // a backtrack could resume execution at an earlier loop iteration, which may then continue to repeat
  3008. // the loop beyond the input offset which triggered the backtrack. Ideally we'd use the body's isDeterministic
  3009. // flag to tell us when that can't happen, but it's not available till pass 4, so we must make do with
  3010. // a simple-minded structural approximation.
  3011. isThisWillNotProgress = (isGreedy || repeats.IsExact(1)) && body->isThisWillNotProgress && body->IsObviouslyDeterministic();
  3012. isThisWillNotRegress = (!isGreedy || repeats.IsExact(1)) && body->isThisWillNotRegress && body->IsObviouslyDeterministic();
  3013. }
  3014. void LoopNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  3015. {
  3016. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3017. prevConsumes = accumConsumes;
  3018. isPrevWillNotProgress = accumPrevWillNotProgress;
  3019. isPrevWillNotRegress = accumPrevWillNotRegress;
  3020. // May have already gone through loop when starting body
  3021. CountDomain bodyConsumes = body->thisConsumes;
  3022. CharCountOrFlag prevMax = repeats.upper;
  3023. if (prevMax != CharCountFlag)
  3024. prevMax--;
  3025. CountDomain prevLoops(0, prevMax);
  3026. bodyConsumes.Mult(prevLoops);
  3027. accumConsumes.Add(bodyConsumes);
  3028. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress && isThisWillNotProgress, accumPrevWillNotRegress && isThisWillNotRegress);
  3029. }
  3030. void LoopNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  3031. {
  3032. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3033. followConsumes = accumConsumes;
  3034. followSet = accumFollow;
  3035. isFollowIrrefutable = accumFollowIrrefutable;
  3036. isFollowEOL = accumFollowEOL;
  3037. // May go through loop again when leaving body
  3038. CountDomain bodyConsumes = body->thisConsumes;
  3039. CharCountOrFlag nextMax = repeats.upper;
  3040. if (nextMax != CharCountFlag)
  3041. nextMax--;
  3042. CountDomain nextLoops(0, nextMax);
  3043. bodyConsumes.Mult(nextLoops);
  3044. accumConsumes.Add(bodyConsumes);
  3045. CharSet<Char>* innerFollow = Anew(compiler.ctAllocator, UnicodeCharSet);
  3046. innerFollow->UnionInPlace(compiler.ctAllocator, *accumFollow);
  3047. innerFollow->UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3048. /*
  3049. All of the following must be true for the loop body's follow to be irrefutable:
  3050. The loop's follow is irrefutable.
  3051. The loop can complete the required minimum number of iterations of the body without backtracking into a completed
  3052. iteration of the body.
  3053. - If repeats.lower == 0, the required minimum number of iterations is met without executing the body
  3054. - If repeats.lower == 1
  3055. - If the first iteration of the body fails, there is no previous iteration of the body to backtrack into
  3056. - After completing the first iteration of the body, the loop cannot reject the first iteration for not
  3057. making progress because the iteration is required for the loop to succeed
  3058. - If repeats.lower >= 2
  3059. - If the second iteration of the body fails, it will backtrack into the first iteration of the body
  3060. - To prevent this, the body must be irrefutable
  3061. After completing the required minimum number of iterations of the body, the loop cannot reject a subsequent
  3062. completed iteration of the body for not making progress.
  3063. - If !isGreedy || repeats.IsFixed(), there will not be any more iterations of the body, as it will proceed to
  3064. the irrefutable follow
  3065. - If !body->thisConsumes.CouldMatchEmpty(), subsequent iterations of the body cannot complete without making
  3066. progress
  3067. */
  3068. const bool isBodyFollowIrrefutable =
  3069. accumFollowIrrefutable &&
  3070. (repeats.lower <= 1 || body->isThisIrrefutable) &&
  3071. (!isGreedy || !body->thisConsumes.CouldMatchEmpty() || repeats.IsFixed());
  3072. body->AnnotatePass3(compiler, accumConsumes, innerFollow, isBodyFollowIrrefutable, false);
  3073. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  3074. }
  3075. void LoopNode::AnnotatePass4(Compiler& compiler)
  3076. {
  3077. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3078. body->AnnotatePass4(compiler);
  3079. isDeterministic = body->isDeterministic;
  3080. //
  3081. // Loops can be defined by unfolding:
  3082. // r* === (rr*|)
  3083. // r*? === (|rr*?)
  3084. // Thus many of the optimizations for alternatives carry over to loops.
  3085. //
  3086. //
  3087. // Compilation scheme: None
  3088. //
  3089. // If overall loop is empty-only then emit nothing.
  3090. // (Parser has already eliminated loops with upper == 0, so this can only happen if the body is empty-only)
  3091. //
  3092. // If loop is non-greedy with lower 0 and follow is irrefutable, then loop body will never be executed
  3093. // no emit nothing.
  3094. //
  3095. if (body->IsEmptyOnly() ||
  3096. (!isGreedy && repeats.lower == 0 && isFollowIrrefutable))
  3097. {
  3098. // **COMMIT**
  3099. scheme = None;
  3100. return;
  3101. }
  3102. //
  3103. // Compilation scheme: Chain/Try
  3104. //
  3105. // If loop is greedy, with lower 0 and upper 1, then we'd like to treat it as body|<empty> so as to avoid all the loop
  3106. // overhead. However, if the body could match empty, then a match against empty must be treated as an 'iteration' of the
  3107. // loop body which made no progress. So we treat as a general loop in that case. Otherwise, we may inline two of
  3108. // AltNode's compilation schemes:
  3109. // Examples:
  3110. // - /(a*)?/.exec("") must leave group 1 undefined rather than empty.
  3111. // - /(?:a||b)?/.exec("b") chooses the empty alt, then must backtrack due to no progress, and match 'b'.
  3112. // This is not the same as /a||b|/, as picking the first empty alt would result in success.
  3113. //
  3114. // (cf AltNode's None/Switch/Chain/Set, isOptional compilation scheme)
  3115. // If body cannot match empty, follow cannot match empty, and the body FIRST set is disjoint from the FOLLOW
  3116. // set, then we can commit to the body using a 1 char lookahead.
  3117. //
  3118. // If body cannot match empty, and follow is irrefutable, then we can commit to the body using a 1 char
  3119. // lookahead provided:
  3120. // ** the body can't fail if given an arbitrary input starting with a character in its FIRST set **
  3121. //
  3122. // (cf AltNode's Try compilation scheme)
  3123. // Otherwise, protect body by a Try instruction.
  3124. //
  3125. if (isGreedy && repeats.lower == 0 && repeats.upper == 1 && !body->thisConsumes.CouldMatchEmpty())
  3126. {
  3127. // **COMMIT**
  3128. // Note that the FIRST of the loop is already the union of the body FIRST and the loop FOLLOW
  3129. if (!body->thisConsumes.CouldMatchEmpty() &&
  3130. ((!followConsumes.CouldMatchEmpty() && firstSet->Count() == body->firstSet->Count() + followSet->Count()) ||
  3131. (isFollowIrrefutable && body->IsSimpleOneChar())))
  3132. {
  3133. if (body->IsSimpleOneChar())
  3134. scheme = Set;
  3135. else
  3136. scheme = Chain;
  3137. }
  3138. else
  3139. {
  3140. scheme = Try;
  3141. isDeterministic = false; // NON-DETERMINISTIC
  3142. }
  3143. return;
  3144. }
  3145. //
  3146. // Compilation scheme: Chomp/ChompGroupLastChar
  3147. //
  3148. // If the body is a simple-one-char, or a group of a simple-one-char, and either:
  3149. // - follow is non-empty and FIRST and FOLLOW are disjoint
  3150. // - loop is greedy and follow is irrefutable
  3151. // - follow is EOL
  3152. // then consume up to upper number of characters in FIRST and fail if number consumed is not >= lower.
  3153. //
  3154. if (body->IsSimpleOneChar() || (body->tag == DefineGroup && ((DefineGroupNode*)body)->body->IsSimpleOneChar()))
  3155. {
  3156. if (!followConsumes.CouldMatchEmpty())
  3157. {
  3158. CharSet<Char> unionSet;
  3159. CharCount totalChars = 0;
  3160. unionSet.UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3161. totalChars += body->firstSet->Count();
  3162. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  3163. totalChars += followSet->Count();
  3164. if (totalChars == unionSet.Count())
  3165. {
  3166. // **COMMIT**
  3167. if (body->tag == DefineGroup)
  3168. {
  3169. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3170. scheme = ChompGroupLastChar;
  3171. }
  3172. else
  3173. scheme = Chomp;
  3174. return;
  3175. }
  3176. }
  3177. if ((isGreedy && isFollowIrrefutable) || isFollowEOL)
  3178. {
  3179. // **COMMIT**
  3180. if (body->tag == DefineGroup)
  3181. {
  3182. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3183. scheme = ChompGroupLastChar;
  3184. }
  3185. else
  3186. scheme = Chomp;
  3187. return;
  3188. }
  3189. }
  3190. //
  3191. // Compilation scheme: Guarded
  3192. //
  3193. // If body cannot match empty, follow cannot match empty, and FIRST of body and FOLLOW are
  3194. // disjoint then can use 1 char lookahead to decide whether to commit to another loop body.
  3195. // (If the loop body fails then we know the follow will fail even with one more/fewer iterations of the
  3196. // loop body, so we can let that failure propagate without needing to push choicepoints.)
  3197. //
  3198. if (!body->thisConsumes.CouldMatchEmpty() && !followConsumes.CouldMatchEmpty())
  3199. {
  3200. CharSet<Char> unionSet;
  3201. CharCount totalChars = 0;
  3202. unionSet.UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3203. totalChars += body->firstSet->Count();
  3204. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  3205. totalChars += followSet->Count();
  3206. if (totalChars == unionSet.Count())
  3207. {
  3208. // **COMMIT**
  3209. scheme = Guarded;
  3210. return;
  3211. }
  3212. }
  3213. //
  3214. // Compilation scheme: Fixed/FixedSet/FixedGroupLastIteration
  3215. //
  3216. // If loop is greedy, body is deterministic, non-zero fixed width, and either does not define any groups
  3217. // or has one outermost group, then we can keep track of the backtracking information in constant space.
  3218. //
  3219. // If body does have an outer group, we can avoid saving the existing group contents if the follow
  3220. // is irrefutable, we're not in an outer loop, and we're not in an assertion.
  3221. //
  3222. if (isGreedy && body->isDeterministic && !body->thisConsumes.CouldMatchEmpty() && body->thisConsumes.IsFixed())
  3223. {
  3224. if (body->tag == DefineGroup)
  3225. {
  3226. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3227. if (!bodyGroup->body->ContainsDefineGroup())
  3228. {
  3229. // **COMMIT**
  3230. scheme = FixedGroupLastIteration;
  3231. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3232. isDeterministic = false; // NON-DETERMINISTIC;
  3233. return;
  3234. }
  3235. }
  3236. else if (body->IsSimpleOneChar())
  3237. {
  3238. // **COMMIT**
  3239. scheme = FixedSet;
  3240. isDeterministic = false; // NON-DETERMINISTIC
  3241. return;
  3242. }
  3243. else if (!body->ContainsDefineGroup())
  3244. {
  3245. // **COMMIT**
  3246. scheme = Fixed;
  3247. isDeterministic = false; // NON-DETERMINISTIC
  3248. return;
  3249. }
  3250. }
  3251. //
  3252. // Compilation scheme: GreedyNoBacktrack
  3253. //
  3254. // If loop is greedy with lower == 0 and upper == inf, the loop body is deterministic and does not define
  3255. // groups, and follow is irrefutable, then we will never have to try fewer iterations of the loop once
  3256. // entering the follow. Thus we only need one continuation record on the stack to protect against failure
  3257. // for each attempt at the loop body.
  3258. //
  3259. if (isGreedy && repeats.lower == 0 && repeats.upper == CharCountFlag && body->isDeterministic && !body->ContainsDefineGroup() && isFollowIrrefutable)
  3260. {
  3261. // **COMMIT**
  3262. scheme = GreedyNoBacktrack;
  3263. return;
  3264. }
  3265. //
  3266. // Compilation scheme: BeginEnd
  3267. //
  3268. scheme = BeginEnd;
  3269. isDeterministic = false; // NON-DETERMINISTIC
  3270. }
  3271. bool LoopNode::SupportsPrefixSkipping(Compiler& compiler) const
  3272. {
  3273. return false;
  3274. }
  3275. Node* LoopNode::HeadSyncronizingNode(Compiler& compiler)
  3276. {
  3277. return 0;
  3278. }
  3279. CharCount LoopNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  3280. {
  3281. return 0;
  3282. }
  3283. void LoopNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  3284. {
  3285. Assert(false);
  3286. }
  3287. void LoopNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  3288. {
  3289. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3290. if (repeats.lower > 0)
  3291. body->BestSyncronizingNode(compiler, bestNode);
  3292. // else: can't be sure loop will be taken
  3293. }
  3294. void LoopNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  3295. {
  3296. PROBE_STACK(scriptContext, Js::Constants::MinStackRegex);
  3297. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  3298. }
  3299. void LoopNode::Emit(Compiler& compiler, CharCount& skipped)
  3300. {
  3301. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3302. Assert(skipped == 0);
  3303. switch (scheme)
  3304. {
  3305. case BeginEnd:
  3306. {
  3307. //
  3308. // Compilation scheme:
  3309. //
  3310. // Lloop: BeginLoop Lexit
  3311. // <loop body>
  3312. // RepeatLoop Lloop
  3313. // Lexit:
  3314. //
  3315. int minBodyGroupId = compiler.program->numGroups;
  3316. int maxBodyGroupId = -1;
  3317. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3318. Label beginLabel = compiler.CurrentLabel();
  3319. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopInst, compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId, isGreedy)->exitLabel);
  3320. body->Emit(compiler, skipped);
  3321. EMIT(compiler, RepeatLoopInst, beginLabel);
  3322. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3323. break;
  3324. }
  3325. case None:
  3326. {
  3327. // Nothing to emit
  3328. break;
  3329. }
  3330. case Chomp:
  3331. {
  3332. //
  3333. // Compilation scheme:
  3334. //
  3335. // Chomp(Char|Set)(Star|Plus|Bounded)
  3336. //
  3337. if(body->firstSet->IsSingleton())
  3338. {
  3339. if(repeats.lower <= 1 && repeats.IsUnbounded())
  3340. {
  3341. if(repeats.lower == 0)
  3342. EMIT(compiler, ChompCharInst<ChompMode::Star>, body->firstSet->Singleton());
  3343. else
  3344. EMIT(compiler, ChompCharInst<ChompMode::Plus>, body->firstSet->Singleton());
  3345. }
  3346. else
  3347. EMIT(compiler, ChompCharBoundedInst, body->firstSet->Singleton(), repeats);
  3348. }
  3349. else
  3350. {
  3351. if(repeats.lower <= 1 && repeats.IsUnbounded())
  3352. {
  3353. if(repeats.lower == 0)
  3354. EMIT(compiler, ChompSetInst<ChompMode::Star>)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3355. else
  3356. EMIT(compiler, ChompSetInst<ChompMode::Plus>)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3357. }
  3358. else
  3359. EMIT(compiler, ChompSetBoundedInst, repeats)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3360. }
  3361. break;
  3362. }
  3363. case ChompGroupLastChar:
  3364. {
  3365. //
  3366. // Compilation scheme:
  3367. //
  3368. // ChompSetGroup
  3369. //
  3370. Assert(body->tag == DefineGroup);
  3371. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3372. EMIT(compiler, ChompSetBoundedGroupLastCharInst, repeats, bodyGroup->groupId, noNeedToSave)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3373. break;
  3374. }
  3375. case Guarded:
  3376. {
  3377. //
  3378. // Compilation scheme:
  3379. //
  3380. // Lloop: BeginLoopIf(Char|Set) Lexit
  3381. // <loop body>
  3382. // RepeatLoopIf(Char|Set) Lloop
  3383. // Lexit:
  3384. //
  3385. int minBodyGroupId = compiler.program->numGroups;
  3386. int maxBodyGroupId = -1;
  3387. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3388. Label beginLabel = compiler.CurrentLabel();
  3389. Label exitFixup;
  3390. if (body->firstSet->IsSingleton())
  3391. exitFixup = compiler.GetFixup(&EMIT(compiler, BeginLoopIfCharInst, body->firstSet->Singleton(), compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId)->exitLabel);
  3392. else
  3393. {
  3394. BeginLoopIfSetInst* i = EMIT(compiler, BeginLoopIfSetInst, compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId);
  3395. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3396. exitFixup = compiler.GetFixup(&i->exitLabel);
  3397. }
  3398. body->Emit(compiler, skipped);
  3399. if (body->firstSet->IsSingleton())
  3400. EMIT(compiler, RepeatLoopIfCharInst, beginLabel);
  3401. else
  3402. EMIT(compiler, RepeatLoopIfSetInst, beginLabel);
  3403. compiler.DoFixup(exitFixup, compiler.CurrentLabel());
  3404. break;
  3405. }
  3406. case Fixed:
  3407. {
  3408. //
  3409. // Compilation scheme:
  3410. //
  3411. // Lloop: BeginLoopFixed Lexit
  3412. // <loop body>
  3413. // RepeatLoopFixed Lloop
  3414. // Lexit:
  3415. //
  3416. Assert(!body->ContainsDefineGroup());
  3417. Assert(body->thisConsumes.IsFixed());
  3418. Assert(body->thisConsumes.lower > 0);
  3419. Assert(body->isDeterministic);
  3420. Label beginLabel = compiler.CurrentLabel();
  3421. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopFixedInst, compiler.NextLoopId(), repeats, !isNotInLoop, body->thisConsumes.lower)->exitLabel);
  3422. body->Emit(compiler, skipped);
  3423. EMIT(compiler, RepeatLoopFixedInst, beginLabel);
  3424. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3425. break;
  3426. }
  3427. case FixedSet:
  3428. {
  3429. //
  3430. // Compilation scheme:
  3431. //
  3432. // LoopSet
  3433. //
  3434. Assert(body->IsSimpleOneChar());
  3435. EMIT(compiler, LoopSetInst, compiler.NextLoopId(), repeats, !isNotInLoop)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3436. break;
  3437. }
  3438. case FixedGroupLastIteration:
  3439. {
  3440. //
  3441. // Compilation scheme:
  3442. //
  3443. // Lloop: BeginLoopFixedGroupLastIteration Lexit
  3444. // <loop body>
  3445. // RepeatLoopFixedGroupLastIteration Lloop
  3446. // Lexit:
  3447. //
  3448. Assert(body->tag == DefineGroup);
  3449. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3450. Assert(body->thisConsumes.IsFixed());
  3451. Assert(body->thisConsumes.lower > 0);
  3452. Assert(body->isDeterministic);
  3453. Label beginLabel = compiler.CurrentLabel();
  3454. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopFixedGroupLastIterationInst, compiler.NextLoopId(), repeats, !isNotInLoop, body->thisConsumes.lower, bodyGroup->groupId, noNeedToSave)->exitLabel);
  3455. bodyGroup->body->Emit(compiler, skipped);
  3456. EMIT(compiler, RepeatLoopFixedGroupLastIterationInst, beginLabel);
  3457. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3458. break;
  3459. }
  3460. case GreedyNoBacktrack:
  3461. {
  3462. //
  3463. // Compilation scheme:
  3464. //
  3465. // Lloop: BeginGreedyLoopNoBacktrack Lexit
  3466. // <loop body>
  3467. // RepeatGreedyLoopNoBacktrack Lloop
  3468. // Lexit:
  3469. //
  3470. Assert(!body->ContainsDefineGroup());
  3471. Assert(isGreedy);
  3472. Assert(repeats.lower == 0);
  3473. Assert(repeats.upper == CharCountFlag);
  3474. Assert(body->isDeterministic);
  3475. Label beginLabel = compiler.CurrentLabel();
  3476. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginGreedyLoopNoBacktrackInst, compiler.NextLoopId())->exitLabel);
  3477. body->Emit(compiler, skipped);
  3478. EMIT(compiler, RepeatGreedyLoopNoBacktrackInst, beginLabel);
  3479. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3480. break;
  3481. }
  3482. case Set:
  3483. {
  3484. //
  3485. // Compilation scheme:
  3486. //
  3487. // OptMatch(Char|Set)
  3488. //
  3489. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3490. if (body->firstSet->IsSingleton())
  3491. EMIT(compiler, OptMatchCharInst, body->firstSet->Singleton());
  3492. else
  3493. EMIT(compiler, OptMatchSetInst)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3494. break;
  3495. }
  3496. case Chain:
  3497. {
  3498. //
  3499. // Compilation scheme:
  3500. //
  3501. // JumpIfNot(Char|Set) Lexit
  3502. // <body>
  3503. // Lexit:
  3504. //
  3505. //
  3506. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3507. Label jumpFixup = 0;
  3508. CharCount bodySkipped = 0;
  3509. if (body->firstSet->IsSingleton())
  3510. {
  3511. if (body->SupportsPrefixSkipping(compiler))
  3512. {
  3513. jumpFixup = compiler.GetFixup(&EMIT(compiler, MatchCharOrJumpInst, body->firstSet->Singleton())->targetLabel);
  3514. bodySkipped = 1;
  3515. }
  3516. else
  3517. jumpFixup = compiler.GetFixup(&EMIT(compiler, JumpIfNotCharInst, body->firstSet->Singleton())->targetLabel);
  3518. }
  3519. else
  3520. {
  3521. if (body->SupportsPrefixSkipping(compiler))
  3522. {
  3523. MatchSetOrJumpInst* const i = EMIT(compiler, MatchSetOrJumpInst);
  3524. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3525. jumpFixup = compiler.GetFixup(&i->targetLabel);
  3526. bodySkipped = 1;
  3527. }
  3528. else
  3529. {
  3530. JumpIfNotSetInst* const i = EMIT(compiler, JumpIfNotSetInst);
  3531. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3532. jumpFixup = compiler.GetFixup(&i->targetLabel);
  3533. }
  3534. }
  3535. body->Emit(compiler, bodySkipped);
  3536. compiler.DoFixup(jumpFixup, compiler.CurrentLabel());
  3537. break;
  3538. }
  3539. case Try:
  3540. {
  3541. //
  3542. // Compilation scheme:
  3543. //
  3544. // Try((If|Match)(Char|Set))? Lexit
  3545. // <loop body>
  3546. // Lexit:
  3547. //
  3548. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3549. Label tryFixup = 0;
  3550. CharCount bodySkipped = 0;
  3551. // HEURISTIC: if the first set of the body is exact or small, and the
  3552. // body does not match empty, then it's probably worth using
  3553. // a Try(If|Match)(Char|Set)
  3554. if (!body->thisConsumes.CouldMatchEmpty() &&
  3555. (body->isFirstExact || body->firstSet->Count() <= maxCharsForConditionalTry))
  3556. {
  3557. if (body->SupportsPrefixSkipping(compiler))
  3558. {
  3559. if (body->firstSet->IsSingleton())
  3560. tryFixup = compiler.GetFixup(&EMIT(compiler, TryMatchCharInst, body->firstSet->Singleton())->failLabel);
  3561. else
  3562. {
  3563. TryMatchSetInst* const i = EMIT(compiler, TryMatchSetInst);
  3564. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3565. tryFixup = compiler.GetFixup(&i->failLabel);
  3566. }
  3567. bodySkipped = 1;
  3568. }
  3569. else
  3570. {
  3571. if(body->firstSet->IsSingleton())
  3572. tryFixup = compiler.GetFixup(&EMIT(compiler, TryIfCharInst, body->firstSet->Singleton())->failLabel);
  3573. else
  3574. {
  3575. TryIfSetInst* const i = EMIT(compiler, TryIfSetInst);
  3576. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3577. tryFixup = compiler.GetFixup(&i->failLabel);
  3578. }
  3579. }
  3580. }
  3581. else
  3582. tryFixup = compiler.GetFixup(&EMIT(compiler, TryInst)->failLabel);
  3583. body->Emit(compiler, bodySkipped);
  3584. // Fixup Try
  3585. compiler.DoFixup(tryFixup, compiler.CurrentLabel());
  3586. break;
  3587. }
  3588. }
  3589. }
  3590. CharCount LoopNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  3591. {
  3592. Assert(false);
  3593. return 0;
  3594. }
  3595. bool LoopNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  3596. {
  3597. return false;
  3598. }
  3599. bool LoopNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  3600. {
  3601. return false;
  3602. }
  3603. bool LoopNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  3604. {
  3605. Assert(false);
  3606. return false;
  3607. }
  3608. #if ENABLE_REGEX_CONFIG_OPTIONS
  3609. void LoopNode::Print(DebugWriter* w, const Char* litbuf) const
  3610. {
  3611. w->Print(_u("Loop("));
  3612. repeats.Print(w);
  3613. w->PrintEOL(_u(", %s)"), isGreedy ? _u("greedy") : _u("non-greedy"));
  3614. PrintAnnotations(w);
  3615. w->PrintEOL(_u("{"));
  3616. w->Indent();
  3617. body->Print(w, litbuf);
  3618. w->Unindent();
  3619. w->PrintEOL(_u("}"));
  3620. }
  3621. #endif
  3622. // ----------------------------------------------------------------------
  3623. // AssertionNode
  3624. // ----------------------------------------------------------------------
  3625. CharCount AssertionNode::LiteralLength() const
  3626. {
  3627. return 0;
  3628. }
  3629. bool AssertionNode::IsCharOrPositiveSet() const
  3630. {
  3631. return false;
  3632. }
  3633. CharCount AssertionNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  3634. {
  3635. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3636. return body->TransferPass0(compiler, litbuf);
  3637. }
  3638. void AssertionNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  3639. {
  3640. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3641. body->TransferPass1(compiler, litbuf);
  3642. }
  3643. bool AssertionNode::IsRefiningAssertion(Compiler& compiler)
  3644. {
  3645. return !isNegation;
  3646. }
  3647. void AssertionNode::AnnotatePass0(Compiler& compiler)
  3648. {
  3649. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3650. isWord = false;
  3651. body->AnnotatePass0(compiler);
  3652. }
  3653. void AssertionNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  3654. {
  3655. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3656. features = HasAssertion;
  3657. body->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, false, parentNotNegated && !isNegation);
  3658. features |= body->features;
  3659. thisConsumes.Exact(0);
  3660. if (isNegation)
  3661. firstSet = compiler.standardChars->GetFullSet();
  3662. else
  3663. firstSet = body->firstSet;
  3664. isFirstExact = false;
  3665. if (isNegation)
  3666. // This will always fail
  3667. isThisIrrefutable = false;
  3668. else
  3669. // If body is irrefutable overall assertion is irrefutable
  3670. isThisIrrefutable = body->isThisIrrefutable;
  3671. isThisWillNotProgress = true;
  3672. isThisWillNotRegress = true;
  3673. isNotInLoop = parentNotInLoop;
  3674. isAtLeastOnce = parentAtLeastOnce;
  3675. isNotSpeculative = parentNotSpeculative;
  3676. isNotNegated = parentNotNegated;
  3677. }
  3678. void AssertionNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  3679. {
  3680. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3681. prevConsumes = accumConsumes;
  3682. isPrevWillNotProgress = accumPrevWillNotProgress;
  3683. isPrevWillNotRegress = accumPrevWillNotRegress;
  3684. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  3685. }
  3686. void AssertionNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  3687. {
  3688. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3689. followConsumes = accumConsumes;
  3690. followSet = accumFollow;
  3691. isFollowIrrefutable = accumFollowIrrefutable;
  3692. isFollowEOL = accumFollowEOL;
  3693. // Can't say anything about what the assertion body will see at its end
  3694. CountDomain innerConsumes;
  3695. CharSet<Char>* innerFollow = compiler.standardChars->GetFullSet();
  3696. // We can never backtrack into the body of an assertion (the continuation stack is cut)
  3697. body->AnnotatePass3(compiler, innerConsumes, innerFollow, true, false);
  3698. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  3699. }
  3700. void AssertionNode::AnnotatePass4(Compiler& compiler)
  3701. {
  3702. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3703. body->AnnotatePass4(compiler);
  3704. // Even if body is non-deterministic we cut the choicepoints on exit from the assertion,
  3705. // so overall assertion is deterministic.
  3706. isDeterministic = true;
  3707. //
  3708. // Compilation scheme: Fail
  3709. //
  3710. // If body is irrefutable, assertion will always fail (and will leave groups empty).
  3711. //
  3712. if (isNegation && body->isThisIrrefutable)
  3713. {
  3714. // ***COMMIT***
  3715. scheme = Fail;
  3716. return;
  3717. }
  3718. //
  3719. // Compilation scheme: Succ
  3720. //
  3721. // If body is irrefutable, assertion will always succeed. If it does not define groups
  3722. // we can eliminate it altogether.
  3723. //
  3724. if (!isNegation && body->isThisIrrefutable && !body->ContainsDefineGroup())
  3725. {
  3726. // **COMMIT**
  3727. scheme = Succ;
  3728. return;
  3729. }
  3730. //
  3731. // Compilation scheme: BeginEnd
  3732. //
  3733. scheme = BeginEnd;
  3734. }
  3735. bool AssertionNode::SupportsPrefixSkipping(Compiler& compiler) const
  3736. {
  3737. return false;
  3738. }
  3739. Node* AssertionNode::HeadSyncronizingNode(Compiler& compiler)
  3740. {
  3741. return 0;
  3742. }
  3743. CharCount AssertionNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  3744. {
  3745. return 0;
  3746. }
  3747. void AssertionNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  3748. {
  3749. Assert(false);
  3750. }
  3751. void AssertionNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  3752. {
  3753. }
  3754. void AssertionNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  3755. {
  3756. PROBE_STACK(scriptContext, Js::Constants::MinStackRegex);
  3757. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  3758. }
  3759. void AssertionNode::Emit(Compiler& compiler, CharCount& skipped)
  3760. {
  3761. PROBE_STACK(compiler.scriptContext, Js::Constants::MinStackRegex);
  3762. Assert(skipped == 0);
  3763. switch (scheme)
  3764. {
  3765. case BeginEnd:
  3766. {
  3767. //
  3768. // Compilation scheme:
  3769. //
  3770. // BeginAssertion Lexit
  3771. // <body>
  3772. // EndAssertion
  3773. // Lexit:
  3774. //
  3775. int minBodyGroupId = compiler.program->numGroups;
  3776. int maxBodyGroupId = -1;
  3777. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3778. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginAssertionInst, isNegation, minBodyGroupId, maxBodyGroupId)->nextLabel);
  3779. body->Emit(compiler, skipped);
  3780. EMIT(compiler, EndAssertionInst);
  3781. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3782. break;
  3783. }
  3784. case Succ:
  3785. {
  3786. // Nothing to emit
  3787. break;
  3788. }
  3789. case Fail:
  3790. {
  3791. //
  3792. // Compilation scheme:
  3793. //
  3794. // Fail
  3795. //
  3796. EMIT(compiler, FailInst);
  3797. break;
  3798. }
  3799. }
  3800. }
  3801. CharCount AssertionNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  3802. {
  3803. Assert(false);
  3804. return 0;
  3805. }
  3806. bool AssertionNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  3807. {
  3808. return false;
  3809. }
  3810. bool AssertionNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  3811. {
  3812. return false;
  3813. }
  3814. bool AssertionNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  3815. {
  3816. Assert(false);
  3817. return false;
  3818. }
  3819. #if ENABLE_REGEX_CONFIG_OPTIONS
  3820. void AssertionNode::Print(DebugWriter* w, const Char* litbuf) const
  3821. {
  3822. w->PrintEOL(_u("Assertion(%s)"), isNegation ? _u("negative") : _u("positive"));
  3823. PrintAnnotations(w);
  3824. w->PrintEOL(_u("{"));
  3825. w->Indent();
  3826. body->Print(w, litbuf);
  3827. w->Unindent();
  3828. w->PrintEOL(_u("}"));
  3829. }
  3830. #endif
  3831. // ----------------------------------------------------------------------
  3832. // Compiler
  3833. // ----------------------------------------------------------------------
  3834. Compiler::Compiler
  3835. ( Js::ScriptContext* scriptContext
  3836. , ArenaAllocator* ctAllocator
  3837. , ArenaAllocator* rtAllocator
  3838. , StandardChars<Char>* standardChars
  3839. , Program* program
  3840. #if ENABLE_REGEX_CONFIG_OPTIONS
  3841. , DebugWriter* w
  3842. , RegexStats* stats
  3843. #endif
  3844. )
  3845. : scriptContext(scriptContext)
  3846. , ctAllocator(ctAllocator)
  3847. , rtAllocator(rtAllocator)
  3848. , standardChars(standardChars)
  3849. #if ENABLE_REGEX_CONFIG_OPTIONS
  3850. , w(w)
  3851. , stats(stats)
  3852. #endif
  3853. , program(program)
  3854. , instBuf(0)
  3855. , instLen(0)
  3856. , instNext(0)
  3857. , nextLoopId(0)
  3858. {}
  3859. void Compiler::CaptureNoLiterals(Program* program)
  3860. {
  3861. program->rep.insts.litbuf = 0;
  3862. program->rep.insts.litbufLen = 0;
  3863. }
  3864. void Compiler::CaptureLiterals(Node* root, const Char* litbuf)
  3865. {
  3866. // Program will own literal buffer. Prepare buffer and nodes for case-invariant matching if necessary.
  3867. CharCount finalLen = root->TransferPass0(*this, litbuf);
  3868. program->rep.insts.litbuf = finalLen == 0 ? 0 : RecyclerNewArrayLeaf(scriptContext->GetRecycler(), Char, finalLen);
  3869. program->rep.insts.litbufLen = 0;
  3870. root->TransferPass1(*this, litbuf);
  3871. Assert(program->rep.insts.litbufLen == finalLen);
  3872. }
  3873. void Compiler::EmitAndCaptureSuccInst(Recycler* recycler, Program* program)
  3874. {
  3875. program->rep.insts.insts = (uint8*)RecyclerNewLeaf(recycler, SuccInst);
  3876. program->rep.insts.instsLen = sizeof(SuccInst);
  3877. program->numLoops = 0;
  3878. }
  3879. void Compiler::CaptureInsts()
  3880. {
  3881. program->rep.insts.insts = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), uint8, instNext);
  3882. program->rep.insts.instsLen = instNext;
  3883. memcpy_s(program->rep.insts.insts, instNext, instBuf, instNext);
  3884. program->numLoops = nextLoopId;
  3885. }
  3886. void Compiler::FreeBody()
  3887. {
  3888. if (instBuf != 0)
  3889. {
  3890. ctAllocator->Free(instBuf, instLen);
  3891. instBuf = 0;
  3892. instLen = 0;
  3893. instNext = 0;
  3894. }
  3895. }
  3896. void Compiler::CompileEmptyRegex
  3897. ( Program* program
  3898. , RegexPattern* pattern
  3899. #if ENABLE_REGEX_CONFIG_OPTIONS
  3900. , DebugWriter* w
  3901. , RegexStats* stats
  3902. #endif
  3903. )
  3904. {
  3905. program->tag = Program::InstructionsTag;
  3906. CaptureNoLiterals(program);
  3907. EmitAndCaptureSuccInst(pattern->GetScriptContext()->GetRecycler(), program);
  3908. }
  3909. void Compiler::Compile
  3910. ( Js::ScriptContext* scriptContext
  3911. , ArenaAllocator* ctAllocator
  3912. , ArenaAllocator* rtAllocator
  3913. , StandardChars<Char>* standardChars
  3914. , Program *program
  3915. , Node* root
  3916. , const Char* litbuf
  3917. , RegexPattern* pattern
  3918. #if ENABLE_REGEX_CONFIG_OPTIONS
  3919. , DebugWriter* w
  3920. , RegexStats* stats
  3921. #endif
  3922. )
  3923. {
  3924. #if ENABLE_REGEX_CONFIG_OPTIONS
  3925. if (w != 0)
  3926. {
  3927. w->PrintEOL(_u("REGEX AST /%s/ {"), program->source);
  3928. w->Indent();
  3929. root->Print(w, litbuf);
  3930. w->Unindent();
  3931. w->PrintEOL(_u("}"));
  3932. w->Flush();
  3933. }
  3934. #endif
  3935. Compiler compiler
  3936. ( scriptContext
  3937. , ctAllocator
  3938. , rtAllocator
  3939. , standardChars
  3940. , program
  3941. #if ENABLE_REGEX_CONFIG_OPTIONS
  3942. , w
  3943. , stats
  3944. #endif
  3945. );
  3946. bool compiled = false;
  3947. if (REGEX_CONFIG_FLAG(RegexOptimize))
  3948. {
  3949. // SPECIAL CASE: Octoquad/trigrams
  3950. // (must handle before converting to case-insensitive form since the later interferes with octoquad pattern recognizer)
  3951. if (OctoquadIdentifier::Qualifies(program))
  3952. {
  3953. int numCodes;
  3954. char localCodeToChar[TrigramAlphabet::AlphaCount];
  3955. char localCharToCode[TrigramAlphabet::AsciiTableSize];
  3956. char (*codeToChar)[TrigramAlphabet::AlphaCount];
  3957. char (*charToCode)[TrigramAlphabet::AsciiTableSize];
  3958. TrigramAlphabet *trigramAlphabet = scriptContext->GetTrigramAlphabet();
  3959. if(trigramAlphabet)
  3960. {
  3961. numCodes = TrigramAlphabet::AlphaCount;
  3962. codeToChar = &trigramAlphabet->alpha;
  3963. charToCode = &trigramAlphabet->alphaBits;
  3964. }
  3965. else
  3966. {
  3967. numCodes = 0;
  3968. codeToChar = &localCodeToChar;
  3969. charToCode = &localCharToCode;
  3970. }
  3971. OctoquadIdentifier oi(numCodes, *codeToChar, *charToCode);
  3972. // We haven't captured literals yet: temporarily set the program's litbuf to be the parser's litbuf
  3973. Assert(program->rep.insts.litbuf == 0);
  3974. program->rep.insts.litbuf = (Char*)litbuf;
  3975. if (root->IsOctoquad(compiler, &oi) && oi.IsOctoquad())
  3976. {
  3977. program->rep.insts.litbuf = 0;
  3978. oi.InitializeTrigramInfo(scriptContext, pattern);
  3979. program->tag = Program::OctoquadTag;
  3980. program->rep.octoquad.matcher = OctoquadMatcher::New(scriptContext->GetRecycler(), standardChars, program->GetCaseMappingSource(), &oi);
  3981. compiled = true;
  3982. }
  3983. else
  3984. program->rep.insts.litbuf = 0;
  3985. }
  3986. }
  3987. if (!compiled)
  3988. {
  3989. if (REGEX_CONFIG_FLAG(RegexOptimize))
  3990. {
  3991. Char c;
  3992. if (root->IsSingleChar(compiler, c))
  3993. {
  3994. // SPECIAL CASE: c
  3995. program->tag = Program::SingleCharTag;
  3996. program->rep.singleChar.c = c;
  3997. }
  3998. else if (root->IsBoundedWord(compiler))
  3999. {
  4000. // SPECIAL CASE: \b\w+\b
  4001. program->tag = Program::BoundedWordTag;
  4002. }
  4003. else if (root->IsLeadingTrailingSpaces(compiler,
  4004. program->rep.leadingTrailingSpaces.beginMinMatch,
  4005. program->rep.leadingTrailingSpaces.endMinMatch))
  4006. {
  4007. // SPECIAL CASE: ^\s*|\s*$
  4008. program->tag = Program::LeadingTrailingSpacesTag;
  4009. }
  4010. else if (root->IsBOILiteral2(compiler))
  4011. {
  4012. program->tag = Program::BOILiteral2Tag;
  4013. program->rep.boiLiteral2.literal = *(DWORD *)litbuf;
  4014. }
  4015. else
  4016. {
  4017. program->tag = Program::InstructionsTag;
  4018. compiler.CaptureLiterals(root, litbuf);
  4019. root->AnnotatePass0(compiler);
  4020. root->AnnotatePass1(compiler, true, true, true, true);
  4021. // Nothing comes before or after overall pattern
  4022. CountDomain consumes(0);
  4023. // Match could progress from lhs (since we try successive start positions), but can never regress
  4024. root->AnnotatePass2(compiler, consumes, false, true);
  4025. // Anything could follow an end of pattern match
  4026. CharSet<Char>* follow = standardChars->GetFullSet();
  4027. root->AnnotatePass3(compiler, consumes, follow, true, false);
  4028. root->AnnotatePass4(compiler);
  4029. #if ENABLE_REGEX_CONFIG_OPTIONS
  4030. if (w != 0)
  4031. {
  4032. w->PrintEOL(_u("REGEX ANNOTATED AST /%s/ {"), program->source);
  4033. w->Indent();
  4034. root->Print(w, program->rep.insts.litbuf);
  4035. w->Unindent();
  4036. w->PrintEOL(_u("}"));
  4037. w->Flush();
  4038. }
  4039. #endif
  4040. CharCount skipped = 0;
  4041. // If the root Node has a hard fail BOI, we should not emit any synchronize Nodes
  4042. // since we can easily just search from the beginning.
  4043. if (root->hasInitialHardFailBOI == false)
  4044. {
  4045. // If the root Node doesn't have hard fail BOI but sticky flag is present don't synchronize Nodes
  4046. // since we can easily just search from the beginning. Instead set to special InstructionTag
  4047. if ((program->flags & StickyRegexFlag) != 0)
  4048. {
  4049. compiler.SetBOIInstructionsProgramForStickyFlagTag();
  4050. }
  4051. else
  4052. {
  4053. Node* bestSyncronizingNode = 0;
  4054. root->BestSyncronizingNode(compiler, bestSyncronizingNode);
  4055. Node* headSyncronizingNode = root->HeadSyncronizingNode(compiler);
  4056. if ((bestSyncronizingNode == 0 && headSyncronizingNode != 0) ||
  4057. (bestSyncronizingNode != 0 && headSyncronizingNode == bestSyncronizingNode))
  4058. {
  4059. // Scan and consume the head, continue with rest assuming head has been consumed
  4060. skipped = headSyncronizingNode->EmitScan(compiler, true);
  4061. }
  4062. else if (bestSyncronizingNode != 0)
  4063. {
  4064. // Scan for the synchronizing node, then backup ready for entire pattern
  4065. skipped = bestSyncronizingNode->EmitScan(compiler, false);
  4066. Assert(skipped == 0);
  4067. // We're synchronizing to a non-head node; if we have to back up, then try to synchronize to a character
  4068. // in the first set before running the remaining instructions
  4069. if (!bestSyncronizingNode->prevConsumes.CouldMatchEmpty()) // must back up at least one character
  4070. skipped = root->EmitScanFirstSet(compiler);
  4071. }
  4072. else
  4073. {
  4074. // Optionally scan for a character in the overall pattern's FIRST set, possibly consume it,
  4075. // then match all or remainder of pattern
  4076. skipped = root->EmitScanFirstSet(compiler);
  4077. }
  4078. }
  4079. }
  4080. root->Emit(compiler, skipped);
  4081. compiler.Emit<SuccInst>();
  4082. compiler.CaptureInsts();
  4083. }
  4084. }
  4085. else
  4086. {
  4087. program->tag = Program::InstructionsTag;
  4088. compiler.CaptureLiterals(root, litbuf);
  4089. CharCount skipped = 0;
  4090. root->Emit(compiler, skipped);
  4091. compiler.Emit<SuccInst>();
  4092. compiler.CaptureInsts();
  4093. }
  4094. }
  4095. #if ENABLE_REGEX_CONFIG_OPTIONS
  4096. if (w != 0)
  4097. {
  4098. w->PrintEOL(_u("REGEX PROGRAM /%s/ "), program->source);
  4099. program->Print(w);
  4100. w->Flush();
  4101. }
  4102. #endif
  4103. }
  4104. }