RegexCompileTime.cpp 180 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764777477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. namespace UnifiedRegex
  7. {
  8. // ----------------------------------------------------------------------
  9. // Compiler (inlines etc)
  10. // ----------------------------------------------------------------------
  11. // The VS2013 linker treats this as a redefinition of an already
  12. // defined constant and complains. So skip the declaration if we're compiling
  13. // with VS2013 or below.
  14. #if !defined(_MSC_VER) || _MSC_VER >= 1900
  15. const CharCount Compiler::initInstBufSize;
  16. #endif
  17. uint8* Compiler::Emit(size_t size)
  18. {
  19. Assert(size <= UINT32_MAX);
  20. if (instLen - instNext < size)
  21. {
  22. CharCount newLen = max(instLen, initInstBufSize);
  23. CharCount instLenPlus = (CharCount)(instLen + size - 1);
  24. // check for overflow
  25. if (instLenPlus < instLen || instLenPlus * 2 < instLenPlus)
  26. {
  27. Js::Throw::OutOfMemory();
  28. }
  29. while (newLen <= instLenPlus)
  30. {
  31. newLen *= 2;
  32. }
  33. instBuf = (uint8*)ctAllocator->Realloc(instBuf, instLen, newLen);
  34. instLen = newLen;
  35. }
  36. uint8* inst = instBuf + instNext;
  37. instNext += (CharCount)size;
  38. return inst;
  39. }
  40. template <typename T>
  41. T* Compiler::Emit()
  42. {
  43. return new(Emit(sizeof(T))) T;
  44. }
  45. #define EMIT(compiler, T, ...) (new (compiler.Emit(sizeof(T))) T(__VA_ARGS__))
  46. #define L2I(O, label) LabelToInstPointer<O##Inst>(Inst::InstTag::O, label)
  47. // Remember: The machine address of an instruction is no longer valid after a subsequent emit,
  48. // so all label fixups must be done using Compiler::GetFixup / Compiler::DoFixup
  49. // ----------------------------------------------------------------------
  50. // Node
  51. // ----------------------------------------------------------------------
  52. void Node::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  53. {
  54. Assert(false);
  55. }
  56. CharCount Node::EmitScanFirstSet(Compiler& compiler)
  57. {
  58. Assert(prevConsumes.IsExact(0));
  59. if (thisConsumes.CouldMatchEmpty())
  60. // Can't be sure of consuming something in FIRST
  61. return 0;
  62. if (firstSet->Count() > maxSyncToSetSize)
  63. // HEURISTIC: If FIRST is large we'll get too many false positives
  64. return 0;
  65. //
  66. // Compilation scheme:
  67. //
  68. // SyncTo(Char|Char2|Set)And(Consume|Continue)
  69. //
  70. Char entries[CharSet<Char>::MaxCompact];
  71. int count = firstSet->GetCompactEntries(2, entries);
  72. if (SupportsPrefixSkipping(compiler))
  73. {
  74. if (count == 1)
  75. EMIT(compiler, SyncToCharAndConsumeInst, entries[0]);
  76. else if (count == 2)
  77. EMIT(compiler, SyncToChar2SetAndConsumeInst, entries[0], entries[1]);
  78. else
  79. EMIT(compiler, SyncToSetAndConsumeInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  80. return 1;
  81. }
  82. else
  83. {
  84. if (count == 1)
  85. EMIT(compiler, SyncToCharAndContinueInst, entries[0]);
  86. else if (count == 2)
  87. EMIT(compiler, SyncToChar2SetAndContinueInst, entries[0], entries[1]);
  88. else
  89. EMIT(compiler, SyncToSetAndContinueInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  90. return 0;
  91. }
  92. }
  93. bool Node::IsBetterSyncronizingNode(Compiler& compiler, Node* curr, Node* proposed)
  94. {
  95. int proposedNumLiterals = 0;
  96. CharCount proposedLength = proposed->MinSyncronizingLiteralLength(compiler, proposedNumLiterals);
  97. if (proposedLength == 0 || proposedNumLiterals > maxNumSyncLiterals)
  98. // Not a synchronizable node or too many literals.
  99. return false;
  100. if (curr == nullptr)
  101. // We'll take whatever we can get
  102. return true;
  103. int currNumLiterals = 0;
  104. CharCount currLength = curr->MinSyncronizingLiteralLength(compiler, currNumLiterals);
  105. // Lexicographic ordering based on
  106. // - whether literal length is above a threshold (above is better)
  107. // - number of literals (smaller is better)
  108. // - upper bound on backup (finite is better)
  109. // - minimum literal length (longer is better)
  110. // - actual backup upper bound (shorter is better)
  111. if (proposedLength >= preferredMinSyncToLiteralLength
  112. && currLength < preferredMinSyncToLiteralLength)
  113. {
  114. return true;
  115. }
  116. if (proposedLength < preferredMinSyncToLiteralLength
  117. && currLength >= preferredMinSyncToLiteralLength)
  118. {
  119. return false;
  120. }
  121. if (proposedNumLiterals < currNumLiterals)
  122. return true;
  123. if (proposedNumLiterals > currNumLiterals)
  124. return false;
  125. if (!proposed->prevConsumes.IsUnbounded() && curr->prevConsumes.IsUnbounded())
  126. return true;
  127. if (proposed->prevConsumes.IsUnbounded() && !curr->prevConsumes.IsUnbounded())
  128. return false;
  129. if (proposedLength > currLength)
  130. return true;
  131. if (proposedLength < currLength)
  132. return false;
  133. return proposed->prevConsumes.upper < curr->prevConsumes.upper;
  134. }
  135. bool Node::IsSingleChar(Compiler& compiler, Char& outChar) const
  136. {
  137. if (tag != Node::MatchChar)
  138. return false;
  139. const MatchCharNode* node = (const MatchCharNode*)this;
  140. if (node->isEquivClass)
  141. return false;
  142. outChar = node->cs[0];
  143. return true;
  144. }
  145. bool Node::IsBoundedWord(Compiler& compiler) const
  146. {
  147. if (tag != Node::Concat)
  148. return false;
  149. const ConcatNode* concatNode = (const ConcatNode *)this;
  150. if (concatNode->head->tag != Node::WordBoundary ||
  151. concatNode->tail == 0 ||
  152. concatNode->tail->head->tag != Node::Loop ||
  153. concatNode->tail->tail == 0 ||
  154. concatNode->tail->tail->head->tag != Node::WordBoundary ||
  155. concatNode->tail->tail->tail != 0)
  156. return false;
  157. const WordBoundaryNode* enter = (const WordBoundaryNode*)concatNode->head;
  158. const LoopNode* loop = (const LoopNode*)concatNode->tail->head;
  159. const WordBoundaryNode* leave = (const WordBoundaryNode*)concatNode->tail->tail->head;
  160. if (enter->isNegation ||
  161. !loop->isGreedy ||
  162. loop->repeats.lower != 1 ||
  163. loop->repeats.upper != CharCountFlag ||
  164. loop->body->tag != Node::MatchSet ||
  165. leave->isNegation)
  166. return false;
  167. const MatchSetNode* wordSet = (const MatchSetNode*)loop->body;
  168. if (wordSet->isNegation)
  169. return false;
  170. return wordSet->set.IsEqualTo(*compiler.standardChars->GetWordSet());
  171. }
  172. bool Node::IsBOILiteral2(Compiler& compiler) const
  173. {
  174. if (tag != Node::Concat)
  175. return false;
  176. const ConcatNode* concatNode = (const ConcatNode *)this;
  177. if ((compiler.program->flags & (IgnoreCaseRegexFlag | MultilineRegexFlag)) != 0 ||
  178. concatNode->head->tag != Node::BOL ||
  179. concatNode->tail == nullptr ||
  180. concatNode->tail->head->tag != Node::MatchLiteral ||
  181. concatNode->tail->tail != nullptr ||
  182. ((MatchLiteralNode *)concatNode->tail->head)->isEquivClass ||
  183. ((MatchLiteralNode *)concatNode->tail->head)->length != 2)
  184. {
  185. return false;
  186. }
  187. return true;
  188. }
  189. bool Node::IsLeadingTrailingSpaces(Compiler& compiler, CharCount& leftMinMatch, CharCount& rightMinMatch) const
  190. {
  191. if (tag != Node::Alt)
  192. return false;
  193. if (compiler.program->flags & MultilineRegexFlag)
  194. return false;
  195. const AltNode* altNode = (const AltNode*)this;
  196. if (altNode->head->tag != Node::Concat ||
  197. altNode->tail == 0 ||
  198. altNode->tail->head->tag != Node::Concat ||
  199. altNode->tail->tail != 0)
  200. return false;
  201. const ConcatNode* left = (const ConcatNode*)altNode->head;
  202. const ConcatNode* right = (const ConcatNode*)altNode->tail->head;
  203. if (left->head->tag != Node::BOL ||
  204. left->tail == 0 ||
  205. left->tail->head->tag != Node::Loop ||
  206. left->tail->tail != 0)
  207. return false;
  208. if (right->head->tag != Node::Loop ||
  209. right->tail == 0 ||
  210. right->tail->head->tag != Node::EOL ||
  211. right->tail->tail != 0)
  212. return false;
  213. const LoopNode* leftLoop = (const LoopNode*)left->tail->head;
  214. const LoopNode* rightLoop = (const LoopNode*)right->head;
  215. if (!leftLoop->isGreedy ||
  216. leftLoop->repeats.upper != CharCountFlag ||
  217. leftLoop->body->tag != Node::MatchSet ||
  218. !rightLoop->isGreedy ||
  219. rightLoop->repeats.upper != CharCountFlag ||
  220. rightLoop->body->tag != Node::MatchSet)
  221. return false;
  222. const MatchSetNode* leftSet = (const MatchSetNode*)leftLoop->body;
  223. const MatchSetNode* rightSet = (const MatchSetNode*)rightLoop->body;
  224. if (leftSet->isNegation ||
  225. rightSet->isNegation)
  226. return false;
  227. leftMinMatch = leftLoop->repeats.lower;
  228. rightMinMatch = rightLoop->repeats.lower;
  229. return
  230. leftSet->set.IsEqualTo(*compiler.standardChars->GetWhitespaceSet()) &&
  231. rightSet->set.IsEqualTo(*compiler.standardChars->GetWhitespaceSet());
  232. }
  233. #if ENABLE_REGEX_CONFIG_OPTIONS
  234. void Node::PrintAnnotations(DebugWriter* w) const
  235. {
  236. if (firstSet != 0)
  237. {
  238. w->PrintEOL(_u("<"));
  239. w->Indent();
  240. w->Print(_u("features: {"));
  241. bool first = true;
  242. for (uint i = Empty; i <= Assertion; i++)
  243. {
  244. if ((features & (1 << i)) != 0)
  245. {
  246. if (first)
  247. first = false;
  248. else
  249. w->Print(_u(","));
  250. switch (i)
  251. {
  252. case Empty: w->Print(_u("Empty")); break;
  253. case BOL: w->Print(_u("BOL")); break;
  254. case EOL: w->Print(_u("EOL")); break;
  255. case WordBoundary: w->Print(_u("WordBoundary")); break;
  256. case MatchLiteral: w->Print(_u("MatchLiteral")); break;
  257. case MatchChar: w->Print(_u("MatchChar")); break;
  258. case Concat: w->Print(_u("Concat")); break;
  259. case Alt: w->Print(_u("Alt")); break;
  260. case DefineGroup: w->Print(_u("DefineGroup")); break;
  261. case MatchGroup: w->Print(_u("MatchGroup")); break;
  262. case Loop: w->Print(_u("Loop")); break;
  263. case MatchSet: w->Print(_u("MatchSet")); break;
  264. case Assertion: w->Print(_u("Assertion")); break;
  265. }
  266. }
  267. }
  268. w->PrintEOL(_u("}"));
  269. w->Print(_u("firstSet: "));
  270. firstSet->Print(w);
  271. if (isFirstExact)
  272. w->Print(_u(" (exact)"));
  273. w->EOL();
  274. w->Print(_u("followSet: "));
  275. followSet->Print(w);
  276. w->EOL();
  277. w->Print(_u("prevConsumes: "));
  278. prevConsumes.Print(w);
  279. w->EOL();
  280. w->Print(_u("thisConsumes: "));
  281. thisConsumes.Print(w);
  282. w->EOL();
  283. w->Print(_u("followConsumes: "));
  284. followConsumes.Print(w);
  285. w->EOL();
  286. w->PrintEOL(_u("isThisIrrefutable: %s"), isThisIrrefutable ? _u("true") : _u("false"));
  287. w->PrintEOL(_u("isFollowIrrefutable: %s"), isFollowIrrefutable ? _u("true") : _u("false"));
  288. w->PrintEOL(_u("isWord: %s"), isWord ? _u("true") : _u("false"));
  289. w->PrintEOL(_u("isThisWillNotProgress: %s"), isThisWillNotProgress ? _u("true") : _u("false"));
  290. w->PrintEOL(_u("isThisWillNotRegress: %s"), isThisWillNotRegress ? _u("true") : _u("false"));
  291. w->PrintEOL(_u("isPrevWillNotProgress: %s"), isPrevWillNotProgress ? _u("true") : _u("false"));
  292. w->PrintEOL(_u("isPrevWillNotRegress: %s"), isPrevWillNotRegress ? _u("true") : _u("false"));
  293. w->PrintEOL(_u("isDeterministic: %s"), isDeterministic ? _u("true") : _u("false"));
  294. w->PrintEOL(_u("isNotInLoop: %s"), isNotInLoop ? _u("true") : _u("false"));
  295. w->PrintEOL(_u("isNotNegated: %s"), isNotNegated ? _u("true") : _u("false"));
  296. w->PrintEOL(_u("isAtLeastOnce: %s"), isAtLeastOnce ? _u("true") : _u("false"));
  297. w->PrintEOL(_u("hasInitialHardFailBOI: %s"), hasInitialHardFailBOI ? _u("true") : _u("false"));
  298. w->Unindent();
  299. w->PrintEOL(_u(">"));
  300. }
  301. }
  302. #endif
  303. // ----------------------------------------------------------------------
  304. // SimpleNode
  305. // ----------------------------------------------------------------------
  306. CharCount SimpleNode::LiteralLength() const
  307. {
  308. return 0;
  309. }
  310. bool SimpleNode::IsCharOrPositiveSet() const
  311. {
  312. return false;
  313. }
  314. CharCount SimpleNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  315. {
  316. return 0;
  317. }
  318. void SimpleNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  319. {
  320. }
  321. bool SimpleNode::IsRefiningAssertion(Compiler& compiler)
  322. {
  323. return tag == EOL && (compiler.program->flags & MultilineRegexFlag) != 0;
  324. }
  325. void SimpleNode::AnnotatePass0(Compiler& compiler)
  326. {
  327. isWord = false;
  328. }
  329. void SimpleNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  330. {
  331. isFirstExact = false;
  332. thisConsumes.Exact(0);
  333. isThisWillNotProgress = true;
  334. isThisWillNotRegress = true;
  335. isNotInLoop = parentNotInLoop;
  336. isAtLeastOnce = parentAtLeastOnce;
  337. isNotSpeculative = parentNotSpeculative;
  338. isNotNegated = parentNotNegated;
  339. switch (tag)
  340. {
  341. case Empty:
  342. features = HasEmpty;
  343. firstSet = compiler.standardChars->GetEmptySet();
  344. isThisIrrefutable = true;
  345. break;
  346. case BOL:
  347. features = HasBOL;
  348. firstSet = compiler.standardChars->GetFullSet();
  349. isThisIrrefutable = false;
  350. break;
  351. case EOL:
  352. features = HasEOL;
  353. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  354. firstSet = compiler.standardChars->GetNewlineSet();
  355. else
  356. firstSet = compiler.standardChars->GetEmptySet();
  357. isThisIrrefutable = false;
  358. break;
  359. default:
  360. Assert(false);
  361. }
  362. }
  363. void SimpleNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  364. {
  365. prevConsumes = accumConsumes;
  366. isPrevWillNotProgress = accumPrevWillNotProgress;
  367. isPrevWillNotRegress = accumPrevWillNotRegress;
  368. }
  369. void SimpleNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  370. {
  371. followConsumes = accumConsumes;
  372. followSet = accumFollow;
  373. isFollowIrrefutable = accumFollowIrrefutable;
  374. isFollowEOL = accumFollowEOL;
  375. hasInitialHardFailBOI = ((tag == BOL) &&
  376. prevConsumes.IsExact(0) &&
  377. (compiler.program->flags & MultilineRegexFlag) == 0 &&
  378. isAtLeastOnce &&
  379. isNotNegated &&
  380. isPrevWillNotRegress);
  381. }
  382. void SimpleNode::AnnotatePass4(Compiler& compiler)
  383. {
  384. isDeterministic = true;
  385. }
  386. bool SimpleNode::SupportsPrefixSkipping(Compiler& compiler) const
  387. {
  388. return false;
  389. }
  390. Node* SimpleNode::HeadSyncronizingNode(Compiler& compiler)
  391. {
  392. return 0;
  393. }
  394. CharCount SimpleNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  395. {
  396. return 0;
  397. }
  398. void SimpleNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  399. {
  400. Assert(false);
  401. }
  402. void SimpleNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  403. {
  404. }
  405. void SimpleNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  406. {
  407. }
  408. void SimpleNode::Emit(Compiler& compiler, CharCount& skipped)
  409. {
  410. Assert(skipped == 0);
  411. switch (tag)
  412. {
  413. case Empty:
  414. // Nothing
  415. break;
  416. case BOL:
  417. {
  418. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  419. {
  420. //
  421. // Compilation scheme:
  422. //
  423. // BOLTest
  424. //
  425. EMIT(compiler, BOLTestInst);
  426. }
  427. else
  428. {
  429. if (compiler.CurrentLabel() == 0)
  430. {
  431. // The first instruction is BOI, change the tag and only execute it once
  432. // without looping every start position
  433. compiler.SetBOIInstructionsProgramTag();
  434. }
  435. else
  436. {
  437. //
  438. // Compilation scheme:
  439. //
  440. // BOITest
  441. //
  442. // Obviously starting later in the string won't help, so can hard fail if:
  443. // - this pattern must always be matched
  444. // - not in a negative assertion
  445. // - backtracking could never rewind the input pointer
  446. //
  447. bool canHardFail = isAtLeastOnce && isNotNegated && isPrevWillNotRegress;
  448. if (canHardFail)
  449. {
  450. EMIT(compiler, BOITestInst<true>);
  451. }
  452. else
  453. {
  454. EMIT(compiler, BOITestInst<false>);
  455. }
  456. }
  457. }
  458. break;
  459. }
  460. case EOL:
  461. {
  462. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  463. {
  464. //
  465. // Compilation scheme:
  466. //
  467. // EOLTest
  468. //
  469. EMIT(compiler, EOLTestInst);
  470. }
  471. else
  472. {
  473. //
  474. // Compilation scheme:
  475. //
  476. // EOITest
  477. //
  478. // Can hard fail if
  479. // - this pattern must always be matched
  480. // - not in a negative assertion
  481. // - backtracking could never advance the input pointer
  482. //
  483. bool canHardFail = isAtLeastOnce && isNotNegated && isPrevWillNotProgress;
  484. if (canHardFail)
  485. {
  486. EMIT(compiler, EOITestInst<true>);
  487. }
  488. else
  489. {
  490. EMIT(compiler, EOITestInst<false>);
  491. }
  492. }
  493. break;
  494. }
  495. default:
  496. Assert(false);
  497. }
  498. }
  499. CharCount SimpleNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  500. {
  501. Assert(false);
  502. return 0;
  503. }
  504. bool SimpleNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  505. {
  506. return false;
  507. }
  508. bool SimpleNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  509. {
  510. return tag == Empty;
  511. }
  512. bool SimpleNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  513. {
  514. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  515. Assert(tag == Empty);
  516. if (cont == 0)
  517. {
  518. if (trie->Count() > 0)
  519. // This literal is a proper prefix of an earlier literal
  520. return false;
  521. trie->SetAccepting();
  522. return true;
  523. }
  524. return cont->BuildCharTrie(compiler, trie, 0, isAcceptFirst);
  525. }
  526. #if ENABLE_REGEX_CONFIG_OPTIONS
  527. void SimpleNode::Print(DebugWriter* w, const Char* litbuf) const
  528. {
  529. switch (tag)
  530. {
  531. case Empty:
  532. w->Print(_u("Empty")); break;
  533. case BOL:
  534. w->Print(_u("BOL")); break;
  535. case EOL:
  536. w->Print(_u("EOL")); break;
  537. default:
  538. Assert(false);
  539. }
  540. w->PrintEOL(_u("()"));
  541. PrintAnnotations(w);
  542. }
  543. #endif
  544. // ----------------------------------------------------------------------
  545. // WordBoundaryNode
  546. // ----------------------------------------------------------------------
  547. CharCount WordBoundaryNode::LiteralLength() const
  548. {
  549. return 0;
  550. }
  551. bool WordBoundaryNode::IsCharOrPositiveSet() const
  552. {
  553. return false;
  554. }
  555. CharCount WordBoundaryNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  556. {
  557. return 0;
  558. }
  559. void WordBoundaryNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  560. {
  561. // WordChars and NonWordChars sets are already case invariant
  562. }
  563. bool WordBoundaryNode::IsRefiningAssertion(Compiler& compiler)
  564. {
  565. return mustIncludeEntering != mustIncludeLeaving;
  566. }
  567. void WordBoundaryNode::AnnotatePass0(Compiler& compiler)
  568. {
  569. isWord = false;
  570. }
  571. void WordBoundaryNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  572. {
  573. features = HasWordBoundary;
  574. thisConsumes.Exact(0);
  575. isFirstExact = false;
  576. isThisIrrefutable = false;
  577. isThisWillNotProgress = true;
  578. isThisWillNotRegress = true;
  579. isNotInLoop = parentNotInLoop;
  580. isAtLeastOnce = parentAtLeastOnce;
  581. isNotSpeculative = parentNotSpeculative;
  582. isNotNegated = parentNotNegated;
  583. if (isNegation)
  584. firstSet = compiler.standardChars->GetFullSet();
  585. else
  586. {
  587. if (mustIncludeEntering && !mustIncludeLeaving)
  588. firstSet = compiler.standardChars->GetWordSet();
  589. else if (mustIncludeLeaving && !mustIncludeEntering)
  590. firstSet = compiler.standardChars->GetNonWordSet();
  591. else
  592. firstSet = compiler.standardChars->GetFullSet();
  593. }
  594. }
  595. void WordBoundaryNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  596. {
  597. prevConsumes = accumConsumes;
  598. isPrevWillNotProgress = accumPrevWillNotProgress;
  599. isPrevWillNotRegress = accumPrevWillNotRegress;
  600. }
  601. void WordBoundaryNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  602. {
  603. followConsumes = accumConsumes;
  604. followSet = accumFollow;
  605. isFollowIrrefutable = accumFollowIrrefutable;
  606. isFollowEOL = accumFollowEOL;
  607. }
  608. void WordBoundaryNode::AnnotatePass4(Compiler& compiler)
  609. {
  610. isDeterministic = true;
  611. }
  612. bool WordBoundaryNode::SupportsPrefixSkipping(Compiler& compiler) const
  613. {
  614. return false;
  615. }
  616. Node* WordBoundaryNode::HeadSyncronizingNode(Compiler& compiler)
  617. {
  618. return 0;
  619. }
  620. CharCount WordBoundaryNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  621. {
  622. return 0;
  623. }
  624. void WordBoundaryNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  625. {
  626. Assert(false);
  627. }
  628. void WordBoundaryNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  629. {
  630. }
  631. void WordBoundaryNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  632. {
  633. }
  634. void WordBoundaryNode::Emit(Compiler& compiler, CharCount& skipped)
  635. {
  636. Assert(skipped == 0);
  637. //
  638. // Compilation scheme:
  639. //
  640. // WordBoundaryTest
  641. //
  642. if (isNegation)
  643. {
  644. EMIT(compiler, WordBoundaryTestInst<true>);
  645. }
  646. else
  647. {
  648. EMIT(compiler, WordBoundaryTestInst<false>);
  649. }
  650. }
  651. CharCount WordBoundaryNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  652. {
  653. Assert(false);
  654. return 0;
  655. }
  656. bool WordBoundaryNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  657. {
  658. return false;
  659. }
  660. bool WordBoundaryNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  661. {
  662. return false;
  663. }
  664. bool WordBoundaryNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  665. {
  666. Assert(false);
  667. return false;
  668. }
  669. #if ENABLE_REGEX_CONFIG_OPTIONS
  670. void WordBoundaryNode::Print(DebugWriter* w, const Char* litbuf) const
  671. {
  672. w->PrintEOL(_u("WordBoundary(%s, %s, %s)"), isNegation ? _u("negative") : _u("positive"), mustIncludeEntering ? _u("entering") : _u("-"), mustIncludeLeaving ? _u("leaving") : _u("-"));
  673. PrintAnnotations(w);
  674. }
  675. #endif
  676. // ----------------------------------------------------------------------
  677. // MatchLiteralNode
  678. // ----------------------------------------------------------------------
  679. CharCount MatchLiteralNode::LiteralLength() const
  680. {
  681. return length;
  682. }
  683. void MatchLiteralNode::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  684. {
  685. // Called during parsing only, so literal always in original form
  686. Assert(!isEquivClass);
  687. Assert(litbufNext + length <= litbufLen && offset + length <= litbufLen);
  688. #pragma prefast(suppress:26000, "The error said that offset + length >= litbufLen + 1, which is incorrect due to if statement below.")
  689. if (litbufNext + length <= litbufLen && offset + length <= litbufLen) // for prefast
  690. {
  691. js_wmemcpy_s(litbuf + litbufNext, litbufLen - litbufNext, litbuf + offset, length);
  692. }
  693. litbufNext += length;
  694. }
  695. bool MatchLiteralNode::IsCharOrPositiveSet() const
  696. {
  697. return false;
  698. }
  699. CharCount MatchLiteralNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  700. {
  701. Assert(length > 1);
  702. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0
  703. && !compiler.standardChars->IsTrivialString(compiler.program->GetCaseMappingSource(), litbuf + offset, length))
  704. {
  705. // We'll need to expand each character of literal into its equivalence class
  706. isEquivClass = true;
  707. return UInt32Math::MulAdd<CaseInsensitive::EquivClassSize,0>(length);
  708. }
  709. else
  710. return length;
  711. }
  712. void MatchLiteralNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  713. {
  714. CharCount nextLit = compiler.program->rep.insts.litbufLen;
  715. if (isEquivClass)
  716. {
  717. Assert((compiler.program->flags & IgnoreCaseRegexFlag) != 0);
  718. // Expand literal according to character equivalence classes
  719. for (CharCount i = 0; i < length; i++)
  720. {
  721. compiler.standardChars->ToEquivs(
  722. compiler.program->GetCaseMappingSource(),
  723. litbuf[offset + i],
  724. compiler.program->rep.insts.litbuf + nextLit + i * CaseInsensitive::EquivClassSize);
  725. }
  726. compiler.program->rep.insts.litbufLen += length * CaseInsensitive::EquivClassSize;
  727. }
  728. else
  729. {
  730. for (CharCount i = 0; i < length; i++)
  731. compiler.program->rep.insts.litbuf[nextLit + i] = litbuf[offset + i];
  732. compiler.program->rep.insts.litbufLen += length;
  733. }
  734. offset = nextLit;
  735. }
  736. void MatchLiteralNode::AnnotatePass0(Compiler& compiler)
  737. {
  738. const Char* litbuf = compiler.program->rep.insts.litbuf;
  739. for (CharCount i = offset; i < offset + length; i++)
  740. {
  741. if (!compiler.standardChars->IsWord(litbuf[i]))
  742. {
  743. isWord = false;
  744. return;
  745. }
  746. }
  747. isWord = true;
  748. }
  749. bool MatchLiteralNode::IsRefiningAssertion(Compiler& compiler)
  750. {
  751. return false;
  752. }
  753. void MatchLiteralNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  754. {
  755. features = HasMatchLiteral;
  756. thisConsumes.Exact(length);
  757. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  758. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  759. firstSet->Set(compiler.ctAllocator, compiler.program->rep.insts.litbuf[offset + i]);
  760. isFirstExact = true;
  761. isThisIrrefutable = false;
  762. isThisWillNotProgress = true;
  763. isThisWillNotRegress = true;
  764. isNotInLoop = parentNotInLoop;
  765. isAtLeastOnce = parentAtLeastOnce;
  766. isNotSpeculative = parentNotSpeculative;
  767. isNotNegated = parentNotNegated;
  768. }
  769. void MatchLiteralNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  770. {
  771. prevConsumes = accumConsumes;
  772. isPrevWillNotProgress = accumPrevWillNotProgress;
  773. isPrevWillNotRegress = accumPrevWillNotRegress;
  774. }
  775. void MatchLiteralNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  776. {
  777. followConsumes = accumConsumes;
  778. followSet = accumFollow;
  779. isFollowIrrefutable = accumFollowIrrefutable;
  780. isFollowEOL = accumFollowEOL;
  781. }
  782. void MatchLiteralNode::AnnotatePass4(Compiler& compiler)
  783. {
  784. isDeterministic = true;
  785. }
  786. bool MatchLiteralNode::SupportsPrefixSkipping(Compiler& compiler) const
  787. {
  788. return true;
  789. }
  790. Node* MatchLiteralNode::HeadSyncronizingNode(Compiler& compiler)
  791. {
  792. return this;
  793. }
  794. CharCount MatchLiteralNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  795. {
  796. numLiterals++;
  797. return length;
  798. }
  799. void MatchLiteralNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  800. {
  801. ScannerMixin* scanner =
  802. scanners.Add(compiler.GetScriptContext()->GetRecycler(), compiler.GetProgram(), offset, length, isEquivClass);
  803. scanner->scanner.Setup(compiler.rtAllocator, compiler.program->rep.insts.litbuf + offset, length, isEquivClass ? CaseInsensitive::EquivClassSize : 1);
  804. }
  805. void MatchLiteralNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  806. {
  807. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  808. bestNode = this;
  809. }
  810. void MatchLiteralNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  811. {
  812. }
  813. void MatchLiteralNode::Emit(Compiler& compiler, CharCount& skipped)
  814. {
  815. if (skipped >= length)
  816. {
  817. // Asking to skip entire literal
  818. skipped -= length;
  819. return;
  820. }
  821. //
  822. // Compilation scheme:
  823. //
  824. // Match(Char|Char4|Literal|LiteralEquiv)Inst
  825. //
  826. CharCount effectiveOffset = offset + skipped * (isEquivClass ? CaseInsensitive::EquivClassSize : 1);
  827. CharCount effectiveLength = length - skipped;
  828. skipped -= min(skipped, length);
  829. if (effectiveLength == 1)
  830. {
  831. Char* cs = compiler.program->rep.insts.litbuf + effectiveOffset;
  832. MatchCharNode::Emit(compiler, cs, isEquivClass);
  833. }
  834. else
  835. {
  836. if (isEquivClass)
  837. EMIT(compiler, MatchLiteralEquivInst, effectiveOffset, effectiveLength);
  838. else
  839. EMIT(compiler, MatchLiteralInst, effectiveOffset, effectiveLength);
  840. }
  841. }
  842. CompileAssert(CaseInsensitive::EquivClassSize == 4);
  843. CharCount MatchLiteralNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  844. {
  845. //
  846. // Compilation scheme:
  847. //
  848. // SyncTo(Literal|LiteralEquiv|LinearLiteral)And(Continue|Consume|Backup)
  849. //
  850. Char * litptr = compiler.program->rep.insts.litbuf + offset;
  851. if (isHeadSyncronizingNode)
  852. {
  853. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  854. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  855. if (isEquivClass)
  856. {
  857. const uint lastPatCharIndex = length - 1;
  858. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  859. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  860. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  861. {
  862. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  863. }
  864. else
  865. {
  866. EMIT(compiler, SyncToLiteralEquivAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  867. }
  868. }
  869. else if (length == 1)
  870. EMIT(compiler, SyncToCharAndConsumeInst, litptr[0]);
  871. else if (length == 2)
  872. EMIT(compiler, SyncToChar2LiteralAndConsumeInst, litptr[0], litptr[1]);
  873. else
  874. {
  875. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  876. switch (setup.GetScheme())
  877. {
  878. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  879. EMIT(compiler, SyncToLinearLiteralAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  880. break;
  881. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  882. EMIT(compiler, SyncToLiteralAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  883. break;
  884. };
  885. }
  886. return length;
  887. }
  888. else
  889. {
  890. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  891. // inside a group for instance, then we won't need to back up but we cannot consume the literal.
  892. if (prevConsumes.IsExact(0))
  893. {
  894. if (isEquivClass)
  895. {
  896. const uint lastPatCharIndex = length - 1;
  897. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  898. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  899. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  900. {
  901. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  902. }
  903. else
  904. {
  905. EMIT(compiler, SyncToLiteralEquivAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  906. }
  907. }
  908. else if (length == 1)
  909. EMIT(compiler, SyncToCharAndContinueInst, litptr[0]);
  910. else if (length == 2)
  911. EMIT(compiler, SyncToChar2LiteralAndContinueInst, litptr[0], litptr[1]);
  912. else
  913. {
  914. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  915. switch (setup.GetScheme())
  916. {
  917. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  918. EMIT(compiler, SyncToLinearLiteralAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  919. break;
  920. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  921. EMIT(compiler, SyncToLiteralAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  922. break;
  923. };
  924. }
  925. }
  926. else
  927. {
  928. if (isEquivClass)
  929. {
  930. const uint lastPatCharIndex = length - 1;
  931. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  932. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  933. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  934. {
  935. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  936. }
  937. else
  938. {
  939. EMIT(compiler, SyncToLiteralEquivAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  940. }
  941. }
  942. else if (length == 1)
  943. EMIT(compiler, SyncToCharAndBackupInst, litptr[0], prevConsumes);
  944. else if (length == 2)
  945. EMIT(compiler, SyncToChar2LiteralAndBackupInst, litptr[0], litptr[1], prevConsumes);
  946. else
  947. {
  948. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  949. switch (setup.GetScheme())
  950. {
  951. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  952. EMIT(compiler, SyncToLinearLiteralAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, setup);
  953. break;
  954. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  955. EMIT(compiler, SyncToLiteralAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, setup);
  956. break;
  957. };
  958. }
  959. }
  960. return 0;
  961. }
  962. }
  963. bool MatchLiteralNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  964. {
  965. // We look for octoquad patterns before converting for case-insensitivity
  966. Assert(!isEquivClass);
  967. if (!oi->CouldAppend(length))
  968. return false;
  969. for (CharCount i = 0; i < length; i++)
  970. {
  971. if (!oi->AppendChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), compiler.program->rep.insts.litbuf[offset + i])))
  972. return false;
  973. }
  974. return true;
  975. }
  976. bool MatchLiteralNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  977. {
  978. if (isEquivClass)
  979. // The literal would expand into length^3 alternatives
  980. return false;
  981. return true;
  982. }
  983. bool MatchLiteralNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  984. {
  985. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  986. Assert(!isEquivClass);
  987. CharTrie* tail = trie;
  988. for (CharCount i = 0; i < length; i++)
  989. {
  990. if (tail->IsAccepting())
  991. {
  992. // An earlier literal is a prefix of this literal
  993. // If isAcceptFirst, can ignore suffix of already recognized literal.
  994. // Otherwise, must fail.
  995. return isAcceptFirst;
  996. }
  997. CharTrie* newTail = tail->Add(compiler.ctAllocator, compiler.program->rep.insts.litbuf[offset + i]);
  998. if (tail->Count() > maxTrieArmExpansion)
  999. return false;
  1000. tail = newTail;
  1001. }
  1002. if (cont == 0)
  1003. {
  1004. if (tail->Count() > 0)
  1005. // This literal is a proper prefix of an earlier literal
  1006. return false;
  1007. tail->SetAccepting();
  1008. }
  1009. else
  1010. {
  1011. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  1012. return false;
  1013. }
  1014. return true;
  1015. }
  1016. #if ENABLE_REGEX_CONFIG_OPTIONS
  1017. void MatchLiteralNode::Print(DebugWriter* w, const Char* litbuf) const
  1018. {
  1019. w->Print(_u("MatchLiteral("));
  1020. int skip = isEquivClass ? CaseInsensitive::EquivClassSize : 1;
  1021. for (int i = 0; i < skip; i++)
  1022. {
  1023. if (i > 0)
  1024. w->Print(_u(", "));
  1025. w->Print(_u("\""));
  1026. for (CharCount j = 0; j < length; j++)
  1027. w->PrintEscapedChar(litbuf[offset + j * skip + i]);
  1028. w->Print(_u("\""));
  1029. }
  1030. w->PrintEOL(_u(")"));
  1031. PrintAnnotations(w);
  1032. }
  1033. #endif
  1034. // ----------------------------------------------------------------------
  1035. // MatchCharNode
  1036. // ----------------------------------------------------------------------
  1037. CharCount MatchCharNode::LiteralLength() const
  1038. {
  1039. return 1;
  1040. }
  1041. void MatchCharNode::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  1042. {
  1043. Assert(!isEquivClass);
  1044. Assert(litbufNext + 1 <= litbufLen);
  1045. if (litbufNext + 1 <= litbufLen) // for prefast
  1046. litbuf[litbufNext++] = cs[0];
  1047. }
  1048. bool MatchCharNode::IsCharOrPositiveSet() const
  1049. {
  1050. return true;
  1051. }
  1052. CharCount MatchCharNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1053. {
  1054. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0)
  1055. {
  1056. // To ensure initialization, we first default-initialize the
  1057. // whole array with a constant, and then individually set it
  1058. // to be just the first character (known to exist). This can
  1059. // hopefully be optimized to just initialize to cs[0] by the
  1060. // compiler.
  1061. Char equivs[CaseInsensitive::EquivClassSize] = { (Char)-1 };
  1062. for (int i = 0; i < CaseInsensitive::EquivClassSize; i++)
  1063. {
  1064. equivs[i] = cs[0];
  1065. }
  1066. bool isNonTrivial = compiler.standardChars->ToEquivs(compiler.program->GetCaseMappingSource(), cs[0], equivs);
  1067. if (isNonTrivial)
  1068. {
  1069. isEquivClass = true;
  1070. for (int i = 0; i < CaseInsensitive::EquivClassSize; i++)
  1071. {
  1072. cs[i] = equivs[i];
  1073. }
  1074. }
  1075. }
  1076. return 0;
  1077. }
  1078. void MatchCharNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1079. {
  1080. }
  1081. bool MatchCharNode::IsRefiningAssertion(Compiler& compiler)
  1082. {
  1083. return false;
  1084. }
  1085. void MatchCharNode::AnnotatePass0(Compiler& compiler)
  1086. {
  1087. // If c is a word char then all characters equivalent to c are word chars
  1088. isWord = compiler.standardChars->IsWord(cs[0]);
  1089. }
  1090. void MatchCharNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1091. {
  1092. features = HasMatchChar;
  1093. thisConsumes.Exact(1);
  1094. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1095. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1096. firstSet->Set(compiler.ctAllocator, cs[i]);
  1097. isFirstExact = true;
  1098. isThisIrrefutable = false;
  1099. isThisWillNotProgress = true;
  1100. isThisWillNotRegress = true;
  1101. isNotInLoop = parentNotInLoop;
  1102. isAtLeastOnce = parentAtLeastOnce;
  1103. isNotSpeculative = parentNotSpeculative;
  1104. isNotNegated = parentNotNegated;
  1105. }
  1106. void MatchCharNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1107. {
  1108. prevConsumes = accumConsumes;
  1109. isPrevWillNotProgress = accumPrevWillNotProgress;
  1110. isPrevWillNotRegress = accumPrevWillNotRegress;
  1111. }
  1112. void MatchCharNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1113. {
  1114. followConsumes = accumConsumes;
  1115. followSet = accumFollow;
  1116. isFollowIrrefutable = accumFollowIrrefutable;
  1117. isFollowEOL = accumFollowEOL;
  1118. }
  1119. void MatchCharNode::AnnotatePass4(Compiler& compiler)
  1120. {
  1121. isDeterministic = true;
  1122. }
  1123. bool MatchCharNode::SupportsPrefixSkipping(Compiler& compiler) const
  1124. {
  1125. return true;
  1126. }
  1127. Node* MatchCharNode::HeadSyncronizingNode(Compiler& compiler)
  1128. {
  1129. return this;
  1130. }
  1131. CharCount MatchCharNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1132. {
  1133. numLiterals++;
  1134. return 1;
  1135. }
  1136. void MatchCharNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1137. {
  1138. Assert(false);
  1139. }
  1140. void MatchCharNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1141. {
  1142. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1143. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  1144. {
  1145. bestNode = this;
  1146. }
  1147. }
  1148. void MatchCharNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1149. {
  1150. }
  1151. CompileAssert(CaseInsensitive::EquivClassSize == 4);
  1152. void MatchCharNode::Emit(Compiler& compiler, __in_ecount(4) Char * cs, bool isEquivClass)
  1153. {
  1154. if (isEquivClass)
  1155. {
  1156. // To ensure initialization, we first default-initialize the
  1157. // whole array with a constant, and then individually set it
  1158. // to be just the first character (known to exist). This can
  1159. // hopefully be optimized to just initialize to cs[0] by the
  1160. // compiler.
  1161. Char uniqueEquivs[CaseInsensitive::EquivClassSize] = { (Char)-1 };
  1162. for (int i = 0; i < CaseInsensitive::EquivClassSize; i++)
  1163. {
  1164. uniqueEquivs[i] = cs[0];
  1165. }
  1166. CharCount uniqueEquivCount = FindUniqueEquivs(cs, uniqueEquivs);
  1167. switch (uniqueEquivCount)
  1168. {
  1169. case 1:
  1170. EMIT(compiler, MatchCharInst, uniqueEquivs[0]);
  1171. break;
  1172. case 2:
  1173. EMIT(compiler, MatchChar2Inst, uniqueEquivs[0], uniqueEquivs[1]);
  1174. break;
  1175. case 3:
  1176. EMIT(compiler, MatchChar3Inst, uniqueEquivs[0], uniqueEquivs[1], uniqueEquivs[2]);
  1177. break;
  1178. default:
  1179. EMIT(compiler, MatchChar4Inst, uniqueEquivs[0], uniqueEquivs[1], uniqueEquivs[2], uniqueEquivs[3]);
  1180. break;
  1181. }
  1182. }
  1183. else
  1184. EMIT(compiler, MatchCharInst, cs[0]);
  1185. }
  1186. CharCount MatchCharNode::FindUniqueEquivs(const Char equivs[CaseInsensitive::EquivClassSize], __out_ecount(4) Char uniqueEquivs[CaseInsensitive::EquivClassSize])
  1187. {
  1188. uniqueEquivs[0] = equivs[0];
  1189. CharCount uniqueCount = 1;
  1190. for (CharCount equivIndex = 1; equivIndex < CaseInsensitive::EquivClassSize; ++equivIndex)
  1191. {
  1192. bool alreadyHave = false;
  1193. for (CharCount uniqueIndex = 0; uniqueIndex < uniqueCount; ++uniqueIndex)
  1194. {
  1195. if (uniqueEquivs[uniqueIndex] == equivs[equivIndex])
  1196. {
  1197. alreadyHave = true;
  1198. break;
  1199. }
  1200. }
  1201. if (!alreadyHave)
  1202. {
  1203. uniqueEquivs[uniqueCount] = equivs[equivIndex];
  1204. uniqueCount += 1;
  1205. }
  1206. }
  1207. return uniqueCount;
  1208. }
  1209. void MatchCharNode::Emit(Compiler& compiler, CharCount& skipped)
  1210. {
  1211. if (skipped >= 1)
  1212. {
  1213. // Asking to skip entire char
  1214. skipped--;
  1215. return;
  1216. }
  1217. //
  1218. // Compilation scheme:
  1219. //
  1220. // MatchChar(2|3|4)?
  1221. //
  1222. skipped -= min(skipped, static_cast<CharCount>(1));
  1223. Emit(compiler, cs, isEquivClass);
  1224. }
  1225. CharCount MatchCharNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1226. {
  1227. //
  1228. // Compilation scheme:
  1229. //
  1230. // SyncTo(Char|Char2Set|Set)And(Consume|Continue|Backup)
  1231. //
  1232. if (isHeadSyncronizingNode)
  1233. {
  1234. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  1235. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  1236. if (firstSet->IsSingleton())
  1237. EMIT(compiler, SyncToCharAndConsumeInst, firstSet->Singleton());
  1238. else
  1239. EMIT(compiler, SyncToSetAndConsumeInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1240. return 1;
  1241. }
  1242. else
  1243. {
  1244. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  1245. // inside a group for instance, then we won't need to back up but we cannot consume the literal. If we don't need to
  1246. // back up, we can use SyncToCharAndContinue instead.
  1247. if (prevConsumes.IsExact(0))
  1248. {
  1249. Char entries[CharSet<Char>::MaxCompact];
  1250. int count = firstSet->GetCompactEntries(2, entries);
  1251. if (count == 1)
  1252. EMIT(compiler, SyncToCharAndContinueInst, entries[0]);
  1253. else if (count == 2)
  1254. EMIT(compiler, SyncToChar2SetAndContinueInst, entries[0], entries[1]);
  1255. else
  1256. EMIT(compiler, SyncToSetAndContinueInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1257. }
  1258. else
  1259. {
  1260. if (firstSet->IsSingleton())
  1261. EMIT(compiler, SyncToCharAndBackupInst, firstSet->Singleton(), prevConsumes);
  1262. else
  1263. EMIT(compiler, SyncToSetAndBackupInst<false>, prevConsumes)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1264. }
  1265. return 0;
  1266. }
  1267. }
  1268. bool MatchCharNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1269. {
  1270. // We look for octoquad patterns before converting for case-insensitivity
  1271. Assert(!isEquivClass);
  1272. return oi->AppendChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), cs[0]));
  1273. }
  1274. bool MatchCharNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1275. {
  1276. if (isEquivClass)
  1277. {
  1278. accNumAlts *= CaseInsensitive::EquivClassSize;
  1279. if (accNumAlts > maxTrieArmExpansion)
  1280. return false;
  1281. }
  1282. return true;
  1283. }
  1284. bool MatchCharNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1285. {
  1286. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1287. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1288. {
  1289. if (trie->IsAccepting())
  1290. {
  1291. // An earlier literal is a prefix of this literal
  1292. // If isAcceptFirst, can ignore suffix of already recognized literal.
  1293. // Otherwise, must fail.
  1294. return isAcceptFirst;
  1295. }
  1296. CharTrie* tail = trie->Add(compiler.ctAllocator, cs[i]);
  1297. if (trie->Count() > maxTrieArmExpansion)
  1298. return false;
  1299. if (cont == 0)
  1300. {
  1301. if (tail->Count() > 0)
  1302. // This literal is a proper prefix of an earlier literal
  1303. return false;
  1304. tail->SetAccepting();
  1305. }
  1306. else
  1307. {
  1308. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  1309. return false;
  1310. }
  1311. }
  1312. return true;
  1313. }
  1314. #if ENABLE_REGEX_CONFIG_OPTIONS
  1315. void MatchCharNode::Print(DebugWriter* w, const Char* litbuf) const
  1316. {
  1317. w->Print(_u("MatchChar("));
  1318. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1319. {
  1320. if (i > 0)
  1321. w->Print(_u(", "));
  1322. w->PrintQuotedChar(cs[i]);
  1323. }
  1324. w->PrintEOL(_u(")"));
  1325. PrintAnnotations(w);
  1326. }
  1327. #endif
  1328. // ----------------------------------------------------------------------
  1329. // MatchSetNode
  1330. // ----------------------------------------------------------------------
  1331. CharCount MatchSetNode::LiteralLength() const
  1332. {
  1333. return 0;
  1334. }
  1335. bool MatchSetNode::IsCharOrPositiveSet() const
  1336. {
  1337. return !isNegation;
  1338. }
  1339. CharCount MatchSetNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1340. {
  1341. return 0;
  1342. }
  1343. void MatchSetNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1344. {
  1345. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0 && this->needsEquivClass)
  1346. {
  1347. // If the set is negated, then at this point we could:
  1348. // (1) take each char in the set to its equivalence class and complement it
  1349. // (2) complement the set and take each char to its equivalence class
  1350. // Thankfully the spec demands (1), so we don't need to actually calculate any complement, just
  1351. // retain the isNegated flag
  1352. CharSet<Char> newSet;
  1353. // First include all the existing characters in the result set so that large ranges such as \x80-\uffff
  1354. // don't temporarily turn into a large number of small ranges corresponding to the various equivalent
  1355. // characters
  1356. newSet.UnionInPlace(compiler.rtAllocator, set);
  1357. set.ToEquivClass(compiler.rtAllocator, newSet);
  1358. set = newSet;
  1359. }
  1360. }
  1361. bool MatchSetNode::IsRefiningAssertion(Compiler& compiler)
  1362. {
  1363. return false;
  1364. }
  1365. void MatchSetNode::AnnotatePass0(Compiler& compiler)
  1366. {
  1367. if (isNegation || set.IsEmpty())
  1368. isWord = false;
  1369. else
  1370. isWord = set.IsSubsetOf(*compiler.standardChars->GetWordSet());
  1371. }
  1372. void MatchSetNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1373. {
  1374. features = HasMatchSet;
  1375. if (isNegation)
  1376. {
  1377. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1378. set.ToComplement(compiler.ctAllocator, *firstSet);
  1379. }
  1380. else
  1381. {
  1382. // CAUTION:
  1383. // Be careful not to use firstSet after deleting the node.
  1384. firstSet = &set;
  1385. }
  1386. isFirstExact = true;
  1387. // If firstSet is empty then pattern will always fail
  1388. thisConsumes.Exact(firstSet->IsEmpty() ? 0 : 1);
  1389. isThisIrrefutable = false;
  1390. isThisWillNotProgress = true;
  1391. isThisWillNotRegress = true;
  1392. isNotInLoop = parentNotInLoop;
  1393. isAtLeastOnce = parentAtLeastOnce;
  1394. isNotSpeculative = parentNotSpeculative;
  1395. isNotNegated = parentNotNegated;
  1396. }
  1397. void MatchSetNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1398. {
  1399. prevConsumes = accumConsumes;
  1400. isPrevWillNotProgress = accumPrevWillNotProgress;
  1401. isPrevWillNotRegress = accumPrevWillNotRegress;
  1402. }
  1403. void MatchSetNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1404. {
  1405. followConsumes = accumConsumes;
  1406. followSet = accumFollow;
  1407. isFollowIrrefutable = accumFollowIrrefutable;
  1408. isFollowEOL = accumFollowEOL;
  1409. }
  1410. void MatchSetNode::AnnotatePass4(Compiler& compiler)
  1411. {
  1412. isDeterministic = true;
  1413. }
  1414. bool MatchSetNode::SupportsPrefixSkipping(Compiler& compiler) const
  1415. {
  1416. return true;
  1417. }
  1418. Node* MatchSetNode::HeadSyncronizingNode(Compiler& compiler)
  1419. {
  1420. return this;
  1421. }
  1422. CharCount MatchSetNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1423. {
  1424. return 0;
  1425. }
  1426. void MatchSetNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1427. {
  1428. Assert(false);
  1429. }
  1430. void MatchSetNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1431. {
  1432. }
  1433. void MatchSetNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1434. {
  1435. }
  1436. void MatchSetNode::Emit(Compiler& compiler, CharCount& skipped)
  1437. {
  1438. if (skipped >= 1)
  1439. {
  1440. // Asking to skip entire set
  1441. skipped--;
  1442. return;
  1443. }
  1444. //
  1445. // Compilation scheme:
  1446. //
  1447. // MatchSet
  1448. //
  1449. skipped -= min(skipped, static_cast<CharCount>(1));
  1450. RuntimeCharSet<Char> *runtimeSet;
  1451. if(isNegation)
  1452. runtimeSet = &EMIT(compiler, MatchSetInst<true>)->set;
  1453. else
  1454. runtimeSet = &EMIT(compiler, MatchSetInst<false>)->set;
  1455. runtimeSet->CloneFrom(compiler.rtAllocator, set);
  1456. }
  1457. CharCount MatchSetNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1458. {
  1459. //
  1460. // Compilation scheme:
  1461. //
  1462. // SyncToSetAnd(Consume|Continue|Backup)
  1463. //
  1464. RuntimeCharSet<Char> *runtimeSet;
  1465. CharCount consumedChars;
  1466. if (isHeadSyncronizingNode)
  1467. {
  1468. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  1469. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  1470. if(isNegation)
  1471. runtimeSet = &EMIT(compiler, SyncToSetAndConsumeInst<true>)->set;
  1472. else
  1473. runtimeSet = &EMIT(compiler, SyncToSetAndConsumeInst<false>)->set;
  1474. consumedChars = 1;
  1475. }
  1476. else
  1477. {
  1478. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  1479. // inside a group for instance, then we won't need to back up but we cannot consume the literal. If we don't need to
  1480. // back up, we still cannot use SyncToSetAndContinue like in MatchCharNode::EmitScan, since SyncToSetAndContinue does not support negation
  1481. // sets.
  1482. if(prevConsumes.IsExact(0))
  1483. {
  1484. if(isNegation)
  1485. runtimeSet = &EMIT(compiler, SyncToSetAndContinueInst<true>)->set;
  1486. else
  1487. runtimeSet = &EMIT(compiler, SyncToSetAndContinueInst<false>)->set;
  1488. }
  1489. else if(isNegation)
  1490. runtimeSet = &EMIT(compiler, SyncToSetAndBackupInst<true>, prevConsumes)->set;
  1491. else
  1492. runtimeSet = &EMIT(compiler, SyncToSetAndBackupInst<false>, prevConsumes)->set;
  1493. consumedChars = 0;
  1494. }
  1495. runtimeSet->CloneFrom(compiler.rtAllocator, set);
  1496. return consumedChars;
  1497. }
  1498. bool MatchSetNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1499. {
  1500. if (isNegation || set.IsEmpty() || !oi->BeginUnions())
  1501. return false;
  1502. Assert(CharSet<Char>::MaxCompact >= TrigramAlphabet::AlphaCount);
  1503. Char entries[CharSet<Char>::MaxCompact];
  1504. int count = set.GetCompactEntries(TrigramAlphabet::AlphaCount, entries);
  1505. if (count < 0)
  1506. // Too many unique characters
  1507. return false;
  1508. for (int i = 0; i < count; i++)
  1509. {
  1510. if (!oi->UnionChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), entries[i])))
  1511. // Too many unique characters
  1512. return false;
  1513. }
  1514. oi->EndUnions(); // this doesn't need to be called if we return false earlier since the OctoquadPattern won't be used
  1515. return true;
  1516. }
  1517. bool MatchSetNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1518. {
  1519. if (isNegation || !set.IsCompact())
  1520. return false;
  1521. const uint count = set.Count();
  1522. if(count == 0)
  1523. return false; // empty set always fails and consumes nothing, and therefore is not a char-trie arm
  1524. accNumAlts *= count;
  1525. if (accNumAlts > maxTrieArmExpansion)
  1526. return false;
  1527. return true;
  1528. }
  1529. bool MatchSetNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1530. {
  1531. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1532. Assert(!isNegation && set.IsCompact());
  1533. Char entries[CharSet<Char>::MaxCompact];
  1534. int count = set.GetCompactEntries(CharSet<Char>::MaxCompact, entries);
  1535. Assert(count > 0);
  1536. for (int i = 0; i < count; i++)
  1537. {
  1538. if (trie->IsAccepting())
  1539. {
  1540. // An earlier literal is a prefix of this literal
  1541. // If isAcceptFirst, can ignore suffix of already recognized literal.
  1542. // Otherwise, must fail.
  1543. return isAcceptFirst;
  1544. }
  1545. CharTrie* tail = trie->Add(compiler.ctAllocator, entries[i]);
  1546. if (trie->Count() > maxTrieArmExpansion)
  1547. return false;
  1548. if (cont == 0)
  1549. {
  1550. if (tail->Count() > 0)
  1551. // This literal is a proper prefix of an earlier literal
  1552. return false;
  1553. tail->SetAccepting();
  1554. }
  1555. else
  1556. {
  1557. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  1558. return false;
  1559. }
  1560. }
  1561. return true;
  1562. }
  1563. #if ENABLE_REGEX_CONFIG_OPTIONS
  1564. void MatchSetNode::Print(DebugWriter* w, const Char* litbuf) const
  1565. {
  1566. w->Print(_u("MatchSet(%s, "), isNegation ? _u("negative") : _u("positive"));
  1567. set.Print(w);
  1568. w->PrintEOL(_u(")"));
  1569. PrintAnnotations(w);
  1570. }
  1571. #endif
  1572. // ----------------------------------------------------------------------
  1573. // ConcatNode
  1574. // ----------------------------------------------------------------------
  1575. CharCount ConcatNode::LiteralLength() const
  1576. {
  1577. return 0;
  1578. }
  1579. bool ConcatNode::IsCharOrPositiveSet() const
  1580. {
  1581. return false;
  1582. }
  1583. CharCount ConcatNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1584. {
  1585. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1586. Assert(tail != 0);
  1587. CharCount n = 0;
  1588. #if DBG
  1589. ConcatNode* prev = 0;
  1590. #endif
  1591. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1592. {
  1593. Assert(curr->head->tag != Concat);
  1594. Assert(prev == 0 || !(prev->head->LiteralLength() > 0 && curr->head->LiteralLength() > 0));
  1595. n = UInt32Math::Add(n, curr->head->TransferPass0(compiler, litbuf));
  1596. #if DBG
  1597. prev = curr;
  1598. #endif
  1599. }
  1600. return n;
  1601. }
  1602. void ConcatNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1603. {
  1604. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1605. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1606. curr->head->TransferPass1(compiler, litbuf);
  1607. }
  1608. bool ConcatNode::IsRefiningAssertion(Compiler& compiler)
  1609. {
  1610. return false;
  1611. }
  1612. void ConcatNode::AnnotatePass0(Compiler& compiler)
  1613. {
  1614. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1615. Node* prev = 0;
  1616. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1617. {
  1618. curr->head->AnnotatePass0(compiler);
  1619. if (prev != 0)
  1620. {
  1621. if (curr->head->tag == WordBoundary && prev->isWord)
  1622. {
  1623. WordBoundaryNode* wb = (WordBoundaryNode*)curr->head;
  1624. wb->mustIncludeLeaving = true;
  1625. }
  1626. else if (prev->tag == WordBoundary && curr->head->isWord)
  1627. {
  1628. WordBoundaryNode* wb = (WordBoundaryNode*)prev;
  1629. wb->mustIncludeEntering = true;
  1630. }
  1631. }
  1632. prev = curr->head;
  1633. }
  1634. }
  1635. void ConcatNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1636. {
  1637. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1638. features = HasConcat;
  1639. isNotInLoop = parentNotInLoop;
  1640. isAtLeastOnce = parentAtLeastOnce;
  1641. isNotSpeculative = parentNotSpeculative;
  1642. isNotNegated = parentNotNegated;
  1643. // Pass 1: Count items
  1644. int n = 0;
  1645. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1646. n++;
  1647. // Pass 2: Annotate each item, accumulate features, consumes, find longest prefix of possibly-empty-accepting items,
  1648. // check if all items are irrefutable
  1649. int emptyPrefix = 0;
  1650. thisConsumes.Exact(0);
  1651. isThisIrrefutable = true;
  1652. isThisWillNotProgress = true;
  1653. isThisWillNotRegress = true;
  1654. int item = 0;
  1655. for (ConcatNode* curr = this; curr != 0; curr = curr->tail, item++)
  1656. {
  1657. curr->head->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, parentNotSpeculative, isNotNegated);
  1658. features |= curr->head->features;
  1659. thisConsumes.Add(curr->head->thisConsumes);
  1660. if (!curr->head->isThisIrrefutable)
  1661. isThisIrrefutable = false;
  1662. if (!curr->head->isThisWillNotProgress)
  1663. isThisWillNotProgress = false;
  1664. if (!curr->head->isThisWillNotRegress)
  1665. isThisWillNotRegress = false;
  1666. if (emptyPrefix == item && curr->head->thisConsumes.CouldMatchEmpty())
  1667. emptyPrefix++;
  1668. }
  1669. if (emptyPrefix == 0)
  1670. {
  1671. firstSet = head->firstSet;
  1672. isFirstExact = head->isFirstExact;
  1673. }
  1674. else
  1675. {
  1676. // Pass 3: Overall first set is union of first's of emptyPrefx
  1677. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1678. isFirstExact = true;
  1679. item = 0;
  1680. for (ConcatNode* curr = this; item <= min(emptyPrefix, n - 1); curr = curr->tail, item++)
  1681. {
  1682. AnalysisAssert(curr);
  1683. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  1684. if (!curr->head->isFirstExact)
  1685. isFirstExact = false;
  1686. }
  1687. }
  1688. }
  1689. void ConcatNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1690. {
  1691. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1692. prevConsumes = accumConsumes;
  1693. isPrevWillNotProgress = accumPrevWillNotProgress;
  1694. isPrevWillNotRegress = accumPrevWillNotRegress;
  1695. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1696. {
  1697. curr->head->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  1698. accumConsumes.Add(curr->head->thisConsumes);
  1699. if (!curr->head->isThisWillNotProgress)
  1700. accumPrevWillNotProgress = false;
  1701. if (!curr->head->isThisWillNotRegress)
  1702. accumPrevWillNotRegress = false;
  1703. }
  1704. }
  1705. void ConcatNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1706. {
  1707. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1708. followConsumes = accumConsumes;
  1709. followSet = accumFollow;
  1710. isFollowIrrefutable = accumFollowIrrefutable;
  1711. isFollowEOL = accumFollowEOL;
  1712. // Pass 1: Count items
  1713. int n = 0;
  1714. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1715. n++;
  1716. // Pass 2: Collect items so we can enumerate backwards
  1717. Node** nodes = AnewArray(compiler.ctAllocator, Node *, n);
  1718. int item = 0;
  1719. for (ConcatNode* curr = this; curr != 0; curr = curr->tail, item++)
  1720. nodes[item] = curr->head;
  1721. // Pass 3: Work backwards propagating follow set, irrefutability and FollowEndLineOrPattern, and adding consumes
  1722. CharSet<Char>* innerFollow = accumFollow;
  1723. for (item = n - 1; item >= 0; item--)
  1724. {
  1725. nodes[item]->AnnotatePass3(compiler, accumConsumes, innerFollow, accumFollowIrrefutable, accumFollowEOL);
  1726. if (!nodes[item]->isThisIrrefutable)
  1727. accumFollowIrrefutable = false;
  1728. if (!nodes[item]->IsEmptyOnly() && (compiler.program->flags & MultilineRegexFlag) == 0)
  1729. accumFollowEOL = nodes[item]->tag == EOL;
  1730. // ConcatNode has hardfail BOI test if any child has hardfail BOI
  1731. hasInitialHardFailBOI = hasInitialHardFailBOI || nodes[item]->hasInitialHardFailBOI;
  1732. if (item > 0)
  1733. {
  1734. CharSet<Char>* nextInnerFollow = Anew(compiler.ctAllocator, UnicodeCharSet);
  1735. if (nodes[item]->thisConsumes.CouldMatchEmpty() && !nodes[item]->IsRefiningAssertion(compiler))
  1736. {
  1737. // Later follows can shine through this item to the previous item
  1738. nextInnerFollow->UnionInPlace(compiler.ctAllocator, *innerFollow);
  1739. }
  1740. nextInnerFollow->UnionInPlace(compiler.ctAllocator, *nodes[item]->firstSet);
  1741. innerFollow = nextInnerFollow;
  1742. accumConsumes.Add(nodes[item]->thisConsumes);
  1743. }
  1744. }
  1745. }
  1746. void ConcatNode::AnnotatePass4(Compiler& compiler)
  1747. {
  1748. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1749. isDeterministic = true;
  1750. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1751. {
  1752. curr->head->AnnotatePass4(compiler);
  1753. if (!curr->head->isDeterministic)
  1754. isDeterministic = false;
  1755. }
  1756. }
  1757. bool ConcatNode::SupportsPrefixSkipping(Compiler& compiler) const
  1758. {
  1759. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1760. int prefix = 0;
  1761. for (const ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1762. {
  1763. if (curr->head->SupportsPrefixSkipping(compiler))
  1764. prefix++;
  1765. else
  1766. break;
  1767. }
  1768. return prefix > 0;
  1769. }
  1770. Node* ConcatNode::HeadSyncronizingNode(Compiler& compiler)
  1771. {
  1772. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1773. return head->HeadSyncronizingNode(compiler);
  1774. }
  1775. void ConcatNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1776. {
  1777. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  1778. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1779. curr->head->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  1780. }
  1781. CharCount ConcatNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1782. {
  1783. return 0;
  1784. }
  1785. void ConcatNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1786. {
  1787. Assert(false);
  1788. }
  1789. void ConcatNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1790. {
  1791. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1792. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1793. curr->head->BestSyncronizingNode(compiler, bestNode);
  1794. }
  1795. void ConcatNode::Emit(Compiler& compiler, CharCount& skipped)
  1796. {
  1797. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1798. //
  1799. // Compilation scheme:
  1800. //
  1801. // <item 1>
  1802. // ...
  1803. // <item n>
  1804. //
  1805. // :-)
  1806. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1807. curr->head->Emit(compiler, skipped);
  1808. }
  1809. CharCount ConcatNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1810. {
  1811. Assert(false);
  1812. return 0;
  1813. }
  1814. bool ConcatNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1815. {
  1816. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1817. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1818. {
  1819. if (!curr->head->IsOctoquad(compiler, oi))
  1820. return false;
  1821. }
  1822. return true;
  1823. }
  1824. bool ConcatNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1825. {
  1826. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1827. for (const ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1828. {
  1829. if (!curr->head->IsCharTrieArm(compiler, accNumAlts))
  1830. return false;
  1831. }
  1832. return true;
  1833. }
  1834. bool ConcatNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1835. {
  1836. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1837. if (cont != 0)
  1838. // We don't want to manage a stack of continuations
  1839. return false;
  1840. // NOTE: This is the only place we use an internal node of a concat sequence as a sub-concat sequence
  1841. return head->BuildCharTrie(compiler, trie, tail, isAcceptFirst);
  1842. }
  1843. #if ENABLE_REGEX_CONFIG_OPTIONS
  1844. void ConcatNode::Print(DebugWriter* w, const Char* litbuf) const
  1845. {
  1846. w->PrintEOL(_u("Concat()"));
  1847. PrintAnnotations(w);
  1848. w->PrintEOL(_u("{"));
  1849. w->Indent();
  1850. for (const ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1851. curr->head->Print(w, litbuf);
  1852. w->Unindent();
  1853. w->PrintEOL(_u("}"));
  1854. }
  1855. #endif
  1856. // ----------------------------------------------------------------------
  1857. // AltNode
  1858. // ----------------------------------------------------------------------
  1859. CharCount AltNode::LiteralLength() const
  1860. {
  1861. return 0;
  1862. }
  1863. bool AltNode::IsCharOrPositiveSet() const
  1864. {
  1865. return false;
  1866. }
  1867. CharCount AltNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1868. {
  1869. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1870. Assert(tail != 0);
  1871. CharCount n = 0;
  1872. #if DBG
  1873. AltNode* prev = 0;
  1874. #endif
  1875. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1876. {
  1877. Assert(curr->head->tag != Alt);
  1878. Assert(prev == 0 || !(prev->head->IsCharOrPositiveSet() && curr->head->IsCharOrPositiveSet()));
  1879. n = UInt32Math::Add(n, curr->head->TransferPass0(compiler, litbuf));
  1880. #if DBG
  1881. prev = curr;
  1882. #endif
  1883. }
  1884. return n;
  1885. }
  1886. void AltNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1887. {
  1888. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1889. for (AltNode *curr = this; curr != 0; curr = curr->tail)
  1890. curr->head->TransferPass1(compiler, litbuf);
  1891. }
  1892. bool AltNode::IsRefiningAssertion(Compiler& compiler)
  1893. {
  1894. return false;
  1895. }
  1896. void AltNode::AnnotatePass0(Compiler& compiler)
  1897. {
  1898. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1899. isWord = true;
  1900. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1901. {
  1902. curr->head->AnnotatePass0(compiler);
  1903. if (!curr->head->isWord)
  1904. isWord = false;
  1905. }
  1906. }
  1907. void AltNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1908. {
  1909. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1910. features = HasAlt;
  1911. isNotInLoop = parentNotInLoop;
  1912. isAtLeastOnce = parentAtLeastOnce;
  1913. isNotSpeculative = parentNotSpeculative;
  1914. isNotNegated = parentNotNegated;
  1915. // Overall alternative:
  1916. // - is irrefutable if at least one item is irrefutable
  1917. // - will not progress(regress) if each item will not progress(regress) and has strictly decreasing(increasing) consumes
  1918. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1919. isThisIrrefutable = false;
  1920. isThisWillNotProgress = true;
  1921. isThisWillNotRegress = true;
  1922. isFirstExact = true;
  1923. CountDomain prevConsumes;
  1924. int item = 0;
  1925. for (AltNode *curr = this; curr != 0; curr = curr->tail, item++)
  1926. {
  1927. curr->head->AnnotatePass1(compiler, parentNotInLoop, false, parentNotSpeculative, isNotNegated);
  1928. features |= curr->head->features;
  1929. if (!curr->head->isThisWillNotProgress)
  1930. isThisWillNotProgress = false;
  1931. if (!curr->head->isThisWillNotRegress)
  1932. isThisWillNotRegress = false;
  1933. if (item == 0)
  1934. prevConsumes = thisConsumes = curr->head->thisConsumes;
  1935. else
  1936. {
  1937. thisConsumes.Lub(curr->head->thisConsumes);
  1938. if (!curr->head->thisConsumes.IsLessThan(prevConsumes))
  1939. isThisWillNotProgress = false;
  1940. if (!curr->head->thisConsumes.IsGreaterThan(prevConsumes))
  1941. isThisWillNotRegress = false;
  1942. prevConsumes = curr->head->thisConsumes;
  1943. }
  1944. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  1945. if (!curr->head->isFirstExact || curr->head->isThisIrrefutable)
  1946. // If any item is irrefutable then later items may never be taken, so first set cannot be exact
  1947. isFirstExact = false;
  1948. if (curr->head->isThisIrrefutable)
  1949. isThisIrrefutable = true;
  1950. }
  1951. }
  1952. void AltNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1953. {
  1954. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1955. prevConsumes = accumConsumes;
  1956. isPrevWillNotProgress = accumPrevWillNotProgress;
  1957. isPrevWillNotRegress = accumPrevWillNotRegress;
  1958. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1959. curr->head->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  1960. }
  1961. void AltNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1962. {
  1963. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1964. followConsumes = accumConsumes;
  1965. followSet = accumFollow;
  1966. isFollowIrrefutable = accumFollowIrrefutable;
  1967. isFollowEOL = accumFollowEOL;
  1968. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1969. {
  1970. curr->head->AnnotatePass3(compiler, accumConsumes, accumFollow, accumFollowIrrefutable, accumFollowEOL);
  1971. // AltNode has hardfail BOI test if all child Nodes have hardfail BOI tests
  1972. hasInitialHardFailBOI = curr->head->hasInitialHardFailBOI && (hasInitialHardFailBOI || (curr == this));
  1973. }
  1974. }
  1975. void AltNode::AnnotatePass4(Compiler& compiler)
  1976. {
  1977. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1978. //
  1979. // Simplification rule
  1980. //
  1981. // If the follow is irrefutable then we can ignore all items after an irrefutable item, since
  1982. // we'll never be able to backtrack into them.
  1983. // E.g.: (a*|b*)c* === a*c*
  1984. //
  1985. bool simplified = false;
  1986. if (isFollowIrrefutable && isThisIrrefutable)
  1987. {
  1988. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1989. {
  1990. if (curr->head->isThisIrrefutable && curr->tail != 0)
  1991. {
  1992. curr->tail = 0;
  1993. simplified = true;
  1994. break;
  1995. }
  1996. }
  1997. }
  1998. if (simplified)
  1999. {
  2000. Assert(!isFirstExact);
  2001. // Recalculate firstSet. Since it can only get smaller, and alternative could not have had an exact
  2002. // first set, this recalculation does not make any decisions already made based on the current firstSet
  2003. // unsound.
  2004. // NOTE: Is it worth recalculating the WillNotProgress/WillNotRegress bools?
  2005. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  2006. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2007. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  2008. }
  2009. //
  2010. // Annotate items
  2011. //
  2012. isDeterministic = true;
  2013. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2014. {
  2015. curr->head->AnnotatePass4(compiler);
  2016. if (!curr->head->isDeterministic)
  2017. isDeterministic = false;
  2018. }
  2019. //
  2020. // Compilation scheme: Switch/Chain/Set, not isOptional
  2021. //
  2022. // If no item can match empty and all items' FIRST sets are pairwise disjoint then we can
  2023. // commit to an item using a 1 char lookahead. We can fall-through to the last
  2024. // item without guarding it since it will fail if the next character cannot match.
  2025. // E.g.: (abc|def)
  2026. //
  2027. {
  2028. // Pass 1: Items cannot match empty, accumulate counts
  2029. bool fires = true;
  2030. bool allCompact = true;
  2031. bool allSimpleOneChar = true;
  2032. int numItems = 0;
  2033. uint totalChars = 0;
  2034. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2035. {
  2036. if (curr->head->thisConsumes.CouldMatchEmpty())
  2037. {
  2038. fires = false;
  2039. break;
  2040. }
  2041. numItems++;
  2042. if (!curr->head->firstSet->IsCompact())
  2043. {
  2044. allCompact = false;
  2045. }
  2046. if (!curr->head->IsSimpleOneChar())
  2047. {
  2048. allSimpleOneChar = false;
  2049. }
  2050. totalChars += curr->head->firstSet->Count();
  2051. }
  2052. if (fires)
  2053. {
  2054. // To go from two to one items requires the first item
  2055. // to be irrefutable, in which case it could match empty and this rule won't fire.
  2056. Assert(numItems > 1);
  2057. // Step 2: Check FIRST sets are disjoint
  2058. if (totalChars == firstSet->Count())
  2059. {
  2060. // **COMMIT**
  2061. if (allSimpleOneChar)
  2062. {
  2063. // This will probably never fire since the parser has already converted alts-of-chars/sets
  2064. // to sets. We include it for symmetry with below.
  2065. scheme = Set;
  2066. }
  2067. else if (allCompact && totalChars <= Switch24Inst::MaxCases)
  2068. {
  2069. // Can use a switch instruction to jump to item
  2070. scheme = Switch;
  2071. switchSize = totalChars;
  2072. }
  2073. else
  2074. {
  2075. // Must use a chain of jump instructions to jump to item
  2076. scheme = Chain;
  2077. }
  2078. isOptional = false;
  2079. return;
  2080. }
  2081. }
  2082. }
  2083. //
  2084. // Compilation scheme: None/Switch/Chain/Set, isOptional
  2085. //
  2086. // Condition (1):
  2087. // If some items are empty-only, the rest (if any) cannot match empty, follow cannot match empty, and
  2088. // all items' FIRST sets are pairwise disjoint and disjoint from the FOLLOW set, then we can commit to
  2089. // either a non-empty item or to the empty item using a 1 char lookahead. In this case we just emit each
  2090. // non-empty item with a guard, and fall-through to follow if no guard fires.
  2091. // E.g.: (abc||def)h
  2092. //
  2093. // Condition (2):
  2094. // If some items are empty-only, the rest (if any) cannot match empty, follow is irrefutable, and all
  2095. // items' FIRST sets are pairwise disjoint, then we can commit to either a non-empty item or to the empty
  2096. // item using a 1 char lookahead, provided each non-empty item obeys the condition:
  2097. // ** the item can't fail if given an arbitrary input starting with a character in its FIRST set **
  2098. // Currently, we can prove that only for IsSimpleOneChar items, though more analysis could widen the class.
  2099. // Again, we emit each non-empty item with a guard, and fall-through to follow if no guard fires.
  2100. // E.g.: ([abc]|)a*
  2101. //
  2102. // Condition (3):
  2103. // If all items are empty-only, we can commit to a single empty-only item
  2104. {
  2105. // Pass 1
  2106. bool fires = false;
  2107. bool allSimpleOneChar = true;
  2108. bool allCompact = true;
  2109. int numNonEmpty = 0;
  2110. uint totalChars = 0;
  2111. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2112. {
  2113. if (curr->head->IsEmptyOnly())
  2114. {
  2115. fires = true;
  2116. }
  2117. else if (curr->head->thisConsumes.CouldMatchEmpty())
  2118. {
  2119. fires = false;
  2120. break;
  2121. }
  2122. else
  2123. {
  2124. numNonEmpty++;
  2125. if (!curr->head->IsSimpleOneChar())
  2126. {
  2127. allSimpleOneChar = false;
  2128. }
  2129. if (!curr->head->firstSet->IsCompact())
  2130. {
  2131. allCompact = false;
  2132. }
  2133. totalChars += curr->head->firstSet->Count();
  2134. }
  2135. }
  2136. if (fires)
  2137. {
  2138. // The firing condition is not strong enough yet.
  2139. fires = false;
  2140. // Check conditions (2) and (3) first because they're faster, then check condition (1).
  2141. if (numNonEmpty == 0 ||
  2142. (isFollowIrrefutable && allSimpleOneChar && totalChars == firstSet->Count()))
  2143. {
  2144. fires = true;
  2145. }
  2146. else if (!followConsumes.CouldMatchEmpty())
  2147. {
  2148. // Check whether all FIRST sets are pairwise disjoint
  2149. // and disjoint from the FOLLOW set.
  2150. CharSet<Char> unionSet;
  2151. unionSet.UnionInPlace(compiler.ctAllocator, *firstSet);
  2152. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  2153. if (totalChars + followSet->Count() == unionSet.Count())
  2154. {
  2155. fires = true;
  2156. }
  2157. }
  2158. if (fires)
  2159. {
  2160. // **COMMIT**
  2161. if (numNonEmpty == 0)
  2162. {
  2163. scheme = None;
  2164. }
  2165. else if (allSimpleOneChar)
  2166. {
  2167. scheme = Set;
  2168. }
  2169. else if (numNonEmpty > 1 && allCompact && totalChars <= Switch24Inst::MaxCases)
  2170. {
  2171. switchSize = totalChars;
  2172. scheme = Switch;
  2173. }
  2174. else
  2175. {
  2176. scheme = Chain;
  2177. }
  2178. isOptional = true;
  2179. return;
  2180. }
  2181. }
  2182. }
  2183. //
  2184. // Compilation scheme: Trie
  2185. //
  2186. // If alt is equivalent to the form:
  2187. // (literal1|...|literaln)
  2188. // (we expand items with embedded character classes such as a[bc]d to (abd|acd)) and either:
  2189. // - follow is irrefutable and no later literal is a proper prefix of an earlier literal
  2190. // (and we may ignore later literals which have an earlier literal as proper prefix)
  2191. // E.g.: (ab|ac|abd)a* === (ab|ac)a*
  2192. // or:
  2193. // - follow is not irrefutable and no literal is a proper prefix of any other literal
  2194. // and the branching factor of the resulting trie is smallish
  2195. // E.g.: (abc|abd|abe)f
  2196. // then we can use a character trie to match the appropriate item.
  2197. //
  2198. {
  2199. // Pass 1: Items must be structurally appropriate and not result in too many alternatives after expansion
  2200. bool fires = true;
  2201. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2202. {
  2203. uint numAlts = 1;
  2204. if (!curr->head->IsCharTrieArm(compiler, numAlts))
  2205. {
  2206. fires = false;
  2207. break;
  2208. }
  2209. if (numAlts > maxTrieArmExpansion)
  2210. {
  2211. fires = false;
  2212. break;
  2213. }
  2214. }
  2215. if (fires)
  2216. {
  2217. // Pass 2: Attempt to construct the trie, checking for prefixes.
  2218. CharTrie trie;
  2219. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2220. {
  2221. if (!curr->head->BuildCharTrie(compiler, &trie, 0, isFollowIrrefutable))
  2222. {
  2223. fires = false;
  2224. break;
  2225. }
  2226. }
  2227. if (fires)
  2228. {
  2229. // **COMMIT**
  2230. // If follow is irrefutable and first item is empty, the trie would be of depth zero.
  2231. // However, in this case, the first simplification rule would have replaced the alt with a
  2232. // single empty item, and the 'None' compilation scheme would have been selected above.
  2233. //
  2234. // Similarly, if all alternations are empty and follow is refutable, the trie would be
  2235. // of depth zero, and the 'None' compilation scheme would have been selected above.
  2236. Assert(!trie.IsDepthZero());
  2237. if (trie.IsDepthOne())
  2238. {
  2239. // This case will fire if follow is irrefutable and all non length one items have an
  2240. // earlier one-character item as prefix. In this case we don't need the trie: the
  2241. // firstSet has all the information.
  2242. isOptional = false;
  2243. scheme = Set;
  2244. }
  2245. else
  2246. {
  2247. // Root of trie will live in compile-time allocator, but body will be in run-time allocator
  2248. runtimeTrie = Anew(compiler.ctAllocator, RuntimeCharTrie);
  2249. runtimeTrie->CloneFrom(compiler.rtAllocator, trie);
  2250. scheme = Trie;
  2251. }
  2252. return;
  2253. }
  2254. }
  2255. }
  2256. //
  2257. // Compilation scheme: Try
  2258. //
  2259. scheme = Try;
  2260. isDeterministic = false; // NON-DETERMINISTIC
  2261. }
  2262. bool AltNode::SupportsPrefixSkipping(Compiler& compiler) const
  2263. {
  2264. return false;
  2265. }
  2266. Node* AltNode::HeadSyncronizingNode(Compiler& compiler)
  2267. {
  2268. return 0;
  2269. }
  2270. CharCount AltNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  2271. {
  2272. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2273. // Here, we ignore nodes with length 1, which are Char nodes. The way the Alt node synchronization
  2274. // is currently implemented, it expects all nodes to be Literal nodes. It requires quite a bit of
  2275. // refactoring to have Alt nodes support Char nodes for synchronization, so Char nodes are ignored
  2276. // for now.
  2277. int localNumLiterals = numLiterals;
  2278. CharCount minLen = head->MinSyncronizingLiteralLength(compiler, localNumLiterals);
  2279. if (minLen <= 1)
  2280. return 0;
  2281. for (AltNode* curr = tail; curr != 0; curr = curr->tail)
  2282. {
  2283. CharCount thisLen = curr->head->MinSyncronizingLiteralLength(compiler, localNumLiterals);
  2284. if (thisLen <= 1)
  2285. return 0;
  2286. minLen = min(minLen, thisLen);
  2287. }
  2288. numLiterals = localNumLiterals;
  2289. if (minLen <= 1)
  2290. {
  2291. return 0;
  2292. }
  2293. return minLen;
  2294. }
  2295. void AltNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  2296. {
  2297. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2298. for (const AltNode* curr = this; curr != 0; curr = curr->tail)
  2299. curr->head->CollectSyncronizingLiterals(compiler, scanners);
  2300. }
  2301. void AltNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  2302. {
  2303. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2304. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  2305. bestNode = this;
  2306. }
  2307. void AltNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  2308. {
  2309. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  2310. for (AltNode *curr = this; curr != 0; curr = curr->tail)
  2311. curr->head->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  2312. }
  2313. void AltNode::Emit(Compiler& compiler, CharCount& skipped)
  2314. {
  2315. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2316. Assert(skipped == 0);
  2317. switch (scheme)
  2318. {
  2319. case Try:
  2320. {
  2321. //
  2322. // Compilation scheme:
  2323. //
  2324. // Try((If|Match)(Char|Set))? L2
  2325. // <item 1>
  2326. // Jump Lexit
  2327. // L2: Try((If|Match)(Char|Set))? L3
  2328. // <item 2>
  2329. // Jump Lexit
  2330. // L3: <item 3>
  2331. // Lexit:
  2332. //
  2333. Assert(!isOptional);
  2334. int numItems = 0;
  2335. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2336. numItems++;
  2337. Assert(numItems >= 1);
  2338. // Each item other than last needs to jump to exit on success
  2339. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2340. Label lastTryFixup = 0;
  2341. int item = 0;
  2342. for (AltNode* curr = this; curr != 0; curr = curr->tail, item++)
  2343. {
  2344. if (item > 0)
  2345. // Fixup previous Try
  2346. compiler.DoFixup(lastTryFixup, compiler.CurrentLabel());
  2347. CharCount itemSkipped = 0;
  2348. if (item < numItems-1)
  2349. {
  2350. // HEURISTIC: if the first set of the alternative is exact or small, and the
  2351. // alternative does not match empty, then it's probably worth using
  2352. // a Try(If|Match)(Char|Set)
  2353. if (curr->head->firstSet != 0 &&
  2354. !curr->head->thisConsumes.CouldMatchEmpty() &&
  2355. (curr->head->isFirstExact || curr->head->firstSet->Count() <= maxCharsForConditionalTry))
  2356. {
  2357. if (curr->head->SupportsPrefixSkipping(compiler))
  2358. {
  2359. if (curr->head->firstSet->IsSingleton())
  2360. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryMatchCharInst, curr->head->firstSet->Singleton())->failLabel);
  2361. else
  2362. {
  2363. TryMatchSetInst* const i = EMIT(compiler, TryMatchSetInst);
  2364. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2365. lastTryFixup = compiler.GetFixup(&i->failLabel);
  2366. }
  2367. itemSkipped = 1;
  2368. }
  2369. else
  2370. {
  2371. if (curr->head->firstSet->IsSingleton())
  2372. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryIfCharInst, curr->head->firstSet->Singleton())->failLabel);
  2373. else
  2374. {
  2375. TryIfSetInst* const i = EMIT(compiler, TryIfSetInst);
  2376. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2377. lastTryFixup = compiler.GetFixup(&i->failLabel);
  2378. }
  2379. }
  2380. }
  2381. else
  2382. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryInst)->failLabel);
  2383. }
  2384. curr->head->Emit(compiler, itemSkipped);
  2385. if (item < numItems-1)
  2386. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2387. }
  2388. // Fixup jumps
  2389. for (item = 0; item < numItems-1; item++)
  2390. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2391. break;
  2392. }
  2393. case None:
  2394. {
  2395. Assert(isOptional);
  2396. // Nothing to emit
  2397. break;
  2398. }
  2399. case Trie:
  2400. {
  2401. //
  2402. // Compilation scheme:
  2403. //
  2404. // MatchTrie <trie>
  2405. //
  2406. EMIT(compiler, MatchTrieInst)->trie = *runtimeTrie;
  2407. break;
  2408. }
  2409. case Switch:
  2410. {
  2411. //
  2412. // Compilation scheme:
  2413. //
  2414. // Switch(AndConsume)?(2|4|8|16|24)(<dispatch to each arm>)
  2415. // Fail (if non-optional)
  2416. // Jump Lexit (if optional)
  2417. // L1: <item1>
  2418. // Jump Lexit
  2419. // L2: <item2>
  2420. // Jump Lexit
  2421. // L3: <item3>
  2422. // Lexit:
  2423. //
  2424. Assert(switchSize <= Switch24Inst::MaxCases);
  2425. int numItems = 0;
  2426. bool allCanSkip = true;
  2427. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2428. {
  2429. if (curr->head->thisConsumes.CouldMatchEmpty())
  2430. {
  2431. Assert(isOptional);
  2432. }
  2433. else
  2434. {
  2435. numItems++;
  2436. if (!curr->head->SupportsPrefixSkipping(compiler))
  2437. {
  2438. allCanSkip = false;
  2439. }
  2440. }
  2441. }
  2442. Assert(numItems > 1);
  2443. // Each item other than last needs to jump to exit on success
  2444. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2445. // We must remember where each item begins to fixup switch
  2446. Label* caseLabels = AnewArray(compiler.ctAllocator, Label, numItems);
  2447. // We must fixup the switch arms
  2448. Label switchLabel = compiler.CurrentLabel();
  2449. Assert(switchSize <= Switch24Inst::MaxCases);
  2450. if (allCanSkip)
  2451. {
  2452. if (switchSize <= Switch2Inst::MaxCases)
  2453. {
  2454. EMIT(compiler, SwitchAndConsume2Inst);
  2455. }
  2456. else if (switchSize <= Switch4Inst::MaxCases)
  2457. {
  2458. EMIT(compiler, SwitchAndConsume4Inst);
  2459. }
  2460. else if (switchSize <= Switch8Inst::MaxCases)
  2461. {
  2462. EMIT(compiler, SwitchAndConsume8Inst);
  2463. }
  2464. else if (switchSize <= Switch16Inst::MaxCases)
  2465. {
  2466. EMIT(compiler, SwitchAndConsume16Inst);
  2467. }
  2468. else if (switchSize <= Switch24Inst::MaxCases)
  2469. {
  2470. EMIT(compiler, SwitchAndConsume24Inst);
  2471. }
  2472. else
  2473. {
  2474. AssertOrFailFastMsg(false, "It should not be possible to reach here. This implies that we entered the Switch layout with greater than the max allowable cases.");
  2475. }
  2476. }
  2477. else
  2478. {
  2479. if (switchSize <= Switch2Inst::MaxCases)
  2480. {
  2481. EMIT(compiler, Switch2Inst);
  2482. }
  2483. else if (switchSize <= Switch4Inst::MaxCases)
  2484. {
  2485. EMIT(compiler, Switch4Inst);
  2486. }
  2487. else if (switchSize <= Switch8Inst::MaxCases)
  2488. {
  2489. EMIT(compiler, Switch8Inst);
  2490. }
  2491. else if (switchSize <= Switch16Inst::MaxCases)
  2492. {
  2493. EMIT(compiler, Switch16Inst);
  2494. }
  2495. else if (switchSize <= Switch24Inst::MaxCases)
  2496. {
  2497. EMIT(compiler, Switch24Inst);
  2498. }
  2499. else
  2500. {
  2501. AssertOrFailFastMsg(false, "It should not be possible to reach here. This implies that we entered the Switch layout with greater than the max allowable cases.");
  2502. }
  2503. }
  2504. Label defaultJumpFixup = 0;
  2505. if (isOptional)
  2506. {
  2507. // Must fixup default jump to exit
  2508. defaultJumpFixup = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2509. }
  2510. else
  2511. {
  2512. compiler.Emit<FailInst>();
  2513. }
  2514. // Emit each item
  2515. int item = 0;
  2516. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2517. {
  2518. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2519. {
  2520. if (allCanSkip)
  2521. {
  2522. skipped = 1;
  2523. }
  2524. caseLabels[item] = compiler.CurrentLabel();
  2525. curr->head->Emit(compiler, skipped);
  2526. if (item < numItems - 1)
  2527. {
  2528. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2529. }
  2530. item++;
  2531. }
  2532. }
  2533. // Fixup exit labels
  2534. if (isOptional)
  2535. {
  2536. compiler.DoFixup(defaultJumpFixup, compiler.CurrentLabel());
  2537. }
  2538. for (item = 0; item < numItems - 1; item++)
  2539. {
  2540. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2541. }
  2542. // Fixup the switch entries
  2543. item = 0;
  2544. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2545. {
  2546. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2547. {
  2548. Char entries[CharSet<Char>::MaxCompact];
  2549. int count = curr->head->firstSet->GetCompactEntries(CharSet<Char>::MaxCompact, entries);
  2550. Assert(count > 0);
  2551. for (int i = 0; i < count; i++)
  2552. {
  2553. if (allCanSkip)
  2554. {
  2555. if (switchSize <= Switch2Inst::MaxCases)
  2556. {
  2557. compiler.L2I(SwitchAndConsume2, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2558. }
  2559. else if (switchSize <= Switch4Inst::MaxCases)
  2560. {
  2561. compiler.L2I(SwitchAndConsume4, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2562. }
  2563. else if (switchSize <= Switch8Inst::MaxCases)
  2564. {
  2565. compiler.L2I(SwitchAndConsume8, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2566. }
  2567. else if (switchSize <= Switch16Inst::MaxCases)
  2568. {
  2569. compiler.L2I(SwitchAndConsume16, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2570. }
  2571. else if (switchSize <= Switch24Inst::MaxCases)
  2572. {
  2573. compiler.L2I(SwitchAndConsume24, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2574. }
  2575. }
  2576. else
  2577. {
  2578. if (switchSize <= Switch2Inst::MaxCases)
  2579. {
  2580. compiler.L2I(Switch2, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2581. }
  2582. else if (switchSize <= Switch4Inst::MaxCases)
  2583. {
  2584. compiler.L2I(Switch4, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2585. }
  2586. else if (switchSize <= Switch8Inst::MaxCases)
  2587. {
  2588. compiler.L2I(Switch8, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2589. }
  2590. else if (switchSize <= Switch16Inst::MaxCases)
  2591. {
  2592. compiler.L2I(Switch16, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2593. }
  2594. else if (switchSize <= Switch24Inst::MaxCases)
  2595. {
  2596. compiler.L2I(Switch24, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2597. }
  2598. }
  2599. }
  2600. item++;
  2601. }
  2602. }
  2603. break;
  2604. }
  2605. case Chain:
  2606. {
  2607. //
  2608. // Compilation scheme:
  2609. //
  2610. // JumpIfNot(Char|Set) L2
  2611. // <item1>
  2612. // Jump Lexit
  2613. // L2: JumpIfNot(Char|Set) L3
  2614. // <item2>
  2615. // Jump Lexit
  2616. // L3: <item3> (if non-optional)
  2617. // L3: JumpIfNot(Char|Set) Lexit (if optional)
  2618. // <item3> (if optional)
  2619. // Lexit:
  2620. //
  2621. int numItems = 0;
  2622. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2623. {
  2624. if (curr->head->thisConsumes.CouldMatchEmpty())
  2625. {
  2626. Assert(isOptional);
  2627. }
  2628. else
  2629. numItems++;
  2630. }
  2631. Assert(numItems > 0);
  2632. // Each item other than last needs to jump to exit on success
  2633. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2634. Label lastJumpFixup = 0;
  2635. int item = 0;
  2636. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2637. {
  2638. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2639. {
  2640. if (item > 0)
  2641. // Fixup previous Jump
  2642. compiler.DoFixup(lastJumpFixup, compiler.CurrentLabel());
  2643. CharCount itemSkipped = 0;
  2644. if (item < numItems-1 || isOptional)
  2645. {
  2646. if (curr->head->firstSet->IsSingleton())
  2647. {
  2648. if (curr->head->SupportsPrefixSkipping(compiler))
  2649. {
  2650. lastJumpFixup = compiler.GetFixup(&EMIT(compiler, MatchCharOrJumpInst, curr->head->firstSet->Singleton())->targetLabel);
  2651. itemSkipped = 1;
  2652. }
  2653. else
  2654. lastJumpFixup = compiler.GetFixup(&EMIT(compiler, JumpIfNotCharInst, curr->head->firstSet->Singleton())->targetLabel);
  2655. }
  2656. else
  2657. {
  2658. if (curr->head->SupportsPrefixSkipping(compiler))
  2659. {
  2660. MatchSetOrJumpInst* const i = EMIT(compiler, MatchSetOrJumpInst);
  2661. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2662. lastJumpFixup = compiler.GetFixup(&i->targetLabel);
  2663. itemSkipped = 1;
  2664. }
  2665. else
  2666. {
  2667. JumpIfNotSetInst* const i = EMIT(compiler, JumpIfNotSetInst);
  2668. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2669. lastJumpFixup = compiler.GetFixup(&i->targetLabel);
  2670. }
  2671. }
  2672. }
  2673. curr->head->Emit(compiler, itemSkipped);
  2674. if (item < numItems-1)
  2675. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2676. item++;
  2677. }
  2678. }
  2679. // Fixup jumps to exit
  2680. for (item = 0; item < numItems-1; item++)
  2681. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2682. if (isOptional)
  2683. // Fixup last Jump to exit
  2684. compiler.DoFixup(lastJumpFixup, compiler.CurrentLabel());
  2685. break;
  2686. }
  2687. case Set:
  2688. {
  2689. //
  2690. // Compilation scheme:
  2691. //
  2692. // Match(Char|Set) (non optional)
  2693. // OptMatch(Char|Set) (optional)
  2694. //
  2695. if (isOptional)
  2696. {
  2697. if (firstSet->IsSingleton())
  2698. EMIT(compiler, OptMatchCharInst, firstSet->Singleton());
  2699. else
  2700. EMIT(compiler, OptMatchSetInst)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  2701. }
  2702. else
  2703. {
  2704. if (firstSet->IsSingleton())
  2705. EMIT(compiler, MatchCharInst, firstSet->Singleton());
  2706. else
  2707. EMIT(compiler, MatchSetInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  2708. }
  2709. break;
  2710. }
  2711. }
  2712. }
  2713. CharCount AltNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  2714. {
  2715. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2716. Assert(!isHeadSyncronizingNode);
  2717. //
  2718. // Compilation scheme:
  2719. //
  2720. // SyncToLiteralsAndBackup
  2721. //
  2722. SyncToLiteralsAndBackupInst* i =
  2723. EMIT(
  2724. compiler,
  2725. SyncToLiteralsAndBackupInst,
  2726. compiler.GetScriptContext()->GetRecycler(),
  2727. compiler.GetProgram(),
  2728. prevConsumes);
  2729. CollectSyncronizingLiterals(compiler, *i);
  2730. return 0;
  2731. }
  2732. bool AltNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  2733. {
  2734. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2735. if (tail == 0 || tail->tail != 0)
  2736. // Must be exactly two alts
  2737. return false;
  2738. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2739. {
  2740. if (!oi->BeginConcat())
  2741. return false;
  2742. if (!curr->head->IsOctoquad(compiler, oi))
  2743. return false;
  2744. }
  2745. return true;
  2746. }
  2747. bool AltNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  2748. {
  2749. return false;
  2750. }
  2751. bool AltNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  2752. {
  2753. Assert(false);
  2754. return false;
  2755. }
  2756. #if ENABLE_REGEX_CONFIG_OPTIONS
  2757. void AltNode::Print(DebugWriter* w, const Char* litbuf) const
  2758. {
  2759. w->PrintEOL(_u("Alt()"));
  2760. PrintAnnotations(w);
  2761. w->PrintEOL(_u("{"));
  2762. w->Indent();
  2763. for (const AltNode *curr = this; curr != 0; curr = curr->tail)
  2764. curr->head->Print(w, litbuf);
  2765. w->Unindent();
  2766. w->PrintEOL(_u("}"));
  2767. }
  2768. #endif
  2769. // ----------------------------------------------------------------------
  2770. // DefineGroupNode
  2771. // ----------------------------------------------------------------------
  2772. CharCount DefineGroupNode::LiteralLength() const
  2773. {
  2774. return 0;
  2775. }
  2776. bool DefineGroupNode::IsCharOrPositiveSet() const
  2777. {
  2778. return false;
  2779. }
  2780. CharCount DefineGroupNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  2781. {
  2782. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2783. Assert(groupId > 0 && groupId < compiler.program->numGroups);
  2784. return body->TransferPass0(compiler, litbuf);
  2785. }
  2786. void DefineGroupNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  2787. {
  2788. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2789. body->TransferPass1(compiler, litbuf);
  2790. }
  2791. bool DefineGroupNode::IsRefiningAssertion(Compiler& compiler)
  2792. {
  2793. return false;
  2794. }
  2795. void DefineGroupNode::AnnotatePass0(Compiler& compiler)
  2796. {
  2797. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2798. body->AnnotatePass0(compiler);
  2799. isWord = body->isWord;
  2800. }
  2801. void DefineGroupNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  2802. {
  2803. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2804. features = HasDefineGroup;
  2805. body->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, parentNotSpeculative, parentNotNegated);
  2806. features |= body->features;
  2807. thisConsumes = body->thisConsumes;
  2808. firstSet = body->firstSet;
  2809. isFirstExact = body->isFirstExact;
  2810. isThisIrrefutable = body->isThisIrrefutable;
  2811. isThisWillNotProgress = body->isThisWillNotProgress;
  2812. isThisWillNotRegress = body->isThisWillNotRegress;
  2813. isNotInLoop = parentNotInLoop;
  2814. isAtLeastOnce = parentAtLeastOnce;
  2815. isNotSpeculative = parentNotSpeculative;
  2816. isNotNegated = parentNotNegated;
  2817. }
  2818. void DefineGroupNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  2819. {
  2820. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2821. prevConsumes = accumConsumes;
  2822. isPrevWillNotProgress = accumPrevWillNotProgress;
  2823. isPrevWillNotRegress = accumPrevWillNotRegress;
  2824. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  2825. }
  2826. void DefineGroupNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  2827. {
  2828. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2829. followConsumes = accumConsumes;
  2830. followSet = accumFollow;
  2831. isFollowIrrefutable = accumFollowIrrefutable;
  2832. isFollowEOL = accumFollowEOL;
  2833. body->AnnotatePass3(compiler, accumConsumes, accumFollow, accumFollowIrrefutable, accumFollowEOL);
  2834. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  2835. }
  2836. void DefineGroupNode::AnnotatePass4(Compiler& compiler)
  2837. {
  2838. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2839. body->AnnotatePass4(compiler);
  2840. isDeterministic = body->isDeterministic;
  2841. // If the follow is irrefutable and we're not in an assertion, then we are not going to backtrack beyond this point, so
  2842. // we don't need to save the group before updating it
  2843. noNeedToSave = isFollowIrrefutable && isNotSpeculative;
  2844. // Compilation scheme: Chomp
  2845. //
  2846. // Body consists of a loop node with a Chomp compilation scheme.
  2847. if(body->tag == NodeTag::Loop)
  2848. {
  2849. const LoopNode *const loop = static_cast<const LoopNode *>(body);
  2850. if(loop->scheme == LoopNode::CompilationScheme::Chomp && loop->repeats.lower <= 1 && loop->repeats.IsUnbounded())
  2851. {
  2852. // **COMMIT**
  2853. scheme = Chomp;
  2854. return;
  2855. }
  2856. }
  2857. // Compilation scheme: Fixed
  2858. //
  2859. // Body has fixed width, so don't need a Begin instruction to keep track of the input start offset of the group.
  2860. if (body->thisConsumes.IsFixed())
  2861. {
  2862. // **COMMIT**
  2863. scheme = Fixed;
  2864. return;
  2865. }
  2866. // Compilation scheme: BeginEnd
  2867. //
  2868. // If both the body and the follow are irrefutable, we're not in any loops, and we're not in an assertion,
  2869. // then we don't need to save the group before updating it.
  2870. // **COMMIT**
  2871. scheme = BeginEnd;
  2872. }
  2873. bool DefineGroupNode::SupportsPrefixSkipping(Compiler& compiler) const
  2874. {
  2875. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2876. if (scheme != Fixed)
  2877. // We can't skip over part of the match if the BeginDefineGroup must capture it's start
  2878. return false;
  2879. return body->SupportsPrefixSkipping(compiler);
  2880. }
  2881. Node* DefineGroupNode::HeadSyncronizingNode(Compiler& compiler)
  2882. {
  2883. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2884. if (scheme != Fixed)
  2885. // Can't skip BeginDefineGroup
  2886. return 0;
  2887. return body->HeadSyncronizingNode(compiler);
  2888. }
  2889. CharCount DefineGroupNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  2890. {
  2891. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2892. return body->MinSyncronizingLiteralLength(compiler, numLiterals);
  2893. }
  2894. void DefineGroupNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  2895. {
  2896. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2897. body->CollectSyncronizingLiterals(compiler, scanners);
  2898. }
  2899. void DefineGroupNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  2900. {
  2901. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2902. body->BestSyncronizingNode(compiler, bestNode);
  2903. }
  2904. void DefineGroupNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  2905. {
  2906. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  2907. if (groupId < minGroup)
  2908. minGroup = groupId;
  2909. if (groupId > maxGroup)
  2910. maxGroup = groupId;
  2911. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  2912. }
  2913. void DefineGroupNode::Emit(Compiler& compiler, CharCount& skipped)
  2914. {
  2915. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2916. switch (scheme)
  2917. {
  2918. case Chomp:
  2919. {
  2920. // Compilation scheme:
  2921. //
  2922. // Chomp(Char|Set)Group
  2923. Assert(body->tag == NodeTag::Loop);
  2924. const LoopNode *const loop = static_cast<const LoopNode *>(body);
  2925. const CharSet<Char> *const loopBodyFirstSet = loop->body->firstSet;
  2926. const CountDomain &repeats = loop->repeats;
  2927. Assert(repeats.lower <= 1 && repeats.IsUnbounded());
  2928. if(loopBodyFirstSet->IsSingleton())
  2929. {
  2930. const Char c = loopBodyFirstSet->Singleton();
  2931. if(repeats.lower == 0)
  2932. EMIT(compiler, ChompCharGroupInst<ChompMode::Star>, c, groupId, noNeedToSave);
  2933. else
  2934. EMIT(compiler, ChompCharGroupInst<ChompMode::Plus>, c, groupId, noNeedToSave);
  2935. }
  2936. else
  2937. {
  2938. Assert(repeats.lower <= 1 && repeats.IsUnbounded());
  2939. RuntimeCharSet<Char> *runtimeSet;
  2940. if(repeats.lower == 0)
  2941. runtimeSet = &EMIT(compiler, ChompSetGroupInst<ChompMode::Star>, groupId, noNeedToSave)->set;
  2942. else
  2943. runtimeSet = &EMIT(compiler, ChompSetGroupInst<ChompMode::Plus>, groupId, noNeedToSave)->set;
  2944. runtimeSet->CloneFrom(compiler.rtAllocator, *loopBodyFirstSet);
  2945. }
  2946. break;
  2947. }
  2948. case Fixed:
  2949. {
  2950. // Compilation scheme:
  2951. //
  2952. // <body>
  2953. // DefineGroup
  2954. Assert(body->thisConsumes.IsFixed());
  2955. body->Emit(compiler, skipped);
  2956. EMIT(compiler, DefineGroupFixedInst, groupId, body->thisConsumes.lower, noNeedToSave);
  2957. break;
  2958. }
  2959. case BeginEnd:
  2960. {
  2961. // Compilation scheme:
  2962. //
  2963. // BeginDefineGroup
  2964. // <body>
  2965. // EndDefineGroup
  2966. Assert(skipped == 0);
  2967. EMIT(compiler, BeginDefineGroupInst, groupId);
  2968. body->Emit(compiler, skipped);
  2969. EMIT(compiler, EndDefineGroupInst, groupId, noNeedToSave);
  2970. break;
  2971. }
  2972. }
  2973. }
  2974. CharCount DefineGroupNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  2975. {
  2976. Assert(false);
  2977. return 0;
  2978. }
  2979. bool DefineGroupNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  2980. {
  2981. return false;
  2982. }
  2983. bool DefineGroupNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  2984. {
  2985. return false;
  2986. }
  2987. bool DefineGroupNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  2988. {
  2989. Assert(false);
  2990. return false;
  2991. }
  2992. #if ENABLE_REGEX_CONFIG_OPTIONS
  2993. void DefineGroupNode::Print(DebugWriter* w, const Char* litbuf) const
  2994. {
  2995. w->PrintEOL(_u("DefineGroup(%d)"), groupId);
  2996. PrintAnnotations(w);
  2997. w->PrintEOL(_u("{"));
  2998. w->Indent();
  2999. body->Print(w, litbuf);
  3000. w->Unindent();
  3001. w->PrintEOL(_u("}"));
  3002. }
  3003. #endif
  3004. // ----------------------------------------------------------------------
  3005. // MatchGroupNode
  3006. // ----------------------------------------------------------------------
  3007. CharCount MatchGroupNode::LiteralLength() const
  3008. {
  3009. return 0;
  3010. }
  3011. bool MatchGroupNode::IsCharOrPositiveSet() const
  3012. {
  3013. return false;
  3014. }
  3015. CharCount MatchGroupNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  3016. {
  3017. Assert(groupId > 0 && groupId < compiler.program->numGroups);
  3018. return 0;
  3019. }
  3020. void MatchGroupNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  3021. {
  3022. }
  3023. bool MatchGroupNode::IsRefiningAssertion(Compiler& compiler)
  3024. {
  3025. return false;
  3026. }
  3027. void MatchGroupNode::AnnotatePass0(Compiler& compiler)
  3028. {
  3029. isWord = false;
  3030. }
  3031. void MatchGroupNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  3032. {
  3033. features = HasMatchGroup;
  3034. thisConsumes.lower = 0;
  3035. thisConsumes.upper = CharCountFlag;
  3036. firstSet = compiler.standardChars->GetFullSet();
  3037. isFirstExact = false;
  3038. isThisIrrefutable = false;
  3039. isThisWillNotProgress = true;
  3040. isThisWillNotRegress = true;
  3041. isNotInLoop = parentNotInLoop;
  3042. isAtLeastOnce = parentAtLeastOnce;
  3043. isNotSpeculative = parentNotSpeculative;
  3044. isNotNegated = parentNotNegated;
  3045. }
  3046. void MatchGroupNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  3047. {
  3048. prevConsumes = accumConsumes;
  3049. isPrevWillNotProgress = accumPrevWillNotProgress;
  3050. isPrevWillNotRegress = accumPrevWillNotRegress;
  3051. }
  3052. void MatchGroupNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  3053. {
  3054. followConsumes = accumConsumes;
  3055. followSet = accumFollow;
  3056. isFollowIrrefutable = accumFollowIrrefutable;
  3057. isFollowEOL = accumFollowEOL;
  3058. }
  3059. void MatchGroupNode::AnnotatePass4(Compiler& compiler)
  3060. {
  3061. isDeterministic = true;
  3062. }
  3063. bool MatchGroupNode::SupportsPrefixSkipping(Compiler& compiler) const
  3064. {
  3065. return false;
  3066. }
  3067. Node* MatchGroupNode::HeadSyncronizingNode(Compiler& compiler)
  3068. {
  3069. return 0;
  3070. }
  3071. CharCount MatchGroupNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  3072. {
  3073. return 0;
  3074. }
  3075. void MatchGroupNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  3076. {
  3077. Assert(false);
  3078. }
  3079. void MatchGroupNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  3080. {
  3081. }
  3082. void MatchGroupNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  3083. {
  3084. }
  3085. void MatchGroupNode::Emit(Compiler& compiler, CharCount& skipped)
  3086. {
  3087. Assert(skipped == 0);
  3088. //
  3089. // Compilation scheme:
  3090. //
  3091. // MatchGroup
  3092. //
  3093. EMIT(compiler, MatchGroupInst, groupId);
  3094. }
  3095. CharCount MatchGroupNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  3096. {
  3097. Assert(false);
  3098. return 0;
  3099. }
  3100. bool MatchGroupNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  3101. {
  3102. return false;
  3103. }
  3104. bool MatchGroupNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  3105. {
  3106. return false;
  3107. }
  3108. bool MatchGroupNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  3109. {
  3110. Assert(false);
  3111. return false;
  3112. }
  3113. #if ENABLE_REGEX_CONFIG_OPTIONS
  3114. void MatchGroupNode::Print(DebugWriter* w, const Char* litbuf) const
  3115. {
  3116. w->PrintEOL(_u("MatchGroup(%d)"), groupId);
  3117. PrintAnnotations(w);
  3118. }
  3119. #endif
  3120. // ----------------------------------------------------------------------
  3121. // LoopNode
  3122. // ----------------------------------------------------------------------
  3123. CharCount LoopNode::LiteralLength() const
  3124. {
  3125. return 0;
  3126. }
  3127. bool LoopNode::IsCharOrPositiveSet() const
  3128. {
  3129. return false;
  3130. }
  3131. CharCount LoopNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  3132. {
  3133. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3134. Assert(repeats.upper == CharCountFlag || repeats.upper > 0);
  3135. Assert(repeats.upper == CharCountFlag || repeats.upper >= repeats.lower);
  3136. Assert(!(repeats.lower == 1 && repeats.upper == 1));
  3137. return body->TransferPass0(compiler, litbuf);
  3138. }
  3139. void LoopNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  3140. {
  3141. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3142. body->TransferPass1(compiler, litbuf);
  3143. }
  3144. bool LoopNode::IsRefiningAssertion(Compiler& compiler)
  3145. {
  3146. return false;
  3147. }
  3148. void LoopNode::AnnotatePass0(Compiler& compiler)
  3149. {
  3150. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3151. body->AnnotatePass0(compiler);
  3152. isWord = !repeats.CouldMatchEmpty() && body->isWord;
  3153. }
  3154. void LoopNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  3155. {
  3156. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3157. features = HasLoop;
  3158. isNotInLoop = parentNotInLoop;
  3159. isAtLeastOnce = parentAtLeastOnce;
  3160. isNotSpeculative = parentNotSpeculative;
  3161. isNotNegated = parentNotNegated;
  3162. body->AnnotatePass1(compiler, false, parentAtLeastOnce && repeats.lower > 0, parentNotSpeculative, isNotNegated);
  3163. features |= body->features;
  3164. thisConsumes = body->thisConsumes;
  3165. thisConsumes.Mult(repeats);
  3166. firstSet = body->firstSet;
  3167. isFirstExact = repeats.lower > 0 && body->isFirstExact;
  3168. isThisIrrefutable = repeats.CouldMatchEmpty() || body->isThisIrrefutable;
  3169. // Caution: Even if a greedy loop has a 'isThisWillNotProgress' body, if the body has choicepoints then
  3170. // a backtrack could resume execution at an earlier loop iteration, which may then continue to repeat
  3171. // the loop beyond the input offset which triggered the backtrack. Ideally we'd use the body's isDeterministic
  3172. // flag to tell us when that can't happen, but it's not available till pass 4, so we must make do with
  3173. // a simple-minded structural approximation.
  3174. isThisWillNotProgress = (isGreedy || repeats.IsExact(1)) && body->isThisWillNotProgress && body->IsObviouslyDeterministic();
  3175. isThisWillNotRegress = (!isGreedy || repeats.IsExact(1)) && body->isThisWillNotRegress && body->IsObviouslyDeterministic();
  3176. }
  3177. void LoopNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  3178. {
  3179. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3180. prevConsumes = accumConsumes;
  3181. isPrevWillNotProgress = accumPrevWillNotProgress;
  3182. isPrevWillNotRegress = accumPrevWillNotRegress;
  3183. // May have already gone through loop when starting body
  3184. CountDomain bodyConsumes = body->thisConsumes;
  3185. CharCountOrFlag prevMax = repeats.upper;
  3186. if (prevMax != CharCountFlag)
  3187. prevMax--;
  3188. CountDomain prevLoops(0, prevMax);
  3189. bodyConsumes.Mult(prevLoops);
  3190. accumConsumes.Add(bodyConsumes);
  3191. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress && isThisWillNotProgress, accumPrevWillNotRegress && isThisWillNotRegress);
  3192. }
  3193. void LoopNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  3194. {
  3195. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3196. followConsumes = accumConsumes;
  3197. followSet = accumFollow;
  3198. isFollowIrrefutable = accumFollowIrrefutable;
  3199. isFollowEOL = accumFollowEOL;
  3200. // May go through loop again when leaving body
  3201. CountDomain bodyConsumes = body->thisConsumes;
  3202. CharCountOrFlag nextMax = repeats.upper;
  3203. if (nextMax != CharCountFlag)
  3204. nextMax--;
  3205. CountDomain nextLoops(0, nextMax);
  3206. bodyConsumes.Mult(nextLoops);
  3207. accumConsumes.Add(bodyConsumes);
  3208. CharSet<Char>* innerFollow = Anew(compiler.ctAllocator, UnicodeCharSet);
  3209. innerFollow->UnionInPlace(compiler.ctAllocator, *accumFollow);
  3210. innerFollow->UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3211. if (followSet->IsSingleton())
  3212. {
  3213. Assert(followSet->IsCompact());
  3214. followFirst = followSet->GetCompactChar(0);
  3215. }
  3216. /*
  3217. All of the following must be true for the loop body's follow to be irrefutable:
  3218. The loop's follow is irrefutable.
  3219. The loop can complete the required minimum number of iterations of the body without backtracking into a completed
  3220. iteration of the body.
  3221. - If repeats.lower == 0, the required minimum number of iterations is met without executing the body
  3222. - If repeats.lower == 1
  3223. - If the first iteration of the body fails, there is no previous iteration of the body to backtrack into
  3224. - After completing the first iteration of the body, the loop cannot reject the first iteration for not
  3225. making progress because the iteration is required for the loop to succeed
  3226. - If repeats.lower >= 2
  3227. - If the second iteration of the body fails, it will backtrack into the first iteration of the body
  3228. - To prevent this, the body must be irrefutable
  3229. After completing the required minimum number of iterations of the body, the loop cannot reject a subsequent
  3230. completed iteration of the body for not making progress.
  3231. - If !isGreedy || repeats.IsFixed(), there will not be any more iterations of the body, as it will proceed to
  3232. the irrefutable follow
  3233. - If !body->thisConsumes.CouldMatchEmpty(), subsequent iterations of the body cannot complete without making
  3234. progress
  3235. */
  3236. const bool isBodyFollowIrrefutable =
  3237. accumFollowIrrefutable &&
  3238. (repeats.lower <= 1 || body->isThisIrrefutable) &&
  3239. (!isGreedy || !body->thisConsumes.CouldMatchEmpty() || repeats.IsFixed());
  3240. body->AnnotatePass3(compiler, accumConsumes, innerFollow, isBodyFollowIrrefutable, false);
  3241. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  3242. }
  3243. void LoopNode::AnnotatePass4(Compiler& compiler)
  3244. {
  3245. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3246. body->AnnotatePass4(compiler);
  3247. isDeterministic = body->isDeterministic;
  3248. //
  3249. // Loops can be defined by unfolding:
  3250. // r* === (rr*|)
  3251. // r*? === (|rr*?)
  3252. // Thus many of the optimizations for alternatives carry over to loops.
  3253. //
  3254. //
  3255. // Compilation scheme: None
  3256. //
  3257. // If overall loop is empty-only then emit nothing.
  3258. // (Parser has already eliminated loops with upper == 0, so this can only happen if the body is empty-only)
  3259. //
  3260. // If loop is non-greedy with lower 0 and follow is irrefutable, then loop body will never be executed
  3261. // no emit nothing.
  3262. //
  3263. if (body->IsEmptyOnly() ||
  3264. (!isGreedy && repeats.lower == 0 && isFollowIrrefutable))
  3265. {
  3266. // **COMMIT**
  3267. scheme = None;
  3268. return;
  3269. }
  3270. //
  3271. // Compilation scheme: Chain/Try
  3272. //
  3273. // If loop is greedy, with lower 0 and upper 1, then we'd like to treat it as body|<empty> so as to avoid all the loop
  3274. // overhead. However, if the body could match empty, then a match against empty must be treated as an 'iteration' of the
  3275. // loop body which made no progress. So we treat as a general loop in that case. Otherwise, we may inline two of
  3276. // AltNode's compilation schemes:
  3277. // Examples:
  3278. // - /(a*)?/.exec("") must leave group 1 undefined rather than empty.
  3279. // - /(?:a||b)?/.exec("b") chooses the empty alt, then must backtrack due to no progress, and match 'b'.
  3280. // This is not the same as /a||b|/, as picking the first empty alt would result in success.
  3281. //
  3282. // (cf AltNode's None/Switch/Chain/Set, isOptional compilation scheme)
  3283. // If body cannot match empty, follow cannot match empty, and the body FIRST set is disjoint from the FOLLOW
  3284. // set, then we can commit to the body using a 1 char lookahead.
  3285. //
  3286. // If body cannot match empty, and follow is irrefutable, then we can commit to the body using a 1 char
  3287. // lookahead provided:
  3288. // ** the body can't fail if given an arbitrary input starting with a character in its FIRST set **
  3289. //
  3290. // (cf AltNode's Try compilation scheme)
  3291. // Otherwise, protect body by a Try instruction.
  3292. //
  3293. if (isGreedy && repeats.lower == 0 && repeats.upper == 1 && !body->thisConsumes.CouldMatchEmpty())
  3294. {
  3295. // **COMMIT**
  3296. // Note that the FIRST of the loop is already the union of the body FIRST and the loop FOLLOW
  3297. if (!body->thisConsumes.CouldMatchEmpty() &&
  3298. ((!followConsumes.CouldMatchEmpty() && firstSet->Count() == body->firstSet->Count() + followSet->Count()) ||
  3299. (isFollowIrrefutable && body->IsSimpleOneChar())))
  3300. {
  3301. if (body->IsSimpleOneChar())
  3302. scheme = Set;
  3303. else
  3304. scheme = Chain;
  3305. }
  3306. else
  3307. {
  3308. scheme = Try;
  3309. isDeterministic = false; // NON-DETERMINISTIC
  3310. }
  3311. return;
  3312. }
  3313. //
  3314. // Compilation scheme: Chomp/ChompGroupLastChar
  3315. //
  3316. // If the body is a simple-one-char, or a group of a simple-one-char, and either:
  3317. // - follow is non-empty and FIRST and FOLLOW are disjoint
  3318. // - loop is greedy and follow is irrefutable
  3319. // - follow is EOL
  3320. // then consume up to upper number of characters in FIRST and fail if number consumed is not >= lower.
  3321. //
  3322. if (body->IsSimpleOneChar() || (body->tag == DefineGroup && ((DefineGroupNode*)body)->body->IsSimpleOneChar()))
  3323. {
  3324. if (!followConsumes.CouldMatchEmpty())
  3325. {
  3326. CharSet<Char> unionSet;
  3327. CharCount totalChars = 0;
  3328. unionSet.UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3329. totalChars += body->firstSet->Count();
  3330. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  3331. totalChars += followSet->Count();
  3332. if (totalChars == unionSet.Count())
  3333. {
  3334. // **COMMIT**
  3335. if (body->tag == DefineGroup)
  3336. {
  3337. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3338. scheme = ChompGroupLastChar;
  3339. }
  3340. else
  3341. scheme = Chomp;
  3342. return;
  3343. }
  3344. }
  3345. if ((isGreedy && isFollowIrrefutable) || isFollowEOL)
  3346. {
  3347. // **COMMIT**
  3348. if (body->tag == DefineGroup)
  3349. {
  3350. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3351. scheme = ChompGroupLastChar;
  3352. }
  3353. else
  3354. scheme = Chomp;
  3355. return;
  3356. }
  3357. }
  3358. //
  3359. // Compilation scheme: Guarded
  3360. //
  3361. // If body cannot match empty, follow cannot match empty, and FIRST of body and FOLLOW are
  3362. // disjoint then can use 1 char lookahead to decide whether to commit to another loop body.
  3363. // (If the loop body fails then we know the follow will fail even with one more/fewer iterations of the
  3364. // loop body, so we can let that failure propagate without needing to push choicepoints.)
  3365. //
  3366. if (!body->thisConsumes.CouldMatchEmpty() && !followConsumes.CouldMatchEmpty())
  3367. {
  3368. CharSet<Char> unionSet;
  3369. CharCount totalChars = 0;
  3370. unionSet.UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3371. totalChars += body->firstSet->Count();
  3372. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  3373. totalChars += followSet->Count();
  3374. if (totalChars == unionSet.Count())
  3375. {
  3376. // **COMMIT**
  3377. scheme = Guarded;
  3378. return;
  3379. }
  3380. }
  3381. //
  3382. // Compilation scheme: Fixed/FixedSet/FixedGroupLastIteration
  3383. //
  3384. // If loop is greedy, body is deterministic, non-zero fixed width, and either does not define any groups
  3385. // or has one outermost group, then we can keep track of the backtracking information in constant space.
  3386. //
  3387. // If body does have an outer group, we can avoid saving the existing group contents if the follow
  3388. // is irrefutable, we're not in an outer loop, and we're not in an assertion.
  3389. //
  3390. if (isGreedy && body->isDeterministic && !body->thisConsumes.CouldMatchEmpty() && body->thisConsumes.IsFixed())
  3391. {
  3392. if (body->tag == DefineGroup)
  3393. {
  3394. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3395. if (!bodyGroup->body->ContainsDefineGroup())
  3396. {
  3397. // **COMMIT**
  3398. scheme = FixedGroupLastIteration;
  3399. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3400. isDeterministic = false; // NON-DETERMINISTIC;
  3401. return;
  3402. }
  3403. }
  3404. else if (body->IsSimpleOneChar())
  3405. {
  3406. // **COMMIT**
  3407. scheme = FixedSet;
  3408. isDeterministic = false; // NON-DETERMINISTIC
  3409. return;
  3410. }
  3411. else if (!body->ContainsDefineGroup())
  3412. {
  3413. // **COMMIT**
  3414. scheme = Fixed;
  3415. isDeterministic = false; // NON-DETERMINISTIC
  3416. return;
  3417. }
  3418. }
  3419. //
  3420. // Compilation scheme: GreedyNoBacktrack
  3421. //
  3422. // If loop is greedy with lower == 0 and upper == inf, the loop body is deterministic and does not define
  3423. // groups, and follow is irrefutable, then we will never have to try fewer iterations of the loop once
  3424. // entering the follow. Thus we only need one continuation record on the stack to protect against failure
  3425. // for each attempt at the loop body.
  3426. //
  3427. if (isGreedy && repeats.lower == 0 && repeats.upper == CharCountFlag && body->isDeterministic && !body->ContainsDefineGroup() && isFollowIrrefutable)
  3428. {
  3429. // **COMMIT**
  3430. scheme = GreedyNoBacktrack;
  3431. return;
  3432. }
  3433. //
  3434. // Compilation scheme: BeginEnd
  3435. //
  3436. scheme = BeginEnd;
  3437. isDeterministic = false; // NON-DETERMINISTIC
  3438. }
  3439. bool LoopNode::SupportsPrefixSkipping(Compiler& compiler) const
  3440. {
  3441. return false;
  3442. }
  3443. Node* LoopNode::HeadSyncronizingNode(Compiler& compiler)
  3444. {
  3445. return 0;
  3446. }
  3447. CharCount LoopNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  3448. {
  3449. return 0;
  3450. }
  3451. void LoopNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  3452. {
  3453. Assert(false);
  3454. }
  3455. void LoopNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  3456. {
  3457. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3458. if (repeats.lower > 0)
  3459. body->BestSyncronizingNode(compiler, bestNode);
  3460. // else: can't be sure loop will be taken
  3461. }
  3462. void LoopNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  3463. {
  3464. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  3465. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  3466. }
  3467. void LoopNode::Emit(Compiler& compiler, CharCount& skipped)
  3468. {
  3469. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3470. Assert(skipped == 0);
  3471. switch (scheme)
  3472. {
  3473. case BeginEnd:
  3474. {
  3475. //
  3476. // Compilation scheme:
  3477. //
  3478. // Lloop: BeginLoop Lexit
  3479. // <loop body>
  3480. // RepeatLoop Lloop
  3481. // Lexit:
  3482. //
  3483. int minBodyGroupId = compiler.program->numGroups;
  3484. int maxBodyGroupId = -1;
  3485. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3486. Label beginLabel = compiler.CurrentLabel();
  3487. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopInst, compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId, isGreedy)->exitLabel);
  3488. body->Emit(compiler, skipped);
  3489. EMIT(compiler, RepeatLoopInst, beginLabel);
  3490. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3491. break;
  3492. }
  3493. case None:
  3494. {
  3495. // Nothing to emit
  3496. break;
  3497. }
  3498. case Chomp:
  3499. {
  3500. //
  3501. // Compilation scheme:
  3502. //
  3503. // Chomp(Char|Set)(Star|Plus|Bounded)
  3504. //
  3505. if(body->firstSet->IsSingleton())
  3506. {
  3507. if(repeats.lower <= 1 && repeats.IsUnbounded())
  3508. {
  3509. if(repeats.lower == 0)
  3510. EMIT(compiler, ChompCharInst<ChompMode::Star>, body->firstSet->Singleton());
  3511. else
  3512. EMIT(compiler, ChompCharInst<ChompMode::Plus>, body->firstSet->Singleton());
  3513. }
  3514. else
  3515. EMIT(compiler, ChompCharBoundedInst, body->firstSet->Singleton(), repeats);
  3516. }
  3517. else
  3518. {
  3519. if(repeats.lower <= 1 && repeats.IsUnbounded())
  3520. {
  3521. if(repeats.lower == 0)
  3522. EMIT(compiler, ChompSetInst<ChompMode::Star>)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3523. else
  3524. EMIT(compiler, ChompSetInst<ChompMode::Plus>)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3525. }
  3526. else
  3527. EMIT(compiler, ChompSetBoundedInst, repeats)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3528. }
  3529. break;
  3530. }
  3531. case ChompGroupLastChar:
  3532. {
  3533. //
  3534. // Compilation scheme:
  3535. //
  3536. // ChompSetGroup
  3537. //
  3538. Assert(body->tag == DefineGroup);
  3539. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3540. EMIT(compiler, ChompSetBoundedGroupLastCharInst, repeats, bodyGroup->groupId, noNeedToSave)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3541. break;
  3542. }
  3543. case Guarded:
  3544. {
  3545. //
  3546. // Compilation scheme:
  3547. //
  3548. // Lloop: BeginLoopIf(Char|Set) Lexit
  3549. // <loop body>
  3550. // RepeatLoopIf(Char|Set) Lloop
  3551. // Lexit:
  3552. //
  3553. int minBodyGroupId = compiler.program->numGroups;
  3554. int maxBodyGroupId = -1;
  3555. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3556. Label beginLabel = compiler.CurrentLabel();
  3557. Label exitFixup;
  3558. if (body->firstSet->IsSingleton())
  3559. exitFixup = compiler.GetFixup(&EMIT(compiler, BeginLoopIfCharInst, body->firstSet->Singleton(), compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId)->exitLabel);
  3560. else
  3561. {
  3562. BeginLoopIfSetInst* i = EMIT(compiler, BeginLoopIfSetInst, compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId);
  3563. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3564. exitFixup = compiler.GetFixup(&i->exitLabel);
  3565. }
  3566. body->Emit(compiler, skipped);
  3567. if (body->firstSet->IsSingleton())
  3568. EMIT(compiler, RepeatLoopIfCharInst, beginLabel);
  3569. else
  3570. EMIT(compiler, RepeatLoopIfSetInst, beginLabel);
  3571. compiler.DoFixup(exitFixup, compiler.CurrentLabel());
  3572. break;
  3573. }
  3574. case Fixed:
  3575. {
  3576. //
  3577. // Compilation scheme:
  3578. //
  3579. // Lloop: BeginLoopFixed Lexit
  3580. // <loop body>
  3581. // RepeatLoopFixed Lloop
  3582. // Lexit:
  3583. //
  3584. Assert(!body->ContainsDefineGroup());
  3585. Assert(body->thisConsumes.IsFixed());
  3586. Assert(body->thisConsumes.lower > 0);
  3587. Assert(body->isDeterministic);
  3588. Label beginLabel = compiler.CurrentLabel();
  3589. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopFixedInst, compiler.NextLoopId(), repeats, !isNotInLoop, body->thisConsumes.lower)->exitLabel);
  3590. body->Emit(compiler, skipped);
  3591. EMIT(compiler, RepeatLoopFixedInst, beginLabel);
  3592. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3593. break;
  3594. }
  3595. case FixedSet:
  3596. {
  3597. //
  3598. // Compilation scheme:
  3599. //
  3600. // LoopSet
  3601. //
  3602. Assert(body->IsSimpleOneChar());
  3603. if (followFirst == MaxChar || PHASE_OFF1(Js::RegexOptBTPhase))
  3604. {
  3605. EMIT(compiler, LoopSetInst, compiler.NextLoopId(), repeats, !isNotInLoop)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3606. }
  3607. else
  3608. {
  3609. EMIT(compiler, LoopSetWithFollowFirstInst, compiler.NextLoopId(), repeats, !isNotInLoop, followFirst)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3610. }
  3611. break;
  3612. }
  3613. case FixedGroupLastIteration:
  3614. {
  3615. //
  3616. // Compilation scheme:
  3617. //
  3618. // Lloop: BeginLoopFixedGroupLastIteration Lexit
  3619. // <loop body>
  3620. // RepeatLoopFixedGroupLastIteration Lloop
  3621. // Lexit:
  3622. //
  3623. Assert(body->tag == DefineGroup);
  3624. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3625. Assert(body->thisConsumes.IsFixed());
  3626. Assert(body->thisConsumes.lower > 0);
  3627. Assert(body->isDeterministic);
  3628. Label beginLabel = compiler.CurrentLabel();
  3629. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopFixedGroupLastIterationInst, compiler.NextLoopId(), repeats, !isNotInLoop, body->thisConsumes.lower, bodyGroup->groupId, noNeedToSave)->exitLabel);
  3630. bodyGroup->body->Emit(compiler, skipped);
  3631. EMIT(compiler, RepeatLoopFixedGroupLastIterationInst, beginLabel);
  3632. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3633. break;
  3634. }
  3635. case GreedyNoBacktrack:
  3636. {
  3637. //
  3638. // Compilation scheme:
  3639. //
  3640. // Lloop: BeginGreedyLoopNoBacktrack Lexit
  3641. // <loop body>
  3642. // RepeatGreedyLoopNoBacktrack Lloop
  3643. // Lexit:
  3644. //
  3645. Assert(!body->ContainsDefineGroup());
  3646. Assert(isGreedy);
  3647. Assert(repeats.lower == 0);
  3648. Assert(repeats.upper == CharCountFlag);
  3649. Assert(body->isDeterministic);
  3650. Label beginLabel = compiler.CurrentLabel();
  3651. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginGreedyLoopNoBacktrackInst, compiler.NextLoopId())->exitLabel);
  3652. body->Emit(compiler, skipped);
  3653. EMIT(compiler, RepeatGreedyLoopNoBacktrackInst, beginLabel);
  3654. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3655. break;
  3656. }
  3657. case Set:
  3658. {
  3659. //
  3660. // Compilation scheme:
  3661. //
  3662. // OptMatch(Char|Set)
  3663. //
  3664. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3665. if (body->firstSet->IsSingleton())
  3666. EMIT(compiler, OptMatchCharInst, body->firstSet->Singleton());
  3667. else
  3668. EMIT(compiler, OptMatchSetInst)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3669. break;
  3670. }
  3671. case Chain:
  3672. {
  3673. //
  3674. // Compilation scheme:
  3675. //
  3676. // JumpIfNot(Char|Set) Lexit
  3677. // <body>
  3678. // Lexit:
  3679. //
  3680. //
  3681. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3682. Label jumpFixup = 0;
  3683. CharCount bodySkipped = 0;
  3684. if (body->firstSet->IsSingleton())
  3685. {
  3686. if (body->SupportsPrefixSkipping(compiler))
  3687. {
  3688. jumpFixup = compiler.GetFixup(&EMIT(compiler, MatchCharOrJumpInst, body->firstSet->Singleton())->targetLabel);
  3689. bodySkipped = 1;
  3690. }
  3691. else
  3692. jumpFixup = compiler.GetFixup(&EMIT(compiler, JumpIfNotCharInst, body->firstSet->Singleton())->targetLabel);
  3693. }
  3694. else
  3695. {
  3696. if (body->SupportsPrefixSkipping(compiler))
  3697. {
  3698. MatchSetOrJumpInst* const i = EMIT(compiler, MatchSetOrJumpInst);
  3699. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3700. jumpFixup = compiler.GetFixup(&i->targetLabel);
  3701. bodySkipped = 1;
  3702. }
  3703. else
  3704. {
  3705. JumpIfNotSetInst* const i = EMIT(compiler, JumpIfNotSetInst);
  3706. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3707. jumpFixup = compiler.GetFixup(&i->targetLabel);
  3708. }
  3709. }
  3710. body->Emit(compiler, bodySkipped);
  3711. compiler.DoFixup(jumpFixup, compiler.CurrentLabel());
  3712. break;
  3713. }
  3714. case Try:
  3715. {
  3716. //
  3717. // Compilation scheme:
  3718. //
  3719. // Try((If|Match)(Char|Set))? Lexit
  3720. // <loop body>
  3721. // Lexit:
  3722. //
  3723. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3724. Label tryFixup = 0;
  3725. CharCount bodySkipped = 0;
  3726. // HEURISTIC: if the first set of the body is exact or small, and the
  3727. // body does not match empty, then it's probably worth using
  3728. // a Try(If|Match)(Char|Set)
  3729. if (!body->thisConsumes.CouldMatchEmpty() &&
  3730. (body->isFirstExact || body->firstSet->Count() <= maxCharsForConditionalTry))
  3731. {
  3732. if (body->SupportsPrefixSkipping(compiler))
  3733. {
  3734. if (body->firstSet->IsSingleton())
  3735. tryFixup = compiler.GetFixup(&EMIT(compiler, TryMatchCharInst, body->firstSet->Singleton())->failLabel);
  3736. else
  3737. {
  3738. TryMatchSetInst* const i = EMIT(compiler, TryMatchSetInst);
  3739. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3740. tryFixup = compiler.GetFixup(&i->failLabel);
  3741. }
  3742. bodySkipped = 1;
  3743. }
  3744. else
  3745. {
  3746. if(body->firstSet->IsSingleton())
  3747. tryFixup = compiler.GetFixup(&EMIT(compiler, TryIfCharInst, body->firstSet->Singleton())->failLabel);
  3748. else
  3749. {
  3750. TryIfSetInst* const i = EMIT(compiler, TryIfSetInst);
  3751. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3752. tryFixup = compiler.GetFixup(&i->failLabel);
  3753. }
  3754. }
  3755. }
  3756. else
  3757. tryFixup = compiler.GetFixup(&EMIT(compiler, TryInst)->failLabel);
  3758. body->Emit(compiler, bodySkipped);
  3759. // Fixup Try
  3760. compiler.DoFixup(tryFixup, compiler.CurrentLabel());
  3761. break;
  3762. }
  3763. }
  3764. }
  3765. CharCount LoopNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  3766. {
  3767. Assert(false);
  3768. return 0;
  3769. }
  3770. bool LoopNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  3771. {
  3772. return false;
  3773. }
  3774. bool LoopNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  3775. {
  3776. return false;
  3777. }
  3778. bool LoopNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  3779. {
  3780. Assert(false);
  3781. return false;
  3782. }
  3783. #if ENABLE_REGEX_CONFIG_OPTIONS
  3784. void LoopNode::Print(DebugWriter* w, const Char* litbuf) const
  3785. {
  3786. w->Print(_u("Loop("));
  3787. repeats.Print(w);
  3788. w->PrintEOL(_u(", %s)"), isGreedy ? _u("greedy") : _u("non-greedy"));
  3789. PrintAnnotations(w);
  3790. w->PrintEOL(_u("{"));
  3791. w->Indent();
  3792. body->Print(w, litbuf);
  3793. w->Unindent();
  3794. w->PrintEOL(_u("}"));
  3795. }
  3796. #endif
  3797. // ----------------------------------------------------------------------
  3798. // AssertionNode
  3799. // ----------------------------------------------------------------------
  3800. CharCount AssertionNode::LiteralLength() const
  3801. {
  3802. return 0;
  3803. }
  3804. bool AssertionNode::IsCharOrPositiveSet() const
  3805. {
  3806. return false;
  3807. }
  3808. CharCount AssertionNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  3809. {
  3810. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3811. return body->TransferPass0(compiler, litbuf);
  3812. }
  3813. void AssertionNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  3814. {
  3815. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3816. body->TransferPass1(compiler, litbuf);
  3817. }
  3818. bool AssertionNode::IsRefiningAssertion(Compiler& compiler)
  3819. {
  3820. return !isNegation;
  3821. }
  3822. void AssertionNode::AnnotatePass0(Compiler& compiler)
  3823. {
  3824. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3825. isWord = false;
  3826. body->AnnotatePass0(compiler);
  3827. }
  3828. void AssertionNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  3829. {
  3830. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3831. features = HasAssertion;
  3832. body->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, false, parentNotNegated && !isNegation);
  3833. features |= body->features;
  3834. thisConsumes.Exact(0);
  3835. if (isNegation)
  3836. firstSet = compiler.standardChars->GetFullSet();
  3837. else
  3838. firstSet = body->firstSet;
  3839. isFirstExact = false;
  3840. if (isNegation)
  3841. // This will always fail
  3842. isThisIrrefutable = false;
  3843. else
  3844. // If body is irrefutable overall assertion is irrefutable
  3845. isThisIrrefutable = body->isThisIrrefutable;
  3846. isThisWillNotProgress = true;
  3847. isThisWillNotRegress = true;
  3848. isNotInLoop = parentNotInLoop;
  3849. isAtLeastOnce = parentAtLeastOnce;
  3850. isNotSpeculative = parentNotSpeculative;
  3851. isNotNegated = parentNotNegated;
  3852. }
  3853. void AssertionNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  3854. {
  3855. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3856. prevConsumes = accumConsumes;
  3857. isPrevWillNotProgress = accumPrevWillNotProgress;
  3858. isPrevWillNotRegress = accumPrevWillNotRegress;
  3859. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  3860. }
  3861. void AssertionNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  3862. {
  3863. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3864. followConsumes = accumConsumes;
  3865. followSet = accumFollow;
  3866. isFollowIrrefutable = accumFollowIrrefutable;
  3867. isFollowEOL = accumFollowEOL;
  3868. // Can't say anything about what the assertion body will see at its end
  3869. CountDomain innerConsumes;
  3870. CharSet<Char>* innerFollow = compiler.standardChars->GetFullSet();
  3871. // We can never backtrack into the body of an assertion (the continuation stack is cut)
  3872. body->AnnotatePass3(compiler, innerConsumes, innerFollow, true, false);
  3873. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  3874. }
  3875. void AssertionNode::AnnotatePass4(Compiler& compiler)
  3876. {
  3877. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3878. body->AnnotatePass4(compiler);
  3879. // Even if body is non-deterministic we cut the choicepoints on exit from the assertion,
  3880. // so overall assertion is deterministic.
  3881. isDeterministic = true;
  3882. //
  3883. // Compilation scheme: Fail
  3884. //
  3885. // If body is irrefutable, assertion will always fail (and will leave groups empty).
  3886. //
  3887. if (isNegation && body->isThisIrrefutable)
  3888. {
  3889. // ***COMMIT***
  3890. scheme = Fail;
  3891. return;
  3892. }
  3893. //
  3894. // Compilation scheme: Succ
  3895. //
  3896. // If body is irrefutable, assertion will always succeed. If it does not define groups
  3897. // we can eliminate it altogether.
  3898. //
  3899. if (!isNegation && body->isThisIrrefutable && !body->ContainsDefineGroup())
  3900. {
  3901. // **COMMIT**
  3902. scheme = Succ;
  3903. return;
  3904. }
  3905. //
  3906. // Compilation scheme: BeginEnd
  3907. //
  3908. scheme = BeginEnd;
  3909. }
  3910. bool AssertionNode::SupportsPrefixSkipping(Compiler& compiler) const
  3911. {
  3912. return false;
  3913. }
  3914. Node* AssertionNode::HeadSyncronizingNode(Compiler& compiler)
  3915. {
  3916. return 0;
  3917. }
  3918. CharCount AssertionNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  3919. {
  3920. return 0;
  3921. }
  3922. void AssertionNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  3923. {
  3924. Assert(false);
  3925. }
  3926. void AssertionNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  3927. {
  3928. }
  3929. void AssertionNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  3930. {
  3931. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  3932. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  3933. }
  3934. void AssertionNode::Emit(Compiler& compiler, CharCount& skipped)
  3935. {
  3936. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3937. Assert(skipped == 0);
  3938. switch (scheme)
  3939. {
  3940. case BeginEnd:
  3941. {
  3942. //
  3943. // Compilation scheme:
  3944. //
  3945. // BeginAssertion Lexit
  3946. // <body>
  3947. // EndAssertion
  3948. // Lexit:
  3949. //
  3950. int minBodyGroupId = compiler.program->numGroups;
  3951. int maxBodyGroupId = -1;
  3952. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3953. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginAssertionInst, isNegation, minBodyGroupId, maxBodyGroupId)->nextLabel);
  3954. body->Emit(compiler, skipped);
  3955. EMIT(compiler, EndAssertionInst);
  3956. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3957. break;
  3958. }
  3959. case Succ:
  3960. {
  3961. // Nothing to emit
  3962. break;
  3963. }
  3964. case Fail:
  3965. {
  3966. //
  3967. // Compilation scheme:
  3968. //
  3969. // Fail
  3970. //
  3971. EMIT(compiler, FailInst);
  3972. break;
  3973. }
  3974. }
  3975. }
  3976. CharCount AssertionNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  3977. {
  3978. Assert(false);
  3979. return 0;
  3980. }
  3981. bool AssertionNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  3982. {
  3983. return false;
  3984. }
  3985. bool AssertionNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  3986. {
  3987. return false;
  3988. }
  3989. bool AssertionNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  3990. {
  3991. Assert(false);
  3992. return false;
  3993. }
  3994. #if ENABLE_REGEX_CONFIG_OPTIONS
  3995. void AssertionNode::Print(DebugWriter* w, const Char* litbuf) const
  3996. {
  3997. w->PrintEOL(_u("Assertion(%s)"), isNegation ? _u("negative") : _u("positive"));
  3998. PrintAnnotations(w);
  3999. w->PrintEOL(_u("{"));
  4000. w->Indent();
  4001. body->Print(w, litbuf);
  4002. w->Unindent();
  4003. w->PrintEOL(_u("}"));
  4004. }
  4005. #endif
  4006. // ----------------------------------------------------------------------
  4007. // Compiler
  4008. // ----------------------------------------------------------------------
  4009. Compiler::Compiler
  4010. ( Js::ScriptContext* scriptContext
  4011. , ArenaAllocator* ctAllocator
  4012. , ArenaAllocator* rtAllocator
  4013. , StandardChars<Char>* standardChars
  4014. , Program* program
  4015. #if ENABLE_REGEX_CONFIG_OPTIONS
  4016. , DebugWriter* w
  4017. , RegexStats* stats
  4018. #endif
  4019. )
  4020. : scriptContext(scriptContext)
  4021. , ctAllocator(ctAllocator)
  4022. , rtAllocator(rtAllocator)
  4023. , standardChars(standardChars)
  4024. #if ENABLE_REGEX_CONFIG_OPTIONS
  4025. , w(w)
  4026. , stats(stats)
  4027. #endif
  4028. , program(program)
  4029. , instBuf(0)
  4030. , instLen(0)
  4031. , instNext(0)
  4032. , nextLoopId(0)
  4033. {}
  4034. void Compiler::CaptureNoLiterals(Program* program)
  4035. {
  4036. program->rep.insts.litbuf = nullptr;
  4037. program->rep.insts.litbufLen = 0;
  4038. }
  4039. void Compiler::CaptureLiterals(Node* root, const Char* litbuf)
  4040. {
  4041. // Program will own literal buffer. Prepare buffer and nodes for case-invariant matching if necessary.
  4042. CharCount finalLen = root->TransferPass0(*this, litbuf);
  4043. if (finalLen < root->LiteralLength()) // overflowed
  4044. {
  4045. Js::Throw::OutOfMemory();
  4046. }
  4047. program->rep.insts.litbuf = finalLen == 0 ? 0 : RecyclerNewArrayLeaf(scriptContext->GetRecycler(), Char, finalLen);
  4048. program->rep.insts.litbufLen = 0;
  4049. root->TransferPass1(*this, litbuf);
  4050. Assert(program->rep.insts.litbufLen == finalLen);
  4051. }
  4052. void Compiler::EmitAndCaptureSuccInst(Recycler* recycler, Program* program)
  4053. {
  4054. program->rep.insts.insts = (uint8*)RecyclerNewLeaf(recycler, SuccInst);
  4055. program->rep.insts.instsLen = sizeof(SuccInst);
  4056. program->numLoops = 0;
  4057. }
  4058. void Compiler::CaptureInsts()
  4059. {
  4060. program->rep.insts.insts = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), uint8, instNext);
  4061. program->rep.insts.instsLen = instNext;
  4062. memcpy_s(program->rep.insts.insts, instNext, instBuf, instNext);
  4063. program->numLoops = nextLoopId;
  4064. }
  4065. void Compiler::FreeBody()
  4066. {
  4067. if (instBuf != 0)
  4068. {
  4069. ctAllocator->Free(instBuf, instLen);
  4070. instBuf = 0;
  4071. instLen = 0;
  4072. instNext = 0;
  4073. }
  4074. }
  4075. void Compiler::CompileEmptyRegex
  4076. ( Program* program
  4077. , RegexPattern* pattern
  4078. #if ENABLE_REGEX_CONFIG_OPTIONS
  4079. , DebugWriter* w
  4080. , RegexStats* stats
  4081. #endif
  4082. )
  4083. {
  4084. program->tag = Program::ProgramTag::InstructionsTag;
  4085. CaptureNoLiterals(program);
  4086. EmitAndCaptureSuccInst(pattern->GetScriptContext()->GetRecycler(), program);
  4087. }
  4088. void Compiler::Compile
  4089. ( Js::ScriptContext* scriptContext
  4090. , ArenaAllocator* ctAllocator
  4091. , ArenaAllocator* rtAllocator
  4092. , StandardChars<Char>* standardChars
  4093. , Program *program
  4094. , Node* root
  4095. , const Char* litbuf
  4096. , RegexPattern* pattern
  4097. #if ENABLE_REGEX_CONFIG_OPTIONS
  4098. , DebugWriter* w
  4099. , RegexStats* stats
  4100. #endif
  4101. )
  4102. {
  4103. #if ENABLE_REGEX_CONFIG_OPTIONS
  4104. if (w != 0 && REGEX_CONFIG_FLAG(RegexDebugAST))
  4105. {
  4106. w->PrintEOL(_u("REGEX AST /%s/ {"), PointerValue(program->source));
  4107. w->Indent();
  4108. root->Print(w, litbuf);
  4109. w->Unindent();
  4110. w->PrintEOL(_u("}"));
  4111. w->Flush();
  4112. }
  4113. #endif
  4114. Compiler compiler
  4115. ( scriptContext
  4116. , ctAllocator
  4117. , rtAllocator
  4118. , standardChars
  4119. , program
  4120. #if ENABLE_REGEX_CONFIG_OPTIONS
  4121. , w
  4122. , stats
  4123. #endif
  4124. );
  4125. bool compiled = false;
  4126. if (REGEX_CONFIG_FLAG(RegexOptimize))
  4127. {
  4128. // SPECIAL CASE: Octoquad/trigrams
  4129. // (must handle before converting to case-insensitive form since the later interferes with octoquad pattern recognizer)
  4130. if (OctoquadIdentifier::Qualifies(program))
  4131. {
  4132. int numCodes;
  4133. char localCodeToChar[TrigramAlphabet::AlphaCount];
  4134. char localCharToCode[TrigramAlphabet::AsciiTableSize];
  4135. char (*codeToChar)[TrigramAlphabet::AlphaCount];
  4136. char (*charToCode)[TrigramAlphabet::AsciiTableSize];
  4137. TrigramAlphabet *trigramAlphabet = scriptContext->GetTrigramAlphabet();
  4138. if(trigramAlphabet)
  4139. {
  4140. numCodes = TrigramAlphabet::AlphaCount;
  4141. codeToChar = &trigramAlphabet->alpha;
  4142. charToCode = &trigramAlphabet->alphaBits;
  4143. }
  4144. else
  4145. {
  4146. numCodes = 0;
  4147. codeToChar = &localCodeToChar;
  4148. charToCode = &localCharToCode;
  4149. }
  4150. OctoquadIdentifier oi(numCodes, *codeToChar, *charToCode);
  4151. // We haven't captured literals yet: temporarily set the program's litbuf to be the parser's litbuf
  4152. Assert(program->rep.insts.litbuf == nullptr);
  4153. program->rep.insts.litbuf = (Char*)litbuf;
  4154. if (root->IsOctoquad(compiler, &oi) && oi.IsOctoquad())
  4155. {
  4156. program->rep.insts.litbuf = nullptr;
  4157. oi.InitializeTrigramInfo(scriptContext, pattern);
  4158. program->tag = Program::ProgramTag::OctoquadTag;
  4159. program->rep.octoquad.matcher = OctoquadMatcher::New(scriptContext->GetRecycler(), standardChars, program->GetCaseMappingSource(), &oi);
  4160. compiled = true;
  4161. }
  4162. else
  4163. program->rep.insts.litbuf = nullptr;
  4164. }
  4165. }
  4166. if (!compiled)
  4167. {
  4168. if (REGEX_CONFIG_FLAG(RegexOptimize))
  4169. {
  4170. Char c = 0;
  4171. if (root->IsSingleChar(compiler, c))
  4172. {
  4173. // SPECIAL CASE: c
  4174. program->tag = Program::ProgramTag::SingleCharTag;
  4175. program->rep.singleChar.c = c;
  4176. }
  4177. else if (root->IsBoundedWord(compiler))
  4178. {
  4179. // SPECIAL CASE: \b\w+\b
  4180. program->tag = Program::ProgramTag::BoundedWordTag;
  4181. }
  4182. else if (root->IsLeadingTrailingSpaces(compiler,
  4183. program->rep.leadingTrailingSpaces.beginMinMatch,
  4184. program->rep.leadingTrailingSpaces.endMinMatch))
  4185. {
  4186. // SPECIAL CASE: ^\s*|\s*$
  4187. program->tag = Program::ProgramTag::LeadingTrailingSpacesTag;
  4188. }
  4189. else if (root->IsBOILiteral2(compiler))
  4190. {
  4191. program->tag = Program::ProgramTag::BOILiteral2Tag;
  4192. program->rep.boiLiteral2.literal = *(DWORD *)litbuf;
  4193. }
  4194. else
  4195. {
  4196. program->tag = Program::ProgramTag::InstructionsTag;
  4197. compiler.CaptureLiterals(root, litbuf);
  4198. root->AnnotatePass0(compiler);
  4199. root->AnnotatePass1(compiler, true, true, true, true);
  4200. // Nothing comes before or after overall pattern
  4201. CountDomain consumes(0);
  4202. // Match could progress from lhs (since we try successive start positions), but can never regress
  4203. root->AnnotatePass2(compiler, consumes, false, true);
  4204. // Anything could follow an end of pattern match
  4205. CharSet<Char>* follow = standardChars->GetFullSet();
  4206. root->AnnotatePass3(compiler, consumes, follow, true, false);
  4207. root->AnnotatePass4(compiler);
  4208. #if ENABLE_REGEX_CONFIG_OPTIONS
  4209. if (w != 0 && REGEX_CONFIG_FLAG(RegexDebugAST) && REGEX_CONFIG_FLAG(RegexDebugAnnotatedAST))
  4210. {
  4211. w->PrintEOL(_u("REGEX ANNOTATED AST /%s/ {"), PointerValue(program->source));
  4212. w->Indent();
  4213. root->Print(w, program->rep.insts.litbuf);
  4214. w->Unindent();
  4215. w->PrintEOL(_u("}"));
  4216. w->Flush();
  4217. }
  4218. #endif
  4219. CharCount skipped = 0;
  4220. // If the root Node has a hard fail BOI, we should not emit any synchronize Nodes
  4221. // since we can easily just search from the beginning.
  4222. if (root->hasInitialHardFailBOI == false)
  4223. {
  4224. // If the root Node doesn't have hard fail BOI but sticky flag is present don't synchronize Nodes
  4225. // since we can easily just search from the beginning. Instead set to special InstructionTag
  4226. if ((program->flags & StickyRegexFlag) != 0)
  4227. {
  4228. compiler.SetBOIInstructionsProgramForStickyFlagTag();
  4229. }
  4230. else
  4231. {
  4232. Node* bestSyncronizingNode = 0;
  4233. root->BestSyncronizingNode(compiler, bestSyncronizingNode);
  4234. Node* headSyncronizingNode = root->HeadSyncronizingNode(compiler);
  4235. if ((bestSyncronizingNode == 0 && headSyncronizingNode != 0) ||
  4236. (bestSyncronizingNode != 0 && headSyncronizingNode == bestSyncronizingNode))
  4237. {
  4238. // Scan and consume the head, continue with rest assuming head has been consumed
  4239. skipped = headSyncronizingNode->EmitScan(compiler, true);
  4240. }
  4241. else if (bestSyncronizingNode != 0)
  4242. {
  4243. // Scan for the synchronizing node, then backup ready for entire pattern
  4244. skipped = bestSyncronizingNode->EmitScan(compiler, false);
  4245. Assert(skipped == 0);
  4246. // We're synchronizing to a non-head node; if we have to back up, then try to synchronize to a character
  4247. // in the first set before running the remaining instructions
  4248. if (!bestSyncronizingNode->prevConsumes.CouldMatchEmpty()) // must back up at least one character
  4249. skipped = root->EmitScanFirstSet(compiler);
  4250. }
  4251. else
  4252. {
  4253. // Optionally scan for a character in the overall pattern's FIRST set, possibly consume it,
  4254. // then match all or remainder of pattern
  4255. skipped = root->EmitScanFirstSet(compiler);
  4256. }
  4257. }
  4258. }
  4259. root->Emit(compiler, skipped);
  4260. compiler.Emit<SuccInst>();
  4261. compiler.CaptureInsts();
  4262. }
  4263. }
  4264. else
  4265. {
  4266. program->tag = Program::ProgramTag::InstructionsTag;
  4267. compiler.CaptureLiterals(root, litbuf);
  4268. CharCount skipped = 0;
  4269. root->Emit(compiler, skipped);
  4270. compiler.Emit<SuccInst>();
  4271. compiler.CaptureInsts();
  4272. }
  4273. }
  4274. #if ENABLE_REGEX_CONFIG_OPTIONS
  4275. if (w != 0)
  4276. {
  4277. w->PrintEOL(_u("REGEX PROGRAM /%s/"), PointerValue(program->source));
  4278. program->Print(w);
  4279. w->Flush();
  4280. }
  4281. #endif
  4282. }
  4283. }