RegexCompileTime.cpp 173 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "ParserPch.h"
  6. namespace UnifiedRegex
  7. {
  8. // ----------------------------------------------------------------------
  9. // Compiler (inlines etc)
  10. // ----------------------------------------------------------------------
  11. // The VS2013 linker treats this as a redefinition of an already
  12. // defined constant and complains. So skip the declaration if we're compiling
  13. // with VS2013 or below.
  14. #if !defined(_MSC_VER) || _MSC_VER >= 1900
  15. const CharCount Compiler::initInstBufSize;
  16. #endif
  17. uint8* Compiler::Emit(size_t size)
  18. {
  19. Assert(size <= UINT32_MAX);
  20. if (instLen - instNext < size)
  21. {
  22. CharCount newLen = max(instLen, initInstBufSize);
  23. CharCount instLenPlus = (CharCount)(instLen + size - 1);
  24. // check for overflow
  25. if (instLenPlus < instLen || instLenPlus * 2 < instLenPlus)
  26. {
  27. Js::Throw::OutOfMemory();
  28. }
  29. while (newLen <= instLenPlus)
  30. {
  31. newLen *= 2;
  32. }
  33. instBuf = (uint8*)ctAllocator->Realloc(instBuf, instLen, newLen);
  34. instLen = newLen;
  35. }
  36. uint8* inst = instBuf + instNext;
  37. instNext += (CharCount)size;
  38. return inst;
  39. }
  40. template <typename T>
  41. T* Compiler::Emit()
  42. {
  43. return new(Emit(sizeof(T))) T;
  44. }
  45. #define EMIT(compiler, T, ...) (new (compiler.Emit(sizeof(T))) T(__VA_ARGS__))
  46. #define L2I(O, label) LabelToInstPointer<O##Inst>(Inst::O, label)
  47. // Remember: The machine address of an instruction is no longer valid after a subsequent emit,
  48. // so all label fixups must be done using Compiler::GetFixup / Compiler::DoFixup
  49. // ----------------------------------------------------------------------
  50. // Node
  51. // ----------------------------------------------------------------------
  52. void Node::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  53. {
  54. Assert(false);
  55. }
  56. CharCount Node::EmitScanFirstSet(Compiler& compiler)
  57. {
  58. Assert(prevConsumes.IsExact(0));
  59. if (thisConsumes.CouldMatchEmpty())
  60. // Can't be sure of consuming something in FIRST
  61. return 0;
  62. if (firstSet->Count() > maxSyncToSetSize)
  63. // HEURISTIC: If FIRST is large we'll get too many false positives
  64. return 0;
  65. //
  66. // Compilation scheme:
  67. //
  68. // SyncTo(Char|Char2|Set)And(Consume|Continue)
  69. //
  70. Char entries[CharSet<Char>::MaxCompact];
  71. int count = firstSet->GetCompactEntries(2, entries);
  72. if (SupportsPrefixSkipping(compiler))
  73. {
  74. if (count == 1)
  75. EMIT(compiler, SyncToCharAndConsumeInst, entries[0]);
  76. else if (count == 2)
  77. EMIT(compiler, SyncToChar2SetAndConsumeInst, entries[0], entries[1]);
  78. else
  79. EMIT(compiler, SyncToSetAndConsumeInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  80. return 1;
  81. }
  82. else
  83. {
  84. if (count == 1)
  85. EMIT(compiler, SyncToCharAndContinueInst, entries[0]);
  86. else if (count == 2)
  87. EMIT(compiler, SyncToChar2SetAndContinueInst, entries[0], entries[1]);
  88. else
  89. EMIT(compiler, SyncToSetAndContinueInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  90. return 0;
  91. }
  92. }
  93. bool Node::IsBetterSyncronizingNode(Compiler& compiler, Node* curr, Node* proposed)
  94. {
  95. int proposedNumLiterals = 0;
  96. CharCount proposedLength = proposed->MinSyncronizingLiteralLength(compiler, proposedNumLiterals);
  97. if (proposedLength == 0 || proposedNumLiterals > maxNumSyncLiterals)
  98. // Not a synchronizable node or too many literals.
  99. return false;
  100. if (curr == nullptr)
  101. // We'll take whatever we can get
  102. return true;
  103. int currNumLiterals = 0;
  104. CharCount currLength = curr->MinSyncronizingLiteralLength(compiler, currNumLiterals);
  105. // Lexicographic ordering based on
  106. // - whether literal length is above a threshold (above is better)
  107. // - number of literals (smaller is better)
  108. // - upper bound on backup (finite is better)
  109. // - minimum literal length (longer is better)
  110. // - actual backup upper bound (shorter is better)
  111. if (proposedLength >= preferredMinSyncToLiteralLength
  112. && currLength < preferredMinSyncToLiteralLength)
  113. {
  114. return true;
  115. }
  116. if (proposedLength < preferredMinSyncToLiteralLength
  117. && currLength >= preferredMinSyncToLiteralLength)
  118. {
  119. return false;
  120. }
  121. if (proposedNumLiterals < currNumLiterals)
  122. return true;
  123. if (proposedNumLiterals > currNumLiterals)
  124. return false;
  125. if (!proposed->prevConsumes.IsUnbounded() && curr->prevConsumes.IsUnbounded())
  126. return true;
  127. if (proposed->prevConsumes.IsUnbounded() && !curr->prevConsumes.IsUnbounded())
  128. return false;
  129. if (proposedLength > currLength)
  130. return true;
  131. if (proposedLength < currLength)
  132. return false;
  133. return proposed->prevConsumes.upper < curr->prevConsumes.upper;
  134. }
  135. bool Node::IsSingleChar(Compiler& compiler, Char& outChar) const
  136. {
  137. if (tag != Node::MatchChar)
  138. return false;
  139. const MatchCharNode* node = (const MatchCharNode*)this;
  140. if (node->isEquivClass)
  141. return false;
  142. outChar = node->cs[0];
  143. return true;
  144. }
  145. bool Node::IsBoundedWord(Compiler& compiler) const
  146. {
  147. if (tag != Node::Concat)
  148. return false;
  149. const ConcatNode* concatNode = (const ConcatNode *)this;
  150. if (concatNode->head->tag != Node::WordBoundary ||
  151. concatNode->tail == 0 ||
  152. concatNode->tail->head->tag != Node::Loop ||
  153. concatNode->tail->tail == 0 ||
  154. concatNode->tail->tail->head->tag != Node::WordBoundary ||
  155. concatNode->tail->tail->tail != 0)
  156. return false;
  157. const WordBoundaryNode* enter = (const WordBoundaryNode*)concatNode->head;
  158. const LoopNode* loop = (const LoopNode*)concatNode->tail->head;
  159. const WordBoundaryNode* leave = (const WordBoundaryNode*)concatNode->tail->tail->head;
  160. if (enter->isNegation ||
  161. !loop->isGreedy ||
  162. loop->repeats.lower != 1 ||
  163. loop->repeats.upper != CharCountFlag ||
  164. loop->body->tag != Node::MatchSet ||
  165. leave->isNegation)
  166. return false;
  167. const MatchSetNode* wordSet = (const MatchSetNode*)loop->body;
  168. if (wordSet->isNegation)
  169. return false;
  170. return wordSet->set.IsEqualTo(*compiler.standardChars->GetWordSet());
  171. }
  172. bool Node::IsBOILiteral2(Compiler& compiler) const
  173. {
  174. if (tag != Node::Concat)
  175. return false;
  176. const ConcatNode* concatNode = (const ConcatNode *)this;
  177. if ((compiler.program->flags & (IgnoreCaseRegexFlag | MultilineRegexFlag)) != 0 ||
  178. concatNode->head->tag != Node::BOL ||
  179. concatNode->tail == nullptr ||
  180. concatNode->tail->head->tag != Node::MatchLiteral ||
  181. concatNode->tail->tail != nullptr ||
  182. ((MatchLiteralNode *)concatNode->tail->head)->isEquivClass ||
  183. ((MatchLiteralNode *)concatNode->tail->head)->length != 2)
  184. {
  185. return false;
  186. }
  187. return true;
  188. }
  189. bool Node::IsLeadingTrailingSpaces(Compiler& compiler, CharCount& leftMinMatch, CharCount& rightMinMatch) const
  190. {
  191. if (tag != Node::Alt)
  192. return false;
  193. if (compiler.program->flags & MultilineRegexFlag)
  194. return false;
  195. const AltNode* altNode = (const AltNode*)this;
  196. if (altNode->head->tag != Node::Concat ||
  197. altNode->tail == 0 ||
  198. altNode->tail->head->tag != Node::Concat ||
  199. altNode->tail->tail != 0)
  200. return false;
  201. const ConcatNode* left = (const ConcatNode*)altNode->head;
  202. const ConcatNode* right = (const ConcatNode*)altNode->tail->head;
  203. if (left->head->tag != Node::BOL ||
  204. left->tail == 0 ||
  205. left->tail->head->tag != Node::Loop ||
  206. left->tail->tail != 0)
  207. return false;
  208. if (right->head->tag != Node::Loop ||
  209. right->tail == 0 ||
  210. right->tail->head->tag != Node::EOL ||
  211. right->tail->tail != 0)
  212. return false;
  213. const LoopNode* leftLoop = (const LoopNode*)left->tail->head;
  214. const LoopNode* rightLoop = (const LoopNode*)right->head;
  215. if (!leftLoop->isGreedy ||
  216. leftLoop->repeats.upper != CharCountFlag ||
  217. leftLoop->body->tag != Node::MatchSet ||
  218. !rightLoop->isGreedy ||
  219. rightLoop->repeats.upper != CharCountFlag ||
  220. rightLoop->body->tag != Node::MatchSet)
  221. return false;
  222. const MatchSetNode* leftSet = (const MatchSetNode*)leftLoop->body;
  223. const MatchSetNode* rightSet = (const MatchSetNode*)rightLoop->body;
  224. if (leftSet->isNegation ||
  225. rightSet->isNegation)
  226. return false;
  227. leftMinMatch = leftLoop->repeats.lower;
  228. rightMinMatch = rightLoop->repeats.lower;
  229. return
  230. leftSet->set.IsEqualTo(*compiler.standardChars->GetWhitespaceSet()) &&
  231. rightSet->set.IsEqualTo(*compiler.standardChars->GetWhitespaceSet());
  232. }
  233. #if ENABLE_REGEX_CONFIG_OPTIONS
  234. void Node::PrintAnnotations(DebugWriter* w) const
  235. {
  236. if (firstSet != 0)
  237. {
  238. w->PrintEOL(_u("<"));
  239. w->Indent();
  240. w->Print(_u("features: {"));
  241. bool first = true;
  242. for (uint i = Empty; i <= Assertion; i++)
  243. {
  244. if ((features & (1 << i)) != 0)
  245. {
  246. if (first)
  247. first = false;
  248. else
  249. w->Print(_u(","));
  250. switch (i)
  251. {
  252. case Empty: w->Print(_u("Empty")); break;
  253. case BOL: w->Print(_u("BOL")); break;
  254. case EOL: w->Print(_u("EOL")); break;
  255. case WordBoundary: w->Print(_u("WordBoundary")); break;
  256. case MatchLiteral: w->Print(_u("MatchLiteral")); break;
  257. case MatchChar: w->Print(_u("MatchChar")); break;
  258. case Concat: w->Print(_u("Concat")); break;
  259. case Alt: w->Print(_u("Alt")); break;
  260. case DefineGroup: w->Print(_u("DefineGroup")); break;
  261. case MatchGroup: w->Print(_u("MatchGroup")); break;
  262. case Loop: w->Print(_u("Loop")); break;
  263. case MatchSet: w->Print(_u("MatchSet")); break;
  264. case Assertion: w->Print(_u("Assertion")); break;
  265. }
  266. }
  267. }
  268. w->PrintEOL(_u("}"));
  269. w->Print(_u("firstSet: "));
  270. firstSet->Print(w);
  271. if (isFirstExact)
  272. w->Print(_u(" (exact)"));
  273. w->EOL();
  274. w->Print(_u("followSet: "));
  275. followSet->Print(w);
  276. w->EOL();
  277. w->Print(_u("prevConsumes: "));
  278. prevConsumes.Print(w);
  279. w->EOL();
  280. w->Print(_u("thisConsumes: "));
  281. thisConsumes.Print(w);
  282. w->EOL();
  283. w->Print(_u("followConsumes: "));
  284. followConsumes.Print(w);
  285. w->EOL();
  286. w->PrintEOL(_u("isThisIrrefutable: %s"), isThisIrrefutable ? _u("true") : _u("false"));
  287. w->PrintEOL(_u("isFollowIrrefutable: %s"), isFollowIrrefutable ? _u("true") : _u("false"));
  288. w->PrintEOL(_u("isWord: %s"), isWord ? _u("true") : _u("false"));
  289. w->PrintEOL(_u("isThisWillNotProgress: %s"), isThisWillNotProgress ? _u("true") : _u("false"));
  290. w->PrintEOL(_u("isThisWillNotRegress: %s"), isThisWillNotRegress ? _u("true") : _u("false"));
  291. w->PrintEOL(_u("isPrevWillNotProgress: %s"), isPrevWillNotProgress ? _u("true") : _u("false"));
  292. w->PrintEOL(_u("isPrevWillNotRegress: %s"), isPrevWillNotRegress ? _u("true") : _u("false"));
  293. w->PrintEOL(_u("isDeterministic: %s"), isDeterministic ? _u("true") : _u("false"));
  294. w->PrintEOL(_u("isNotInLoop: %s"), isNotInLoop ? _u("true") : _u("false"));
  295. w->PrintEOL(_u("isNotNegated: %s"), isNotNegated ? _u("true") : _u("false"));
  296. w->PrintEOL(_u("isAtLeastOnce: %s"), isAtLeastOnce ? _u("true") : _u("false"));
  297. w->PrintEOL(_u("hasInitialHardFailBOI: %s"), hasInitialHardFailBOI ? _u("true") : _u("false"));
  298. w->Unindent();
  299. w->PrintEOL(_u(">"));
  300. }
  301. }
  302. #endif
  303. // ----------------------------------------------------------------------
  304. // SimpleNode
  305. // ----------------------------------------------------------------------
  306. CharCount SimpleNode::LiteralLength() const
  307. {
  308. return 0;
  309. }
  310. bool SimpleNode::IsCharOrPositiveSet() const
  311. {
  312. return false;
  313. }
  314. CharCount SimpleNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  315. {
  316. return 0;
  317. }
  318. void SimpleNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  319. {
  320. }
  321. bool SimpleNode::IsRefiningAssertion(Compiler& compiler)
  322. {
  323. return tag == EOL && (compiler.program->flags & MultilineRegexFlag) != 0;
  324. }
  325. void SimpleNode::AnnotatePass0(Compiler& compiler)
  326. {
  327. isWord = false;
  328. }
  329. void SimpleNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  330. {
  331. isFirstExact = false;
  332. thisConsumes.Exact(0);
  333. isThisWillNotProgress = true;
  334. isThisWillNotRegress = true;
  335. isNotInLoop = parentNotInLoop;
  336. isAtLeastOnce = parentAtLeastOnce;
  337. isNotSpeculative = parentNotSpeculative;
  338. isNotNegated = parentNotNegated;
  339. switch (tag)
  340. {
  341. case Empty:
  342. features = HasEmpty;
  343. firstSet = compiler.standardChars->GetEmptySet();
  344. isThisIrrefutable = true;
  345. break;
  346. case BOL:
  347. features = HasBOL;
  348. firstSet = compiler.standardChars->GetFullSet();
  349. isThisIrrefutable = false;
  350. break;
  351. case EOL:
  352. features = HasEOL;
  353. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  354. firstSet = compiler.standardChars->GetNewlineSet();
  355. else
  356. firstSet = compiler.standardChars->GetEmptySet();
  357. isThisIrrefutable = false;
  358. break;
  359. default:
  360. Assert(false);
  361. }
  362. }
  363. void SimpleNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  364. {
  365. prevConsumes = accumConsumes;
  366. isPrevWillNotProgress = accumPrevWillNotProgress;
  367. isPrevWillNotRegress = accumPrevWillNotRegress;
  368. }
  369. void SimpleNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  370. {
  371. followConsumes = accumConsumes;
  372. followSet = accumFollow;
  373. isFollowIrrefutable = accumFollowIrrefutable;
  374. isFollowEOL = accumFollowEOL;
  375. hasInitialHardFailBOI = ((tag == BOL) &&
  376. prevConsumes.IsExact(0) &&
  377. (compiler.program->flags & MultilineRegexFlag) == 0 &&
  378. isAtLeastOnce &&
  379. isNotNegated &&
  380. isPrevWillNotRegress);
  381. }
  382. void SimpleNode::AnnotatePass4(Compiler& compiler)
  383. {
  384. isDeterministic = true;
  385. }
  386. bool SimpleNode::SupportsPrefixSkipping(Compiler& compiler) const
  387. {
  388. return false;
  389. }
  390. Node* SimpleNode::HeadSyncronizingNode(Compiler& compiler)
  391. {
  392. return 0;
  393. }
  394. CharCount SimpleNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  395. {
  396. return 0;
  397. }
  398. void SimpleNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  399. {
  400. Assert(false);
  401. }
  402. void SimpleNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  403. {
  404. }
  405. void SimpleNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  406. {
  407. }
  408. void SimpleNode::Emit(Compiler& compiler, CharCount& skipped)
  409. {
  410. Assert(skipped == 0);
  411. switch (tag)
  412. {
  413. case Empty:
  414. // Nothing
  415. break;
  416. case BOL:
  417. {
  418. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  419. {
  420. //
  421. // Compilation scheme:
  422. //
  423. // BOLTest
  424. //
  425. EMIT(compiler, BOLTestInst);
  426. }
  427. else
  428. {
  429. if (compiler.CurrentLabel() == 0)
  430. {
  431. // The first instruction is BOI, change the tag and only execute it once
  432. // without looping every start position
  433. compiler.SetBOIInstructionsProgramTag();
  434. }
  435. else
  436. {
  437. //
  438. // Compilation scheme:
  439. //
  440. // BOITest
  441. //
  442. // Obviously starting later in the string won't help, so can hard fail if:
  443. // - this pattern must always be matched
  444. // - not in a negative assertion
  445. // - backtracking could never rewind the input pointer
  446. //
  447. EMIT(compiler, BOITestInst, isAtLeastOnce && isNotNegated && isPrevWillNotRegress);
  448. }
  449. }
  450. break;
  451. }
  452. case EOL:
  453. {
  454. if ((compiler.program->flags & MultilineRegexFlag) != 0)
  455. //
  456. // Compilation scheme:
  457. //
  458. // EOLTest
  459. //
  460. EMIT(compiler, EOLTestInst);
  461. else
  462. //
  463. // Compilation scheme:
  464. //
  465. // EOITest
  466. //
  467. // Can hard fail if
  468. // - this pattern must always be matched
  469. // - not in a negative assertion
  470. // - backtracking could never advance the input pointer
  471. //
  472. EMIT(compiler, EOITestInst, isAtLeastOnce && isNotNegated && isPrevWillNotProgress);
  473. break;
  474. }
  475. default:
  476. Assert(false);
  477. }
  478. }
  479. CharCount SimpleNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  480. {
  481. Assert(false);
  482. return 0;
  483. }
  484. bool SimpleNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  485. {
  486. return false;
  487. }
  488. bool SimpleNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  489. {
  490. return tag == Empty;
  491. }
  492. bool SimpleNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  493. {
  494. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  495. Assert(tag == Empty);
  496. if (cont == 0)
  497. {
  498. if (trie->Count() > 0)
  499. // This literal is a proper prefix of an earlier literal
  500. return false;
  501. trie->SetAccepting();
  502. return true;
  503. }
  504. return cont->BuildCharTrie(compiler, trie, 0, isAcceptFirst);
  505. }
  506. #if ENABLE_REGEX_CONFIG_OPTIONS
  507. void SimpleNode::Print(DebugWriter* w, const Char* litbuf) const
  508. {
  509. switch (tag)
  510. {
  511. case Empty:
  512. w->Print(_u("Empty")); break;
  513. case BOL:
  514. w->Print(_u("BOL")); break;
  515. case EOL:
  516. w->Print(_u("EOL")); break;
  517. default:
  518. Assert(false);
  519. }
  520. w->PrintEOL(_u("()"));
  521. PrintAnnotations(w);
  522. }
  523. #endif
  524. // ----------------------------------------------------------------------
  525. // WordBoundaryNode
  526. // ----------------------------------------------------------------------
  527. CharCount WordBoundaryNode::LiteralLength() const
  528. {
  529. return 0;
  530. }
  531. bool WordBoundaryNode::IsCharOrPositiveSet() const
  532. {
  533. return false;
  534. }
  535. CharCount WordBoundaryNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  536. {
  537. return 0;
  538. }
  539. void WordBoundaryNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  540. {
  541. // WordChars and NonWordChars sets are already case invariant
  542. }
  543. bool WordBoundaryNode::IsRefiningAssertion(Compiler& compiler)
  544. {
  545. return mustIncludeEntering != mustIncludeLeaving;
  546. }
  547. void WordBoundaryNode::AnnotatePass0(Compiler& compiler)
  548. {
  549. isWord = false;
  550. }
  551. void WordBoundaryNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  552. {
  553. features = HasWordBoundary;
  554. thisConsumes.Exact(0);
  555. isFirstExact = false;
  556. isThisIrrefutable = false;
  557. isThisWillNotProgress = true;
  558. isThisWillNotRegress = true;
  559. isNotInLoop = parentNotInLoop;
  560. isAtLeastOnce = parentAtLeastOnce;
  561. isNotSpeculative = parentNotSpeculative;
  562. isNotNegated = parentNotNegated;
  563. if (isNegation)
  564. firstSet = compiler.standardChars->GetFullSet();
  565. else
  566. {
  567. if (mustIncludeEntering && !mustIncludeLeaving)
  568. firstSet = compiler.standardChars->GetWordSet();
  569. else if (mustIncludeLeaving && !mustIncludeEntering)
  570. firstSet = compiler.standardChars->GetNonWordSet();
  571. else
  572. firstSet = compiler.standardChars->GetFullSet();
  573. }
  574. }
  575. void WordBoundaryNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  576. {
  577. prevConsumes = accumConsumes;
  578. isPrevWillNotProgress = accumPrevWillNotProgress;
  579. isPrevWillNotRegress = accumPrevWillNotRegress;
  580. }
  581. void WordBoundaryNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  582. {
  583. followConsumes = accumConsumes;
  584. followSet = accumFollow;
  585. isFollowIrrefutable = accumFollowIrrefutable;
  586. isFollowEOL = accumFollowEOL;
  587. }
  588. void WordBoundaryNode::AnnotatePass4(Compiler& compiler)
  589. {
  590. isDeterministic = true;
  591. }
  592. bool WordBoundaryNode::SupportsPrefixSkipping(Compiler& compiler) const
  593. {
  594. return false;
  595. }
  596. Node* WordBoundaryNode::HeadSyncronizingNode(Compiler& compiler)
  597. {
  598. return 0;
  599. }
  600. CharCount WordBoundaryNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  601. {
  602. return 0;
  603. }
  604. void WordBoundaryNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  605. {
  606. Assert(false);
  607. }
  608. void WordBoundaryNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  609. {
  610. }
  611. void WordBoundaryNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  612. {
  613. }
  614. void WordBoundaryNode::Emit(Compiler& compiler, CharCount& skipped)
  615. {
  616. Assert(skipped == 0);
  617. //
  618. // Compilation scheme:
  619. //
  620. // WordBoundaryTest
  621. //
  622. EMIT(compiler, WordBoundaryTestInst, isNegation);
  623. }
  624. CharCount WordBoundaryNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  625. {
  626. Assert(false);
  627. return 0;
  628. }
  629. bool WordBoundaryNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  630. {
  631. return false;
  632. }
  633. bool WordBoundaryNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  634. {
  635. return false;
  636. }
  637. bool WordBoundaryNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  638. {
  639. Assert(false);
  640. return false;
  641. }
  642. #if ENABLE_REGEX_CONFIG_OPTIONS
  643. void WordBoundaryNode::Print(DebugWriter* w, const Char* litbuf) const
  644. {
  645. w->PrintEOL(_u("WordBoundary(%s, %s, %s)"), isNegation ? _u("negative") : _u("positive"), mustIncludeEntering ? _u("entering") : _u("-"), mustIncludeLeaving ? _u("leaving") : _u("-"));
  646. PrintAnnotations(w);
  647. }
  648. #endif
  649. // ----------------------------------------------------------------------
  650. // MatchLiteralNode
  651. // ----------------------------------------------------------------------
  652. CharCount MatchLiteralNode::LiteralLength() const
  653. {
  654. return length;
  655. }
  656. void MatchLiteralNode::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  657. {
  658. // Called during parsing only, so literal always in original form
  659. Assert(!isEquivClass);
  660. Assert(litbufNext + length <= litbufLen && offset + length <= litbufLen);
  661. #pragma prefast(suppress:26000, "The error said that offset + length >= litbufLen + 1, which is incorrect due to if statement below.")
  662. if (litbufNext + length <= litbufLen && offset + length <= litbufLen) // for prefast
  663. {
  664. js_wmemcpy_s(litbuf + litbufNext, litbufLen - litbufNext, litbuf + offset, length);
  665. }
  666. litbufNext += length;
  667. }
  668. bool MatchLiteralNode::IsCharOrPositiveSet() const
  669. {
  670. return false;
  671. }
  672. CharCount MatchLiteralNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  673. {
  674. Assert(length > 1);
  675. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0
  676. && !compiler.standardChars->IsTrivialString(compiler.program->GetCaseMappingSource(), litbuf + offset, length))
  677. {
  678. // We'll need to expand each character of literal into its equivalence class
  679. isEquivClass = true;
  680. return length * CaseInsensitive::EquivClassSize;
  681. }
  682. else
  683. return length;
  684. }
  685. void MatchLiteralNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  686. {
  687. CharCount nextLit = compiler.program->rep.insts.litbufLen;
  688. if (isEquivClass)
  689. {
  690. Assert((compiler.program->flags & IgnoreCaseRegexFlag) != 0);
  691. // Expand literal according to character equivalence classes
  692. for (CharCount i = 0; i < length; i++)
  693. {
  694. compiler.standardChars->ToEquivs(
  695. compiler.program->GetCaseMappingSource(),
  696. litbuf[offset + i],
  697. compiler.program->rep.insts.litbuf + nextLit + i * CaseInsensitive::EquivClassSize);
  698. }
  699. compiler.program->rep.insts.litbufLen += length * CaseInsensitive::EquivClassSize;
  700. }
  701. else
  702. {
  703. for (CharCount i = 0; i < length; i++)
  704. compiler.program->rep.insts.litbuf[nextLit + i] = litbuf[offset + i];
  705. compiler.program->rep.insts.litbufLen += length;
  706. }
  707. offset = nextLit;
  708. }
  709. void MatchLiteralNode::AnnotatePass0(Compiler& compiler)
  710. {
  711. const Char* litbuf = compiler.program->rep.insts.litbuf;
  712. for (CharCount i = offset; i < offset + length; i++)
  713. {
  714. if (!compiler.standardChars->IsWord(litbuf[i]))
  715. {
  716. isWord = false;
  717. return;
  718. }
  719. }
  720. isWord = true;
  721. }
  722. bool MatchLiteralNode::IsRefiningAssertion(Compiler& compiler)
  723. {
  724. return false;
  725. }
  726. void MatchLiteralNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  727. {
  728. features = HasMatchLiteral;
  729. thisConsumes.Exact(length);
  730. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  731. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  732. firstSet->Set(compiler.ctAllocator, compiler.program->rep.insts.litbuf[offset + i]);
  733. isFirstExact = true;
  734. isThisIrrefutable = false;
  735. isThisWillNotProgress = true;
  736. isThisWillNotRegress = true;
  737. isNotInLoop = parentNotInLoop;
  738. isAtLeastOnce = parentAtLeastOnce;
  739. isNotSpeculative = parentNotSpeculative;
  740. isNotNegated = parentNotNegated;
  741. }
  742. void MatchLiteralNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  743. {
  744. prevConsumes = accumConsumes;
  745. isPrevWillNotProgress = accumPrevWillNotProgress;
  746. isPrevWillNotRegress = accumPrevWillNotRegress;
  747. }
  748. void MatchLiteralNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  749. {
  750. followConsumes = accumConsumes;
  751. followSet = accumFollow;
  752. isFollowIrrefutable = accumFollowIrrefutable;
  753. isFollowEOL = accumFollowEOL;
  754. }
  755. void MatchLiteralNode::AnnotatePass4(Compiler& compiler)
  756. {
  757. isDeterministic = true;
  758. }
  759. bool MatchLiteralNode::SupportsPrefixSkipping(Compiler& compiler) const
  760. {
  761. return true;
  762. }
  763. Node* MatchLiteralNode::HeadSyncronizingNode(Compiler& compiler)
  764. {
  765. return this;
  766. }
  767. CharCount MatchLiteralNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  768. {
  769. numLiterals++;
  770. return length;
  771. }
  772. void MatchLiteralNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  773. {
  774. ScannerMixin* scanner =
  775. scanners.Add(compiler.GetScriptContext()->GetRecycler(), compiler.GetProgram(), offset, length, isEquivClass);
  776. scanner->scanner.Setup(compiler.rtAllocator, compiler.program->rep.insts.litbuf + offset, length, isEquivClass ? CaseInsensitive::EquivClassSize : 1);
  777. }
  778. void MatchLiteralNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  779. {
  780. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  781. bestNode = this;
  782. }
  783. void MatchLiteralNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  784. {
  785. }
  786. void MatchLiteralNode::Emit(Compiler& compiler, CharCount& skipped)
  787. {
  788. if (skipped >= length)
  789. {
  790. // Asking to skip entire literal
  791. skipped -= length;
  792. return;
  793. }
  794. //
  795. // Compilation scheme:
  796. //
  797. // Match(Char|Char4|Literal|LiteralEquiv)Inst
  798. //
  799. CharCount effectiveOffset = offset + skipped * (isEquivClass ? CaseInsensitive::EquivClassSize : 1);
  800. CharCount effectiveLength = length - skipped;
  801. skipped -= min(skipped, length);
  802. if (effectiveLength == 1)
  803. {
  804. Char* cs = compiler.program->rep.insts.litbuf + effectiveOffset;
  805. MatchCharNode::Emit(compiler, cs, isEquivClass);
  806. }
  807. else
  808. {
  809. if (isEquivClass)
  810. EMIT(compiler, MatchLiteralEquivInst, effectiveOffset, effectiveLength);
  811. else
  812. EMIT(compiler, MatchLiteralInst, effectiveOffset, effectiveLength);
  813. }
  814. }
  815. CompileAssert(CaseInsensitive::EquivClassSize == 4);
  816. CharCount MatchLiteralNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  817. {
  818. //
  819. // Compilation scheme:
  820. //
  821. // SyncTo(Literal|LiteralEquiv|LinearLiteral)And(Continue|Consume|Backup)
  822. //
  823. Char * litptr = compiler.program->rep.insts.litbuf + offset;
  824. if (isHeadSyncronizingNode)
  825. {
  826. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  827. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  828. if (isEquivClass)
  829. {
  830. const uint lastPatCharIndex = length - 1;
  831. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  832. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  833. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  834. {
  835. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  836. }
  837. else
  838. {
  839. EMIT(compiler, SyncToLiteralEquivAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  840. }
  841. }
  842. else if (length == 1)
  843. EMIT(compiler, SyncToCharAndConsumeInst, litptr[0]);
  844. else if (length == 2)
  845. EMIT(compiler, SyncToChar2LiteralAndConsumeInst, litptr[0], litptr[1]);
  846. else
  847. {
  848. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  849. switch (setup.GetScheme())
  850. {
  851. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  852. EMIT(compiler, SyncToLinearLiteralAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  853. break;
  854. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  855. EMIT(compiler, SyncToLiteralAndConsumeInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  856. break;
  857. };
  858. }
  859. return length;
  860. }
  861. else
  862. {
  863. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  864. // inside a group for instance, then we won't need to back up but we cannot consume the literal.
  865. if (prevConsumes.IsExact(0))
  866. {
  867. if (isEquivClass)
  868. {
  869. const uint lastPatCharIndex = length - 1;
  870. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  871. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  872. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  873. {
  874. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  875. }
  876. else
  877. {
  878. EMIT(compiler, SyncToLiteralEquivAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  879. }
  880. }
  881. else if (length == 1)
  882. EMIT(compiler, SyncToCharAndContinueInst, litptr[0]);
  883. else if (length == 2)
  884. EMIT(compiler, SyncToChar2LiteralAndContinueInst, litptr[0], litptr[1]);
  885. else
  886. {
  887. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  888. switch (setup.GetScheme())
  889. {
  890. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  891. EMIT(compiler, SyncToLinearLiteralAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  892. break;
  893. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  894. EMIT(compiler, SyncToLiteralAndContinueInst, offset, length)->scanner.Setup(compiler.rtAllocator, setup);
  895. break;
  896. };
  897. }
  898. }
  899. else
  900. {
  901. if (isEquivClass)
  902. {
  903. const uint lastPatCharIndex = length - 1;
  904. if (litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 1]
  905. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 2]
  906. && litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize] == litptr[lastPatCharIndex * CaseInsensitive::EquivClassSize + 3])
  907. {
  908. EMIT(compiler, SyncToLiteralEquivTrivialLastPatCharAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  909. }
  910. else
  911. {
  912. EMIT(compiler, SyncToLiteralEquivAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, litptr, length, CaseInsensitive::EquivClassSize);
  913. }
  914. }
  915. else if (length == 1)
  916. EMIT(compiler, SyncToCharAndBackupInst, litptr[0], prevConsumes);
  917. else if (length == 2)
  918. EMIT(compiler, SyncToChar2LiteralAndBackupInst, litptr[0], litptr[1], prevConsumes);
  919. else
  920. {
  921. TextbookBoyerMooreSetup<char16> setup(litptr, length);
  922. switch (setup.GetScheme())
  923. {
  924. case TextbookBoyerMooreSetup<char16>::LinearScheme:
  925. EMIT(compiler, SyncToLinearLiteralAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, setup);
  926. break;
  927. case TextbookBoyerMooreSetup<char16>::DefaultScheme:
  928. EMIT(compiler, SyncToLiteralAndBackupInst, offset, length, prevConsumes)->scanner.Setup(compiler.rtAllocator, setup);
  929. break;
  930. };
  931. }
  932. }
  933. return 0;
  934. }
  935. }
  936. bool MatchLiteralNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  937. {
  938. // We look for octoquad patterns before converting for case-insensitivity
  939. Assert(!isEquivClass);
  940. if (!oi->CouldAppend(length))
  941. return false;
  942. for (CharCount i = 0; i < length; i++)
  943. {
  944. if (!oi->AppendChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), compiler.program->rep.insts.litbuf[offset + i])))
  945. return false;
  946. }
  947. return true;
  948. }
  949. bool MatchLiteralNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  950. {
  951. if (isEquivClass)
  952. // The literal would expand into length^3 alternatives
  953. return false;
  954. return true;
  955. }
  956. bool MatchLiteralNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  957. {
  958. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  959. Assert(!isEquivClass);
  960. CharTrie* tail = trie;
  961. for (CharCount i = 0; i < length; i++)
  962. {
  963. if (tail->IsAccepting())
  964. {
  965. // An earlier literal is a prefix of this literal
  966. // If isAcceptFirst, can ignore suffix of already recognized literal.
  967. // Otherwise, must fail.
  968. return isAcceptFirst;
  969. }
  970. CharTrie* newTail = tail->Add(compiler.ctAllocator, compiler.program->rep.insts.litbuf[offset + i]);
  971. if (tail->Count() > maxTrieArmExpansion)
  972. return false;
  973. tail = newTail;
  974. }
  975. if (cont == 0)
  976. {
  977. if (tail->Count() > 0)
  978. // This literal is a proper prefix of an earlier literal
  979. return false;
  980. tail->SetAccepting();
  981. }
  982. else
  983. {
  984. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  985. return false;
  986. }
  987. return true;
  988. }
  989. #if ENABLE_REGEX_CONFIG_OPTIONS
  990. void MatchLiteralNode::Print(DebugWriter* w, const Char* litbuf) const
  991. {
  992. w->Print(_u("MatchLiteral("));
  993. int skip = isEquivClass ? CaseInsensitive::EquivClassSize : 1;
  994. for (int i = 0; i < skip; i++)
  995. {
  996. if (i > 0)
  997. w->Print(_u(", "));
  998. w->Print(_u("\""));
  999. for (CharCount j = 0; j < length; j++)
  1000. w->PrintEscapedChar(litbuf[offset + j * skip + i]);
  1001. w->Print(_u("\""));
  1002. }
  1003. w->PrintEOL(_u(")"));
  1004. PrintAnnotations(w);
  1005. }
  1006. #endif
  1007. // ----------------------------------------------------------------------
  1008. // MatchCharNode
  1009. // ----------------------------------------------------------------------
  1010. CharCount MatchCharNode::LiteralLength() const
  1011. {
  1012. return 1;
  1013. }
  1014. void MatchCharNode::AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const
  1015. {
  1016. Assert(!isEquivClass);
  1017. Assert(litbufNext + 1 <= litbufLen);
  1018. if (litbufNext + 1 <= litbufLen) // for prefast
  1019. litbuf[litbufNext++] = cs[0];
  1020. }
  1021. bool MatchCharNode::IsCharOrPositiveSet() const
  1022. {
  1023. return true;
  1024. }
  1025. CharCount MatchCharNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1026. {
  1027. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0)
  1028. {
  1029. Char equivs[CaseInsensitive::EquivClassSize];
  1030. bool isNonTrivial = compiler.standardChars->ToEquivs(compiler.program->GetCaseMappingSource(), cs[0], equivs);
  1031. if (isNonTrivial)
  1032. {
  1033. isEquivClass = true;
  1034. for (int i = 0; i < CaseInsensitive::EquivClassSize; i++)
  1035. {
  1036. cs[i] = equivs[i];
  1037. }
  1038. }
  1039. }
  1040. return 0;
  1041. }
  1042. void MatchCharNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1043. {
  1044. }
  1045. bool MatchCharNode::IsRefiningAssertion(Compiler& compiler)
  1046. {
  1047. return false;
  1048. }
  1049. void MatchCharNode::AnnotatePass0(Compiler& compiler)
  1050. {
  1051. // If c is a word char then all characters equivalent to c are word chars
  1052. isWord = compiler.standardChars->IsWord(cs[0]);
  1053. }
  1054. void MatchCharNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1055. {
  1056. features = HasMatchChar;
  1057. thisConsumes.Exact(1);
  1058. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1059. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1060. firstSet->Set(compiler.ctAllocator, cs[i]);
  1061. isFirstExact = true;
  1062. isThisIrrefutable = false;
  1063. isThisWillNotProgress = true;
  1064. isThisWillNotRegress = true;
  1065. isNotInLoop = parentNotInLoop;
  1066. isAtLeastOnce = parentAtLeastOnce;
  1067. isNotSpeculative = parentNotSpeculative;
  1068. isNotNegated = parentNotNegated;
  1069. }
  1070. void MatchCharNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1071. {
  1072. prevConsumes = accumConsumes;
  1073. isPrevWillNotProgress = accumPrevWillNotProgress;
  1074. isPrevWillNotRegress = accumPrevWillNotRegress;
  1075. }
  1076. void MatchCharNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1077. {
  1078. followConsumes = accumConsumes;
  1079. followSet = accumFollow;
  1080. isFollowIrrefutable = accumFollowIrrefutable;
  1081. isFollowEOL = accumFollowEOL;
  1082. }
  1083. void MatchCharNode::AnnotatePass4(Compiler& compiler)
  1084. {
  1085. isDeterministic = true;
  1086. }
  1087. bool MatchCharNode::SupportsPrefixSkipping(Compiler& compiler) const
  1088. {
  1089. return true;
  1090. }
  1091. Node* MatchCharNode::HeadSyncronizingNode(Compiler& compiler)
  1092. {
  1093. return this;
  1094. }
  1095. CharCount MatchCharNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1096. {
  1097. numLiterals++;
  1098. return 1;
  1099. }
  1100. void MatchCharNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1101. {
  1102. Assert(false);
  1103. }
  1104. void MatchCharNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1105. {
  1106. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1107. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  1108. {
  1109. bestNode = this;
  1110. }
  1111. }
  1112. void MatchCharNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1113. {
  1114. }
  1115. CompileAssert(CaseInsensitive::EquivClassSize == 4);
  1116. void MatchCharNode::Emit(Compiler& compiler, __in_ecount(4) Char * cs, bool isEquivClass)
  1117. {
  1118. if (isEquivClass)
  1119. {
  1120. Char uniqueEquivs[CaseInsensitive::EquivClassSize];
  1121. CharCount uniqueEquivCount = FindUniqueEquivs(cs, uniqueEquivs);
  1122. switch (uniqueEquivCount)
  1123. {
  1124. case 2:
  1125. EMIT(compiler, MatchChar2Inst, uniqueEquivs[0], uniqueEquivs[1]);
  1126. break;
  1127. case 3:
  1128. EMIT(compiler, MatchChar3Inst, uniqueEquivs[0], uniqueEquivs[1], uniqueEquivs[2]);
  1129. break;
  1130. default:
  1131. EMIT(compiler, MatchChar4Inst, uniqueEquivs[0], uniqueEquivs[1], uniqueEquivs[2], uniqueEquivs[3]);
  1132. break;
  1133. }
  1134. }
  1135. else
  1136. EMIT(compiler, MatchCharInst, cs[0]);
  1137. }
  1138. CharCount MatchCharNode::FindUniqueEquivs(const Char equivs[CaseInsensitive::EquivClassSize], __out_ecount(4) Char uniqueEquivs[CaseInsensitive::EquivClassSize])
  1139. {
  1140. uniqueEquivs[0] = equivs[0];
  1141. CharCount uniqueCount = 1;
  1142. for (CharCount equivIndex = 1; equivIndex < CaseInsensitive::EquivClassSize; ++equivIndex)
  1143. {
  1144. bool alreadyHave = false;
  1145. for (CharCount uniqueIndex = 0; uniqueIndex < uniqueCount; ++uniqueIndex)
  1146. {
  1147. if (uniqueEquivs[uniqueIndex] == equivs[equivIndex])
  1148. {
  1149. alreadyHave = true;
  1150. break;
  1151. }
  1152. }
  1153. if (!alreadyHave)
  1154. {
  1155. uniqueEquivs[uniqueCount] = equivs[equivIndex];
  1156. uniqueCount += 1;
  1157. }
  1158. }
  1159. return uniqueCount;
  1160. }
  1161. void MatchCharNode::Emit(Compiler& compiler, CharCount& skipped)
  1162. {
  1163. if (skipped >= 1)
  1164. {
  1165. // Asking to skip entire char
  1166. skipped--;
  1167. return;
  1168. }
  1169. //
  1170. // Compilation scheme:
  1171. //
  1172. // MatchChar(2|3|4)?
  1173. //
  1174. skipped -= min(skipped, static_cast<CharCount>(1));
  1175. Emit(compiler, cs, isEquivClass);
  1176. }
  1177. CharCount MatchCharNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1178. {
  1179. //
  1180. // Compilation scheme:
  1181. //
  1182. // SyncTo(Char|Char2Set|Set)And(Consume|Continue|Backup)
  1183. //
  1184. if (isHeadSyncronizingNode)
  1185. {
  1186. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  1187. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  1188. if (firstSet->IsSingleton())
  1189. EMIT(compiler, SyncToCharAndConsumeInst, firstSet->Singleton());
  1190. else
  1191. EMIT(compiler, SyncToSetAndConsumeInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1192. return 1;
  1193. }
  1194. else
  1195. {
  1196. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  1197. // inside a group for instance, then we won't need to back up but we cannot consume the literal. If we don't need to
  1198. // back up, we can use SyncToCharAndContinue instead.
  1199. if (prevConsumes.IsExact(0))
  1200. {
  1201. Char entries[CharSet<Char>::MaxCompact];
  1202. int count = firstSet->GetCompactEntries(2, entries);
  1203. if (count == 1)
  1204. EMIT(compiler, SyncToCharAndContinueInst, entries[0]);
  1205. else if (count == 2)
  1206. EMIT(compiler, SyncToChar2SetAndContinueInst, entries[0], entries[1]);
  1207. else
  1208. EMIT(compiler, SyncToSetAndContinueInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1209. }
  1210. else
  1211. {
  1212. if (firstSet->IsSingleton())
  1213. EMIT(compiler, SyncToCharAndBackupInst, firstSet->Singleton(), prevConsumes);
  1214. else
  1215. EMIT(compiler, SyncToSetAndBackupInst<false>, prevConsumes)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  1216. }
  1217. return 0;
  1218. }
  1219. }
  1220. bool MatchCharNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1221. {
  1222. // We look for octoquad patterns before converting for case-insensitivity
  1223. Assert(!isEquivClass);
  1224. return oi->AppendChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), cs[0]));
  1225. }
  1226. bool MatchCharNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1227. {
  1228. if (isEquivClass)
  1229. {
  1230. accNumAlts *= CaseInsensitive::EquivClassSize;
  1231. if (accNumAlts > maxTrieArmExpansion)
  1232. return false;
  1233. }
  1234. return true;
  1235. }
  1236. bool MatchCharNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1237. {
  1238. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1239. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1240. {
  1241. if (trie->IsAccepting())
  1242. {
  1243. // An earlier literal is a prefix of this literal
  1244. // If isAcceptFirst, can ignore suffix of already recognized literal.
  1245. // Otherwise, must fail.
  1246. return isAcceptFirst;
  1247. }
  1248. CharTrie* tail = trie->Add(compiler.ctAllocator, cs[i]);
  1249. if (trie->Count() > maxTrieArmExpansion)
  1250. return false;
  1251. if (cont == 0)
  1252. {
  1253. if (tail->Count() > 0)
  1254. // This literal is a proper prefix of an earlier literal
  1255. return false;
  1256. tail->SetAccepting();
  1257. }
  1258. else
  1259. {
  1260. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  1261. return false;
  1262. }
  1263. }
  1264. return true;
  1265. }
  1266. #if ENABLE_REGEX_CONFIG_OPTIONS
  1267. void MatchCharNode::Print(DebugWriter* w, const Char* litbuf) const
  1268. {
  1269. w->Print(_u("MatchChar("));
  1270. for (int i = 0; i < (isEquivClass ? CaseInsensitive::EquivClassSize : 1); i++)
  1271. {
  1272. if (i > 0)
  1273. w->Print(_u(", "));
  1274. w->PrintQuotedChar(cs[i]);
  1275. }
  1276. w->PrintEOL(_u(")"));
  1277. PrintAnnotations(w);
  1278. }
  1279. #endif
  1280. // ----------------------------------------------------------------------
  1281. // MatchSetNode
  1282. // ----------------------------------------------------------------------
  1283. CharCount MatchSetNode::LiteralLength() const
  1284. {
  1285. return 0;
  1286. }
  1287. bool MatchSetNode::IsCharOrPositiveSet() const
  1288. {
  1289. return !isNegation;
  1290. }
  1291. CharCount MatchSetNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1292. {
  1293. return 0;
  1294. }
  1295. void MatchSetNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1296. {
  1297. if ((compiler.program->flags & IgnoreCaseRegexFlag) != 0 && this->needsEquivClass)
  1298. {
  1299. // If the set is negated, then at this point we could:
  1300. // (1) take each char in the set to its equivalence class and complement it
  1301. // (2) complement the set and take each char to its equivalence class
  1302. // Thankfully the spec demands (1), so we don't need to actually calculate any complement, just
  1303. // retain the isNegated flag
  1304. CharSet<Char> newSet;
  1305. // First include all the existing characters in the result set so that large ranges such as \x80-\uffff
  1306. // don't temporarily turn into a large number of small ranges corresponding to the various equivalent
  1307. // characters
  1308. newSet.UnionInPlace(compiler.rtAllocator, set);
  1309. set.ToEquivClass(compiler.rtAllocator, newSet);
  1310. set = newSet;
  1311. }
  1312. }
  1313. bool MatchSetNode::IsRefiningAssertion(Compiler& compiler)
  1314. {
  1315. return false;
  1316. }
  1317. void MatchSetNode::AnnotatePass0(Compiler& compiler)
  1318. {
  1319. if (isNegation || set.IsEmpty())
  1320. isWord = false;
  1321. else
  1322. isWord = set.IsSubsetOf(*compiler.standardChars->GetWordSet());
  1323. }
  1324. void MatchSetNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1325. {
  1326. features = HasMatchSet;
  1327. if (isNegation)
  1328. {
  1329. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1330. set.ToComplement(compiler.ctAllocator, *firstSet);
  1331. }
  1332. else
  1333. {
  1334. // CAUTION:
  1335. // Be careful not to use firstSet after deleting the node.
  1336. firstSet = &set;
  1337. }
  1338. isFirstExact = true;
  1339. // If firstSet is empty then pattern will always fail
  1340. thisConsumes.Exact(firstSet->IsEmpty() ? 0 : 1);
  1341. isThisIrrefutable = false;
  1342. isThisWillNotProgress = true;
  1343. isThisWillNotRegress = true;
  1344. isNotInLoop = parentNotInLoop;
  1345. isAtLeastOnce = parentAtLeastOnce;
  1346. isNotSpeculative = parentNotSpeculative;
  1347. isNotNegated = parentNotNegated;
  1348. }
  1349. void MatchSetNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1350. {
  1351. prevConsumes = accumConsumes;
  1352. isPrevWillNotProgress = accumPrevWillNotProgress;
  1353. isPrevWillNotRegress = accumPrevWillNotRegress;
  1354. }
  1355. void MatchSetNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1356. {
  1357. followConsumes = accumConsumes;
  1358. followSet = accumFollow;
  1359. isFollowIrrefutable = accumFollowIrrefutable;
  1360. isFollowEOL = accumFollowEOL;
  1361. }
  1362. void MatchSetNode::AnnotatePass4(Compiler& compiler)
  1363. {
  1364. isDeterministic = true;
  1365. }
  1366. bool MatchSetNode::SupportsPrefixSkipping(Compiler& compiler) const
  1367. {
  1368. return true;
  1369. }
  1370. Node* MatchSetNode::HeadSyncronizingNode(Compiler& compiler)
  1371. {
  1372. return this;
  1373. }
  1374. CharCount MatchSetNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1375. {
  1376. return 0;
  1377. }
  1378. void MatchSetNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1379. {
  1380. Assert(false);
  1381. }
  1382. void MatchSetNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1383. {
  1384. }
  1385. void MatchSetNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1386. {
  1387. }
  1388. void MatchSetNode::Emit(Compiler& compiler, CharCount& skipped)
  1389. {
  1390. if (skipped >= 1)
  1391. {
  1392. // Asking to skip entire set
  1393. skipped--;
  1394. return;
  1395. }
  1396. //
  1397. // Compilation scheme:
  1398. //
  1399. // MatchSet
  1400. //
  1401. skipped -= min(skipped, static_cast<CharCount>(1));
  1402. RuntimeCharSet<Char> *runtimeSet;
  1403. if(isNegation)
  1404. runtimeSet = &EMIT(compiler, MatchSetInst<true>)->set;
  1405. else
  1406. runtimeSet = &EMIT(compiler, MatchSetInst<false>)->set;
  1407. runtimeSet->CloneFrom(compiler.rtAllocator, set);
  1408. }
  1409. CharCount MatchSetNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1410. {
  1411. //
  1412. // Compilation scheme:
  1413. //
  1414. // SyncToSetAnd(Consume|Continue|Backup)
  1415. //
  1416. RuntimeCharSet<Char> *runtimeSet;
  1417. CharCount consumedChars;
  1418. if (isHeadSyncronizingNode)
  1419. {
  1420. // For a head literal there's no need to back up after finding the literal, so use a faster instruction
  1421. Assert(prevConsumes.IsExact(0)); // there should not be any consumes before this node
  1422. if(isNegation)
  1423. runtimeSet = &EMIT(compiler, SyncToSetAndConsumeInst<true>)->set;
  1424. else
  1425. runtimeSet = &EMIT(compiler, SyncToSetAndConsumeInst<false>)->set;
  1426. consumedChars = 1;
  1427. }
  1428. else
  1429. {
  1430. // We're synchronizing on a non-head literal so we may need to back up. Or if we're syncing to the first literal
  1431. // inside a group for instance, then we won't need to back up but we cannot consume the literal. If we don't need to
  1432. // back up, we still cannot use SyncToSetAndContinue like in MatchCharNode::EmitScan, since SyncToSetAndContinue does not support negation
  1433. // sets.
  1434. if(prevConsumes.IsExact(0))
  1435. {
  1436. if(isNegation)
  1437. runtimeSet = &EMIT(compiler, SyncToSetAndContinueInst<true>)->set;
  1438. else
  1439. runtimeSet = &EMIT(compiler, SyncToSetAndContinueInst<false>)->set;
  1440. }
  1441. else if(isNegation)
  1442. runtimeSet = &EMIT(compiler, SyncToSetAndBackupInst<true>, prevConsumes)->set;
  1443. else
  1444. runtimeSet = &EMIT(compiler, SyncToSetAndBackupInst<false>, prevConsumes)->set;
  1445. consumedChars = 0;
  1446. }
  1447. runtimeSet->CloneFrom(compiler.rtAllocator, set);
  1448. return consumedChars;
  1449. }
  1450. bool MatchSetNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1451. {
  1452. if (isNegation || set.IsEmpty() || !oi->BeginUnions())
  1453. return false;
  1454. Assert(CharSet<Char>::MaxCompact >= TrigramAlphabet::AlphaCount);
  1455. Char entries[CharSet<Char>::MaxCompact];
  1456. int count = set.GetCompactEntries(TrigramAlphabet::AlphaCount, entries);
  1457. if (count < 0)
  1458. // Too many unique characters
  1459. return false;
  1460. for (int i = 0; i < count; i++)
  1461. {
  1462. if (!oi->UnionChar(compiler.standardChars->ToCanonical(compiler.program->GetCaseMappingSource(), entries[i])))
  1463. // Too many unique characters
  1464. return false;
  1465. }
  1466. oi->EndUnions(); // this doesn't need to be called if we return false earlier since the OctoquadPattern won't be used
  1467. return true;
  1468. }
  1469. bool MatchSetNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1470. {
  1471. if (isNegation || !set.IsCompact())
  1472. return false;
  1473. const uint count = set.Count();
  1474. if(count == 0)
  1475. return false; // empty set always fails and consumes nothing, and therefore is not a char-trie arm
  1476. accNumAlts *= count;
  1477. if (accNumAlts > maxTrieArmExpansion)
  1478. return false;
  1479. return true;
  1480. }
  1481. bool MatchSetNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1482. {
  1483. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1484. Assert(!isNegation && set.IsCompact());
  1485. Char entries[CharSet<Char>::MaxCompact];
  1486. int count = set.GetCompactEntries(CharSet<Char>::MaxCompact, entries);
  1487. Assert(count > 0);
  1488. for (int i = 0; i < count; i++)
  1489. {
  1490. if (trie->IsAccepting())
  1491. {
  1492. // An earlier literal is a prefix of this literal
  1493. // If isAcceptFirst, can ignore suffix of already recognized literal.
  1494. // Otherwise, must fail.
  1495. return isAcceptFirst;
  1496. }
  1497. CharTrie* tail = trie->Add(compiler.ctAllocator, entries[i]);
  1498. if (trie->Count() > maxTrieArmExpansion)
  1499. return false;
  1500. if (cont == 0)
  1501. {
  1502. if (tail->Count() > 0)
  1503. // This literal is a proper prefix of an earlier literal
  1504. return false;
  1505. tail->SetAccepting();
  1506. }
  1507. else
  1508. {
  1509. if (!cont->BuildCharTrie(compiler, tail, 0, isAcceptFirst))
  1510. return false;
  1511. }
  1512. }
  1513. return true;
  1514. }
  1515. #if ENABLE_REGEX_CONFIG_OPTIONS
  1516. void MatchSetNode::Print(DebugWriter* w, const Char* litbuf) const
  1517. {
  1518. w->Print(_u("MatchSet(%s, "), isNegation ? _u("negative") : _u("positive"));
  1519. set.Print(w);
  1520. w->PrintEOL(_u(")"));
  1521. PrintAnnotations(w);
  1522. }
  1523. #endif
  1524. // ----------------------------------------------------------------------
  1525. // ConcatNode
  1526. // ----------------------------------------------------------------------
  1527. CharCount ConcatNode::LiteralLength() const
  1528. {
  1529. return 0;
  1530. }
  1531. bool ConcatNode::IsCharOrPositiveSet() const
  1532. {
  1533. return false;
  1534. }
  1535. CharCount ConcatNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1536. {
  1537. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1538. Assert(tail != 0);
  1539. CharCount n = 0;
  1540. #if DBG
  1541. ConcatNode* prev = 0;
  1542. #endif
  1543. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1544. {
  1545. Assert(curr->head->tag != Concat);
  1546. Assert(prev == 0 || !(prev->head->LiteralLength() > 0 && curr->head->LiteralLength() > 0));
  1547. n += curr->head->TransferPass0(compiler, litbuf);
  1548. #if DBG
  1549. prev = curr;
  1550. #endif
  1551. }
  1552. return n;
  1553. }
  1554. void ConcatNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1555. {
  1556. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1557. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1558. curr->head->TransferPass1(compiler, litbuf);
  1559. }
  1560. bool ConcatNode::IsRefiningAssertion(Compiler& compiler)
  1561. {
  1562. return false;
  1563. }
  1564. void ConcatNode::AnnotatePass0(Compiler& compiler)
  1565. {
  1566. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1567. Node* prev = 0;
  1568. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1569. {
  1570. curr->head->AnnotatePass0(compiler);
  1571. if (prev != 0)
  1572. {
  1573. if (curr->head->tag == WordBoundary && prev->isWord)
  1574. {
  1575. WordBoundaryNode* wb = (WordBoundaryNode*)curr->head;
  1576. wb->mustIncludeLeaving = true;
  1577. }
  1578. else if (prev->tag == WordBoundary && curr->head->isWord)
  1579. {
  1580. WordBoundaryNode* wb = (WordBoundaryNode*)prev;
  1581. wb->mustIncludeEntering = true;
  1582. }
  1583. }
  1584. prev = curr->head;
  1585. }
  1586. }
  1587. void ConcatNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1588. {
  1589. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1590. features = HasConcat;
  1591. isNotInLoop = parentNotInLoop;
  1592. isAtLeastOnce = parentAtLeastOnce;
  1593. isNotSpeculative = parentNotSpeculative;
  1594. isNotNegated = parentNotNegated;
  1595. // Pass 1: Count items
  1596. int n = 0;
  1597. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1598. n++;
  1599. // Pass 2: Annotate each item, accumulate features, consumes, find longest prefix of possibly-empty-accepting items,
  1600. // check if all items are irrefutable
  1601. int emptyPrefix = 0;
  1602. thisConsumes.Exact(0);
  1603. isThisIrrefutable = true;
  1604. isThisWillNotProgress = true;
  1605. isThisWillNotRegress = true;
  1606. int item = 0;
  1607. for (ConcatNode* curr = this; curr != 0; curr = curr->tail, item++)
  1608. {
  1609. curr->head->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, parentNotSpeculative, isNotNegated);
  1610. features |= curr->head->features;
  1611. thisConsumes.Add(curr->head->thisConsumes);
  1612. if (!curr->head->isThisIrrefutable)
  1613. isThisIrrefutable = false;
  1614. if (!curr->head->isThisWillNotProgress)
  1615. isThisWillNotProgress = false;
  1616. if (!curr->head->isThisWillNotRegress)
  1617. isThisWillNotRegress = false;
  1618. if (emptyPrefix == item && curr->head->thisConsumes.CouldMatchEmpty())
  1619. emptyPrefix++;
  1620. }
  1621. if (emptyPrefix == 0)
  1622. {
  1623. firstSet = head->firstSet;
  1624. isFirstExact = head->isFirstExact;
  1625. }
  1626. else
  1627. {
  1628. // Pass 3: Overall first set is union of first's of emptyPrefx
  1629. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1630. isFirstExact = true;
  1631. item = 0;
  1632. for (ConcatNode* curr = this; item <= min(emptyPrefix, n - 1); curr = curr->tail, item++)
  1633. {
  1634. AnalysisAssert(curr);
  1635. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  1636. if (!curr->head->isFirstExact)
  1637. isFirstExact = false;
  1638. }
  1639. }
  1640. }
  1641. void ConcatNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1642. {
  1643. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1644. prevConsumes = accumConsumes;
  1645. isPrevWillNotProgress = accumPrevWillNotProgress;
  1646. isPrevWillNotRegress = accumPrevWillNotRegress;
  1647. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1648. {
  1649. curr->head->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  1650. accumConsumes.Add(curr->head->thisConsumes);
  1651. if (!curr->head->isThisWillNotProgress)
  1652. accumPrevWillNotProgress = false;
  1653. if (!curr->head->isThisWillNotRegress)
  1654. accumPrevWillNotRegress = false;
  1655. }
  1656. }
  1657. void ConcatNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1658. {
  1659. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1660. followConsumes = accumConsumes;
  1661. followSet = accumFollow;
  1662. isFollowIrrefutable = accumFollowIrrefutable;
  1663. isFollowEOL = accumFollowEOL;
  1664. // Pass 1: Count items
  1665. int n = 0;
  1666. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1667. n++;
  1668. // Pass 2: Collect items so we can enumerate backwards
  1669. Node** nodes = AnewArray(compiler.ctAllocator, Node *, n);
  1670. int item = 0;
  1671. for (ConcatNode* curr = this; curr != 0; curr = curr->tail, item++)
  1672. nodes[item] = curr->head;
  1673. // Pass 3: Work backwards propagating follow set, irrefutability and FollowEndLineOrPattern, and adding consumes
  1674. CharSet<Char>* innerFollow = accumFollow;
  1675. for (item = n - 1; item >= 0; item--)
  1676. {
  1677. nodes[item]->AnnotatePass3(compiler, accumConsumes, innerFollow, accumFollowIrrefutable, accumFollowEOL);
  1678. if (!nodes[item]->isThisIrrefutable)
  1679. accumFollowIrrefutable = false;
  1680. if (!nodes[item]->IsEmptyOnly() && (compiler.program->flags & MultilineRegexFlag) == 0)
  1681. accumFollowEOL = nodes[item]->tag == EOL;
  1682. // ConcatNode has hardfail BOI test if any child has hardfail BOI
  1683. hasInitialHardFailBOI = hasInitialHardFailBOI || nodes[item]->hasInitialHardFailBOI;
  1684. if (item > 0)
  1685. {
  1686. CharSet<Char>* nextInnerFollow = Anew(compiler.ctAllocator, UnicodeCharSet);
  1687. if (nodes[item]->thisConsumes.CouldMatchEmpty() && !nodes[item]->IsRefiningAssertion(compiler))
  1688. {
  1689. // Later follows can shine through this item to the previous item
  1690. nextInnerFollow->UnionInPlace(compiler.ctAllocator, *innerFollow);
  1691. }
  1692. nextInnerFollow->UnionInPlace(compiler.ctAllocator, *nodes[item]->firstSet);
  1693. innerFollow = nextInnerFollow;
  1694. accumConsumes.Add(nodes[item]->thisConsumes);
  1695. }
  1696. }
  1697. }
  1698. void ConcatNode::AnnotatePass4(Compiler& compiler)
  1699. {
  1700. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1701. isDeterministic = true;
  1702. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1703. {
  1704. curr->head->AnnotatePass4(compiler);
  1705. if (!curr->head->isDeterministic)
  1706. isDeterministic = false;
  1707. }
  1708. }
  1709. bool ConcatNode::SupportsPrefixSkipping(Compiler& compiler) const
  1710. {
  1711. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1712. int prefix = 0;
  1713. for (const ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1714. {
  1715. if (curr->head->SupportsPrefixSkipping(compiler))
  1716. prefix++;
  1717. else
  1718. break;
  1719. }
  1720. return prefix > 0;
  1721. }
  1722. Node* ConcatNode::HeadSyncronizingNode(Compiler& compiler)
  1723. {
  1724. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1725. return head->HeadSyncronizingNode(compiler);
  1726. }
  1727. void ConcatNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  1728. {
  1729. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  1730. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1731. curr->head->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  1732. }
  1733. CharCount ConcatNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  1734. {
  1735. return 0;
  1736. }
  1737. void ConcatNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  1738. {
  1739. Assert(false);
  1740. }
  1741. void ConcatNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  1742. {
  1743. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1744. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1745. curr->head->BestSyncronizingNode(compiler, bestNode);
  1746. }
  1747. void ConcatNode::Emit(Compiler& compiler, CharCount& skipped)
  1748. {
  1749. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1750. //
  1751. // Compilation scheme:
  1752. //
  1753. // <item 1>
  1754. // ...
  1755. // <item n>
  1756. //
  1757. // :-)
  1758. for (ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1759. curr->head->Emit(compiler, skipped);
  1760. }
  1761. CharCount ConcatNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  1762. {
  1763. Assert(false);
  1764. return 0;
  1765. }
  1766. bool ConcatNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  1767. {
  1768. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1769. for (ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1770. {
  1771. if (!curr->head->IsOctoquad(compiler, oi))
  1772. return false;
  1773. }
  1774. return true;
  1775. }
  1776. bool ConcatNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  1777. {
  1778. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1779. for (const ConcatNode* curr = this; curr != 0; curr = curr->tail)
  1780. {
  1781. if (!curr->head->IsCharTrieArm(compiler, accNumAlts))
  1782. return false;
  1783. }
  1784. return true;
  1785. }
  1786. bool ConcatNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  1787. {
  1788. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1789. if (cont != 0)
  1790. // We don't want to manage a stack of continuations
  1791. return false;
  1792. // NOTE: This is the only place we use an internal node of a concat sequence as a sub-concat sequence
  1793. return head->BuildCharTrie(compiler, trie, tail, isAcceptFirst);
  1794. }
  1795. #if ENABLE_REGEX_CONFIG_OPTIONS
  1796. void ConcatNode::Print(DebugWriter* w, const Char* litbuf) const
  1797. {
  1798. w->PrintEOL(_u("Concat()"));
  1799. PrintAnnotations(w);
  1800. w->PrintEOL(_u("{"));
  1801. w->Indent();
  1802. for (const ConcatNode *curr = this; curr != 0; curr = curr->tail)
  1803. curr->head->Print(w, litbuf);
  1804. w->Unindent();
  1805. w->PrintEOL(_u("}"));
  1806. }
  1807. #endif
  1808. // ----------------------------------------------------------------------
  1809. // AltNode
  1810. // ----------------------------------------------------------------------
  1811. CharCount AltNode::LiteralLength() const
  1812. {
  1813. return 0;
  1814. }
  1815. bool AltNode::IsCharOrPositiveSet() const
  1816. {
  1817. return false;
  1818. }
  1819. CharCount AltNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  1820. {
  1821. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1822. Assert(tail != 0);
  1823. CharCount n = 0;
  1824. #if DBG
  1825. AltNode* prev = 0;
  1826. #endif
  1827. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1828. {
  1829. Assert(curr->head->tag != Alt);
  1830. Assert(prev == 0 || !(prev->head->IsCharOrPositiveSet() && curr->head->IsCharOrPositiveSet()));
  1831. n += curr->head->TransferPass0(compiler, litbuf);
  1832. #if DBG
  1833. prev = curr;
  1834. #endif
  1835. }
  1836. return n;
  1837. }
  1838. void AltNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  1839. {
  1840. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1841. for (AltNode *curr = this; curr != 0; curr = curr->tail)
  1842. curr->head->TransferPass1(compiler, litbuf);
  1843. }
  1844. bool AltNode::IsRefiningAssertion(Compiler& compiler)
  1845. {
  1846. return false;
  1847. }
  1848. void AltNode::AnnotatePass0(Compiler& compiler)
  1849. {
  1850. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1851. isWord = true;
  1852. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1853. {
  1854. curr->head->AnnotatePass0(compiler);
  1855. if (!curr->head->isWord)
  1856. isWord = false;
  1857. }
  1858. }
  1859. void AltNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  1860. {
  1861. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1862. features = HasAlt;
  1863. isNotInLoop = parentNotInLoop;
  1864. isAtLeastOnce = parentAtLeastOnce;
  1865. isNotSpeculative = parentNotSpeculative;
  1866. isNotNegated = parentNotNegated;
  1867. // Overall alternative:
  1868. // - is irrefutable if at least one item is irrefutable
  1869. // - will not progress(regress) if each item will not progress(regress) and has strictly decreasing(increasing) consumes
  1870. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1871. isThisIrrefutable = false;
  1872. isThisWillNotProgress = true;
  1873. isThisWillNotRegress = true;
  1874. isFirstExact = true;
  1875. CountDomain prevConsumes;
  1876. int item = 0;
  1877. for (AltNode *curr = this; curr != 0; curr = curr->tail, item++)
  1878. {
  1879. curr->head->AnnotatePass1(compiler, parentNotInLoop, false, parentNotSpeculative, isNotNegated);
  1880. features |= curr->head->features;
  1881. if (!curr->head->isThisWillNotProgress)
  1882. isThisWillNotProgress = false;
  1883. if (!curr->head->isThisWillNotRegress)
  1884. isThisWillNotRegress = false;
  1885. if (item == 0)
  1886. prevConsumes = thisConsumes = curr->head->thisConsumes;
  1887. else
  1888. {
  1889. thisConsumes.Lub(curr->head->thisConsumes);
  1890. if (!curr->head->thisConsumes.IsLessThan(prevConsumes))
  1891. isThisWillNotProgress = false;
  1892. if (!curr->head->thisConsumes.IsGreaterThan(prevConsumes))
  1893. isThisWillNotRegress = false;
  1894. prevConsumes = curr->head->thisConsumes;
  1895. }
  1896. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  1897. if (!curr->head->isFirstExact || curr->head->isThisIrrefutable)
  1898. // If any item is irrefutable then later items may never be taken, so first set cannot be exact
  1899. isFirstExact = false;
  1900. if (curr->head->isThisIrrefutable)
  1901. isThisIrrefutable = true;
  1902. }
  1903. }
  1904. void AltNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  1905. {
  1906. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1907. prevConsumes = accumConsumes;
  1908. isPrevWillNotProgress = accumPrevWillNotProgress;
  1909. isPrevWillNotRegress = accumPrevWillNotRegress;
  1910. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1911. curr->head->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  1912. }
  1913. void AltNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  1914. {
  1915. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1916. followConsumes = accumConsumes;
  1917. followSet = accumFollow;
  1918. isFollowIrrefutable = accumFollowIrrefutable;
  1919. isFollowEOL = accumFollowEOL;
  1920. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1921. {
  1922. curr->head->AnnotatePass3(compiler, accumConsumes, accumFollow, accumFollowIrrefutable, accumFollowEOL);
  1923. // AltNode has hardfail BOI test if all child Nodes have hardfail BOI tests
  1924. hasInitialHardFailBOI = curr->head->hasInitialHardFailBOI && (hasInitialHardFailBOI || (curr == this));
  1925. }
  1926. }
  1927. void AltNode::AnnotatePass4(Compiler& compiler)
  1928. {
  1929. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  1930. //
  1931. // Simplification rule
  1932. //
  1933. // If the follow is irrefutable then we can ignore all items after an irrefutable item, since
  1934. // we'll never be able to backtrack into them.
  1935. // E.g.: (a*|b*)c* === a*c*
  1936. //
  1937. bool simplified = false;
  1938. if (isFollowIrrefutable && isThisIrrefutable)
  1939. {
  1940. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1941. {
  1942. if (curr->head->isThisIrrefutable && curr->tail != 0)
  1943. {
  1944. curr->tail = 0;
  1945. simplified = true;
  1946. break;
  1947. }
  1948. }
  1949. }
  1950. if (simplified)
  1951. {
  1952. Assert(!isFirstExact);
  1953. // Recalculate firstSet. Since it can only get smaller, and alternative could not have had an exact
  1954. // first set, this recalculation does not make any decisions already made based on the current firstSet
  1955. // unsound.
  1956. // NOTE: Is it worth recalculating the WillNotProgress/WillNotRegress bools?
  1957. firstSet = Anew(compiler.ctAllocator, UnicodeCharSet);
  1958. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1959. firstSet->UnionInPlace(compiler.ctAllocator, *curr->head->firstSet);
  1960. }
  1961. //
  1962. // Annotate items
  1963. //
  1964. isDeterministic = true;
  1965. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1966. {
  1967. curr->head->AnnotatePass4(compiler);
  1968. if (!curr->head->isDeterministic)
  1969. isDeterministic = false;
  1970. }
  1971. //
  1972. // Compilation scheme: Switch/Chain/Set, not isOptional
  1973. //
  1974. // If no item can match empty and all items' FIRST sets are pairwise disjoint then we can
  1975. // commit to an item using a 1 char lookahead. We can fall-through to the last
  1976. // item without guarding it since it will fail if the next character cannot match.
  1977. // E.g.: (abc|def)
  1978. //
  1979. {
  1980. // Pass 1: Items cannot match empty, accumulate counts
  1981. bool fires = true;
  1982. bool allCompact = true;
  1983. bool allSimpleOneChar = true;
  1984. int numItems = 0;
  1985. uint totalChars = 0;
  1986. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  1987. {
  1988. if (curr->head->thisConsumes.CouldMatchEmpty())
  1989. {
  1990. fires = false;
  1991. break;
  1992. }
  1993. numItems++;
  1994. if (!curr->head->firstSet->IsCompact())
  1995. allCompact = false;
  1996. if (!curr->head->IsSimpleOneChar())
  1997. allSimpleOneChar = false;
  1998. totalChars += curr->head->firstSet->Count();
  1999. }
  2000. if (fires)
  2001. {
  2002. // To go from two to one items requires the first item
  2003. // to be irrefutable, in which case it could match empty and this rule won't fire.
  2004. Assert(numItems > 1);
  2005. // Step 2: Check FIRST sets are disjoint
  2006. if (totalChars == firstSet->Count())
  2007. {
  2008. // **COMMIT**
  2009. if (allSimpleOneChar)
  2010. // This will probably never fire since the parser has already converted alts-of-chars/sets
  2011. // to sets. We include it for symmetry with below.
  2012. scheme = Set;
  2013. else if (allCompact && totalChars <= Switch20Inst::MaxCases)
  2014. {
  2015. // Can use a switch instruction to jump to item
  2016. scheme = Switch;
  2017. switchSize = totalChars;
  2018. }
  2019. else
  2020. // Must use a chain of jump instructions to jump to item
  2021. scheme = Chain;
  2022. isOptional = false;
  2023. return;
  2024. }
  2025. }
  2026. }
  2027. //
  2028. // Compilation scheme: None/Switch/Chain/Set, isOptional
  2029. //
  2030. // Condition (1):
  2031. // If some items are empty-only, the rest (if any) cannot match empty, follow cannot match empty, and
  2032. // all items' FIRST sets are pairwise disjoint and disjoint from the FOLLOW set, then we can commit to
  2033. // either a non-empty item or to the empty item using a 1 char lookahead. In this case we just emit each
  2034. // non-empty item with a guard, and fall-through to follow if no guard fires.
  2035. // E.g.: (abc||def)h
  2036. //
  2037. // Condition (2):
  2038. // If some items are empty-only, the rest (if any) cannot match empty, follow is irrefutable, and all
  2039. // items' FIRST sets are pairwise disjoint, then we can commit to either a non-empty item or to the empty
  2040. // item using a 1 char lookahead, provided each non-empty item obeys the condition:
  2041. // ** the item can't fail if given an arbitrary input starting with a character in its FIRST set **
  2042. // Currently, we can prove that only for IsSimpleOneChar items, though more analysis could widen the class.
  2043. // Again, we emit each non-empty item with a guard, and fall-through to follow if no guard fires.
  2044. // E.g.: ([abc]|)a*
  2045. //
  2046. // Condition (3):
  2047. // If all items are empty-only, we can commit to a single empty-only item
  2048. {
  2049. // Pass 1
  2050. bool fires = false;
  2051. bool allSimpleOneChar = true;
  2052. bool allCompact = true;
  2053. int numNonEmpty = 0;
  2054. uint totalChars = 0;
  2055. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2056. {
  2057. if (curr->head->IsEmptyOnly())
  2058. fires = true;
  2059. else if (curr->head->thisConsumes.CouldMatchEmpty())
  2060. {
  2061. fires = false;
  2062. break;
  2063. }
  2064. else
  2065. {
  2066. numNonEmpty++;
  2067. if (!curr->head->IsSimpleOneChar())
  2068. allSimpleOneChar = false;
  2069. if (!curr->head->firstSet->IsCompact())
  2070. allCompact = false;
  2071. totalChars += curr->head->firstSet->Count();
  2072. }
  2073. }
  2074. if (fires)
  2075. {
  2076. // The firing condition is not strong enough yet.
  2077. fires = false;
  2078. // Check conditions (2) and (3) first because they're faster, then check condition (1).
  2079. if (numNonEmpty == 0 ||
  2080. (isFollowIrrefutable && allSimpleOneChar && totalChars == firstSet->Count()))
  2081. {
  2082. fires = true;
  2083. }
  2084. else if (!followConsumes.CouldMatchEmpty())
  2085. {
  2086. // Check whether all FIRST sets are pairwise disjoint
  2087. // and disjoint from the FOLLOW set.
  2088. CharSet<Char> unionSet;
  2089. unionSet.UnionInPlace(compiler.ctAllocator, *firstSet);
  2090. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  2091. if (totalChars + followSet->Count() == unionSet.Count())
  2092. fires = true;
  2093. }
  2094. if (fires)
  2095. {
  2096. // **COMMIT**
  2097. if (numNonEmpty == 0)
  2098. scheme = None;
  2099. else if (allSimpleOneChar)
  2100. scheme = Set;
  2101. else if (numNonEmpty > 1 && allCompact && totalChars <= Switch20Inst::MaxCases)
  2102. {
  2103. switchSize = totalChars;
  2104. scheme = Switch;
  2105. }
  2106. else
  2107. scheme = Chain;
  2108. isOptional = true;
  2109. return;
  2110. }
  2111. }
  2112. }
  2113. //
  2114. // Compilation scheme: Trie
  2115. //
  2116. // If alt is equivalent to the form:
  2117. // (literal1|...|literaln)
  2118. // (we expand items with embedded character classes such as a[bc]d to (abd|acd)) and either:
  2119. // - follow is irrefutable and no later literal is a proper prefix of an earlier literal
  2120. // (and we may ignore later literals which have an earlier literal as proper prefix)
  2121. // E.g.: (ab|ac|abd)a* === (ab|ac)a*
  2122. // or:
  2123. // - follow is not irrefutable and no literal is a proper prefix of any other literal
  2124. // and the branching factor of the resulting trie is smallish
  2125. // E.g.: (abc|abd|abe)f
  2126. // then we can use a character trie to match the appropriate item.
  2127. //
  2128. {
  2129. // Pass 1: Items must be structurally appropriate and not result in too many alternatives after expansion
  2130. bool fires = true;
  2131. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2132. {
  2133. uint numAlts = 1;
  2134. if (!curr->head->IsCharTrieArm(compiler, numAlts))
  2135. {
  2136. fires = false;
  2137. break;
  2138. }
  2139. if (numAlts > maxTrieArmExpansion)
  2140. {
  2141. fires = false;
  2142. break;
  2143. }
  2144. }
  2145. if (fires)
  2146. {
  2147. // Pass 2: Attempt to construct the trie, checking for prefixes.
  2148. CharTrie trie;
  2149. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2150. {
  2151. if (!curr->head->BuildCharTrie(compiler, &trie, 0, isFollowIrrefutable))
  2152. {
  2153. fires = false;
  2154. break;
  2155. }
  2156. }
  2157. if (fires)
  2158. {
  2159. // **COMMIT**
  2160. // If follow is irrefutable and first item is empty, the trie would be of depth zero.
  2161. // However, in this case, the first simplification rule would have replaced the alt with a
  2162. // single empty item, and the 'None' compilation scheme would have been selected above.
  2163. //
  2164. // Similarly, if all alternations are empty and follow is refutable, the trie would be
  2165. // of depth zero, and the 'None' compilation scheme would have been selected above.
  2166. Assert(!trie.IsDepthZero());
  2167. if (trie.IsDepthOne())
  2168. {
  2169. // This case will fire if follow is irrefutable and all non length one items have an
  2170. // earlier one-character item as prefix. In this case we don't need the trie: the
  2171. // firstSet has all the information.
  2172. isOptional = false;
  2173. scheme = Set;
  2174. }
  2175. else
  2176. {
  2177. // Root of trie will live in compile-time allocator, but body will be in run-time allocator
  2178. runtimeTrie = Anew(compiler.ctAllocator, RuntimeCharTrie);
  2179. runtimeTrie->CloneFrom(compiler.rtAllocator, trie);
  2180. scheme = Trie;
  2181. }
  2182. return;
  2183. }
  2184. }
  2185. }
  2186. //
  2187. // Compilation scheme: Try
  2188. //
  2189. scheme = Try;
  2190. isDeterministic = false; // NON-DETERMINISTIC
  2191. }
  2192. bool AltNode::SupportsPrefixSkipping(Compiler& compiler) const
  2193. {
  2194. return false;
  2195. }
  2196. Node* AltNode::HeadSyncronizingNode(Compiler& compiler)
  2197. {
  2198. return 0;
  2199. }
  2200. CharCount AltNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  2201. {
  2202. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2203. // Here, we ignore nodes with length 1, which are Char nodes. The way the Alt node synchronization
  2204. // is currently implemented, it expects all nodes to be Literal nodes. It requires quite a bit of
  2205. // refactoring to have Alt nodes support Char nodes for synchronization, so Char nodes are ignored
  2206. // for now.
  2207. int localNumLiterals = numLiterals;
  2208. CharCount minLen = head->MinSyncronizingLiteralLength(compiler, localNumLiterals);
  2209. if (minLen <= 1)
  2210. return 0;
  2211. for (AltNode* curr = tail; curr != 0; curr = curr->tail)
  2212. {
  2213. CharCount thisLen = curr->head->MinSyncronizingLiteralLength(compiler, localNumLiterals);
  2214. if (thisLen <= 1)
  2215. return 0;
  2216. minLen = min(minLen, thisLen);
  2217. }
  2218. numLiterals = localNumLiterals;
  2219. if (minLen <= 1)
  2220. {
  2221. return 0;
  2222. }
  2223. return minLen;
  2224. }
  2225. void AltNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  2226. {
  2227. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2228. for (const AltNode* curr = this; curr != 0; curr = curr->tail)
  2229. curr->head->CollectSyncronizingLiterals(compiler, scanners);
  2230. }
  2231. void AltNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  2232. {
  2233. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2234. if (IsBetterSyncronizingNode(compiler, bestNode, this))
  2235. bestNode = this;
  2236. }
  2237. void AltNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  2238. {
  2239. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  2240. for (AltNode *curr = this; curr != 0; curr = curr->tail)
  2241. curr->head->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  2242. }
  2243. void AltNode::Emit(Compiler& compiler, CharCount& skipped)
  2244. {
  2245. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2246. Assert(skipped == 0);
  2247. switch (scheme)
  2248. {
  2249. case Try:
  2250. {
  2251. //
  2252. // Compilation scheme:
  2253. //
  2254. // Try((If|Match)(Char|Set))? L2
  2255. // <item 1>
  2256. // Jump Lexit
  2257. // L2: Try((If|Match)(Char|Set))? L3
  2258. // <item 2>
  2259. // Jump Lexit
  2260. // L3: <item 3>
  2261. // Lexit:
  2262. //
  2263. Assert(!isOptional);
  2264. int numItems = 0;
  2265. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2266. numItems++;
  2267. Assert(numItems >= 1);
  2268. // Each item other than last needs to jump to exit on success
  2269. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2270. Label lastTryFixup = 0;
  2271. int item = 0;
  2272. for (AltNode* curr = this; curr != 0; curr = curr->tail, item++)
  2273. {
  2274. if (item > 0)
  2275. // Fixup previous Try
  2276. compiler.DoFixup(lastTryFixup, compiler.CurrentLabel());
  2277. CharCount itemSkipped = 0;
  2278. if (item < numItems-1)
  2279. {
  2280. // HEURISTIC: if the first set of the alternative is exact or small, and the
  2281. // alternative does not match empty, then it's probably worth using
  2282. // a Try(If|Match)(Char|Set)
  2283. if (curr->head->firstSet != 0 &&
  2284. !curr->head->thisConsumes.CouldMatchEmpty() &&
  2285. (curr->head->isFirstExact || curr->head->firstSet->Count() <= maxCharsForConditionalTry))
  2286. {
  2287. if (curr->head->SupportsPrefixSkipping(compiler))
  2288. {
  2289. if (curr->head->firstSet->IsSingleton())
  2290. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryMatchCharInst, curr->head->firstSet->Singleton())->failLabel);
  2291. else
  2292. {
  2293. TryMatchSetInst* const i = EMIT(compiler, TryMatchSetInst);
  2294. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2295. lastTryFixup = compiler.GetFixup(&i->failLabel);
  2296. }
  2297. itemSkipped = 1;
  2298. }
  2299. else
  2300. {
  2301. if (curr->head->firstSet->IsSingleton())
  2302. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryIfCharInst, curr->head->firstSet->Singleton())->failLabel);
  2303. else
  2304. {
  2305. TryIfSetInst* const i = EMIT(compiler, TryIfSetInst);
  2306. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2307. lastTryFixup = compiler.GetFixup(&i->failLabel);
  2308. }
  2309. }
  2310. }
  2311. else
  2312. lastTryFixup = compiler.GetFixup(&EMIT(compiler, TryInst)->failLabel);
  2313. }
  2314. curr->head->Emit(compiler, itemSkipped);
  2315. if (item < numItems-1)
  2316. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2317. }
  2318. // Fixup jumps
  2319. for (item = 0; item < numItems-1; item++)
  2320. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2321. break;
  2322. }
  2323. case None:
  2324. {
  2325. Assert(isOptional);
  2326. // Nothing to emit
  2327. break;
  2328. }
  2329. case Trie:
  2330. {
  2331. //
  2332. // Compilation scheme:
  2333. //
  2334. // MatchTrie <trie>
  2335. //
  2336. EMIT(compiler, MatchTrieInst)->trie = *runtimeTrie;
  2337. break;
  2338. }
  2339. case Switch:
  2340. {
  2341. //
  2342. // Compilation scheme:
  2343. //
  2344. // Switch(AndConsume)?(10|20)(<dispatch to each arm>)
  2345. // Fail (if non-optional)
  2346. // Jump Lexit (if optional)
  2347. // L1: <item1>
  2348. // Jump Lexit
  2349. // L2: <item2>
  2350. // Jump Lexit
  2351. // L3: <item3>
  2352. // Lexit:
  2353. //
  2354. Assert(switchSize <= Switch20Inst::MaxCases);
  2355. int numItems = 0;
  2356. bool allCanSkip = true;
  2357. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2358. {
  2359. if (curr->head->thisConsumes.CouldMatchEmpty())
  2360. {
  2361. Assert(isOptional);
  2362. }
  2363. else
  2364. {
  2365. numItems++;
  2366. if (!curr->head->SupportsPrefixSkipping(compiler))
  2367. allCanSkip = false;
  2368. }
  2369. }
  2370. Assert(numItems > 1);
  2371. // Each item other than last needs to jump to exit on success
  2372. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2373. // We must remember where each item begins to fixup switch
  2374. Label* caseLabels = AnewArray(compiler.ctAllocator, Label, numItems);
  2375. // We must fixup the switch arms
  2376. Label switchLabel = compiler.CurrentLabel();
  2377. Assert(switchSize <= Switch20Inst::MaxCases);
  2378. if (allCanSkip)
  2379. {
  2380. if (switchSize > Switch10Inst::MaxCases)
  2381. EMIT(compiler, SwitchAndConsume20Inst);
  2382. else
  2383. EMIT(compiler, SwitchAndConsume10Inst);
  2384. }
  2385. else
  2386. {
  2387. if (switchSize > Switch10Inst::MaxCases)
  2388. EMIT(compiler, Switch20Inst);
  2389. else
  2390. EMIT(compiler, Switch10Inst);
  2391. }
  2392. Label defaultJumpFixup = 0;
  2393. if (isOptional)
  2394. // Must fixup default jump to exit
  2395. defaultJumpFixup = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2396. else
  2397. compiler.Emit<FailInst>();
  2398. // Emit each item
  2399. int item = 0;
  2400. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2401. {
  2402. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2403. {
  2404. if (allCanSkip)
  2405. skipped = 1;
  2406. caseLabels[item] = compiler.CurrentLabel();
  2407. curr->head->Emit(compiler, skipped);
  2408. if (item < numItems - 1)
  2409. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2410. item++;
  2411. }
  2412. }
  2413. // Fixup exit labels
  2414. if (isOptional)
  2415. compiler.DoFixup(defaultJumpFixup, compiler.CurrentLabel());
  2416. for (item = 0; item < numItems - 1; item++)
  2417. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2418. // Fixup the switch entries
  2419. item = 0;
  2420. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2421. {
  2422. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2423. {
  2424. Char entries[CharSet<Char>::MaxCompact];
  2425. int count = curr->head->firstSet->GetCompactEntries(CharSet<Char>::MaxCompact, entries);
  2426. Assert(count > 0);
  2427. for (int i = 0; i < count; i++)
  2428. {
  2429. if (allCanSkip)
  2430. {
  2431. if (switchSize > Switch10Inst::MaxCases)
  2432. compiler.L2I(SwitchAndConsume20, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2433. else
  2434. compiler.L2I(SwitchAndConsume10, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2435. }
  2436. else
  2437. {
  2438. if (switchSize > Switch10Inst::MaxCases)
  2439. compiler.L2I(Switch20, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2440. else
  2441. compiler.L2I(Switch10, switchLabel)->AddCase(entries[i], caseLabels[item]);
  2442. }
  2443. }
  2444. item++;
  2445. }
  2446. }
  2447. break;
  2448. }
  2449. case Chain:
  2450. {
  2451. //
  2452. // Compilation scheme:
  2453. //
  2454. // JumpIfNot(Char|Set) L2
  2455. // <item1>
  2456. // Jump Lexit
  2457. // L2: JumpIfNot(Char|Set) L3
  2458. // <item2>
  2459. // Jump Lexit
  2460. // L3: <item3> (if non-optional)
  2461. // L3: JumpIfNot(Char|Set) Lexit (if optional)
  2462. // <item3> (if optional)
  2463. // Lexit:
  2464. //
  2465. int numItems = 0;
  2466. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2467. {
  2468. if (curr->head->thisConsumes.CouldMatchEmpty())
  2469. {
  2470. Assert(isOptional);
  2471. }
  2472. else
  2473. numItems++;
  2474. }
  2475. Assert(numItems > 0);
  2476. // Each item other than last needs to jump to exit on success
  2477. Label* jumpFixups = AnewArray(compiler.ctAllocator, Label, (numItems - 1));
  2478. Label lastJumpFixup = 0;
  2479. int item = 0;
  2480. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2481. {
  2482. if (!curr->head->thisConsumes.CouldMatchEmpty())
  2483. {
  2484. if (item > 0)
  2485. // Fixup previous Jump
  2486. compiler.DoFixup(lastJumpFixup, compiler.CurrentLabel());
  2487. CharCount itemSkipped = 0;
  2488. if (item < numItems-1 || isOptional)
  2489. {
  2490. if (curr->head->firstSet->IsSingleton())
  2491. {
  2492. if (curr->head->SupportsPrefixSkipping(compiler))
  2493. {
  2494. lastJumpFixup = compiler.GetFixup(&EMIT(compiler, MatchCharOrJumpInst, curr->head->firstSet->Singleton())->targetLabel);
  2495. itemSkipped = 1;
  2496. }
  2497. else
  2498. lastJumpFixup = compiler.GetFixup(&EMIT(compiler, JumpIfNotCharInst, curr->head->firstSet->Singleton())->targetLabel);
  2499. }
  2500. else
  2501. {
  2502. if (curr->head->SupportsPrefixSkipping(compiler))
  2503. {
  2504. MatchSetOrJumpInst* const i = EMIT(compiler, MatchSetOrJumpInst);
  2505. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2506. lastJumpFixup = compiler.GetFixup(&i->targetLabel);
  2507. itemSkipped = 1;
  2508. }
  2509. else
  2510. {
  2511. JumpIfNotSetInst* const i = EMIT(compiler, JumpIfNotSetInst);
  2512. i->set.CloneFrom(compiler.rtAllocator, *curr->head->firstSet);
  2513. lastJumpFixup = compiler.GetFixup(&i->targetLabel);
  2514. }
  2515. }
  2516. }
  2517. curr->head->Emit(compiler, itemSkipped);
  2518. if (item < numItems-1)
  2519. jumpFixups[item] = compiler.GetFixup(&EMIT(compiler, JumpInst)->targetLabel);
  2520. item++;
  2521. }
  2522. }
  2523. // Fixup jumps to exit
  2524. for (item = 0; item < numItems-1; item++)
  2525. compiler.DoFixup(jumpFixups[item], compiler.CurrentLabel());
  2526. if (isOptional)
  2527. // Fixup last Jump to exit
  2528. compiler.DoFixup(lastJumpFixup, compiler.CurrentLabel());
  2529. break;
  2530. }
  2531. case Set:
  2532. {
  2533. //
  2534. // Compilation scheme:
  2535. //
  2536. // Match(Char|Set) (non optional)
  2537. // OptMatch(Char|Set) (optional)
  2538. //
  2539. if (isOptional)
  2540. {
  2541. if (firstSet->IsSingleton())
  2542. EMIT(compiler, OptMatchCharInst, firstSet->Singleton());
  2543. else
  2544. EMIT(compiler, OptMatchSetInst)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  2545. }
  2546. else
  2547. {
  2548. if (firstSet->IsSingleton())
  2549. EMIT(compiler, MatchCharInst, firstSet->Singleton());
  2550. else
  2551. EMIT(compiler, MatchSetInst<false>)->set.CloneFrom(compiler.rtAllocator, *firstSet);
  2552. }
  2553. break;
  2554. }
  2555. }
  2556. }
  2557. CharCount AltNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  2558. {
  2559. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2560. Assert(!isHeadSyncronizingNode);
  2561. //
  2562. // Compilation scheme:
  2563. //
  2564. // SyncToLiteralsAndBackup
  2565. //
  2566. SyncToLiteralsAndBackupInst* i =
  2567. EMIT(
  2568. compiler,
  2569. SyncToLiteralsAndBackupInst,
  2570. compiler.GetScriptContext()->GetRecycler(),
  2571. compiler.GetProgram(),
  2572. prevConsumes);
  2573. CollectSyncronizingLiterals(compiler, *i);
  2574. return 0;
  2575. }
  2576. bool AltNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  2577. {
  2578. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2579. if (tail == 0 || tail->tail != 0)
  2580. // Must be exactly two alts
  2581. return false;
  2582. for (AltNode* curr = this; curr != 0; curr = curr->tail)
  2583. {
  2584. if (!oi->BeginConcat())
  2585. return false;
  2586. if (!curr->head->IsOctoquad(compiler, oi))
  2587. return false;
  2588. }
  2589. return true;
  2590. }
  2591. bool AltNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  2592. {
  2593. return false;
  2594. }
  2595. bool AltNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  2596. {
  2597. Assert(false);
  2598. return false;
  2599. }
  2600. #if ENABLE_REGEX_CONFIG_OPTIONS
  2601. void AltNode::Print(DebugWriter* w, const Char* litbuf) const
  2602. {
  2603. w->PrintEOL(_u("Alt()"));
  2604. PrintAnnotations(w);
  2605. w->PrintEOL(_u("{"));
  2606. w->Indent();
  2607. for (const AltNode *curr = this; curr != 0; curr = curr->tail)
  2608. curr->head->Print(w, litbuf);
  2609. w->Unindent();
  2610. w->PrintEOL(_u("}"));
  2611. }
  2612. #endif
  2613. // ----------------------------------------------------------------------
  2614. // DefineGroupNode
  2615. // ----------------------------------------------------------------------
  2616. CharCount DefineGroupNode::LiteralLength() const
  2617. {
  2618. return 0;
  2619. }
  2620. bool DefineGroupNode::IsCharOrPositiveSet() const
  2621. {
  2622. return false;
  2623. }
  2624. CharCount DefineGroupNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  2625. {
  2626. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2627. Assert(groupId > 0 && groupId < compiler.program->numGroups);
  2628. return body->TransferPass0(compiler, litbuf);
  2629. }
  2630. void DefineGroupNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  2631. {
  2632. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2633. body->TransferPass1(compiler, litbuf);
  2634. }
  2635. bool DefineGroupNode::IsRefiningAssertion(Compiler& compiler)
  2636. {
  2637. return false;
  2638. }
  2639. void DefineGroupNode::AnnotatePass0(Compiler& compiler)
  2640. {
  2641. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2642. body->AnnotatePass0(compiler);
  2643. isWord = body->isWord;
  2644. }
  2645. void DefineGroupNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  2646. {
  2647. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2648. features = HasDefineGroup;
  2649. body->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, parentNotSpeculative, parentNotNegated);
  2650. features |= body->features;
  2651. thisConsumes = body->thisConsumes;
  2652. firstSet = body->firstSet;
  2653. isFirstExact = body->isFirstExact;
  2654. isThisIrrefutable = body->isThisIrrefutable;
  2655. isThisWillNotProgress = body->isThisWillNotProgress;
  2656. isThisWillNotRegress = body->isThisWillNotRegress;
  2657. isNotInLoop = parentNotInLoop;
  2658. isAtLeastOnce = parentAtLeastOnce;
  2659. isNotSpeculative = parentNotSpeculative;
  2660. isNotNegated = parentNotNegated;
  2661. }
  2662. void DefineGroupNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  2663. {
  2664. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2665. prevConsumes = accumConsumes;
  2666. isPrevWillNotProgress = accumPrevWillNotProgress;
  2667. isPrevWillNotRegress = accumPrevWillNotRegress;
  2668. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  2669. }
  2670. void DefineGroupNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  2671. {
  2672. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2673. followConsumes = accumConsumes;
  2674. followSet = accumFollow;
  2675. isFollowIrrefutable = accumFollowIrrefutable;
  2676. isFollowEOL = accumFollowEOL;
  2677. body->AnnotatePass3(compiler, accumConsumes, accumFollow, accumFollowIrrefutable, accumFollowEOL);
  2678. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  2679. }
  2680. void DefineGroupNode::AnnotatePass4(Compiler& compiler)
  2681. {
  2682. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2683. body->AnnotatePass4(compiler);
  2684. isDeterministic = body->isDeterministic;
  2685. // If the follow is irrefutable and we're not in an assertion, then we are not going to backtrack beyond this point, so
  2686. // we don't need to save the group before updating it
  2687. noNeedToSave = isFollowIrrefutable && isNotSpeculative;
  2688. // Compilation scheme: Chomp
  2689. //
  2690. // Body consists of a loop node with a Chomp compilation scheme.
  2691. if(body->tag == NodeTag::Loop)
  2692. {
  2693. const LoopNode *const loop = static_cast<const LoopNode *>(body);
  2694. if(loop->scheme == LoopNode::CompilationScheme::Chomp && loop->repeats.lower <= 1 && loop->repeats.IsUnbounded())
  2695. {
  2696. // **COMMIT**
  2697. scheme = Chomp;
  2698. return;
  2699. }
  2700. }
  2701. // Compilation scheme: Fixed
  2702. //
  2703. // Body has fixed width, so don't need a Begin instruction to keep track of the input start offset of the group.
  2704. if (body->thisConsumes.IsFixed())
  2705. {
  2706. // **COMMIT**
  2707. scheme = Fixed;
  2708. return;
  2709. }
  2710. // Compilation scheme: BeginEnd
  2711. //
  2712. // If both the body and the follow are irrefutable, we're not in any loops, and we're not in an assertion,
  2713. // then we don't need to save the group before updating it.
  2714. // **COMMIT**
  2715. scheme = BeginEnd;
  2716. }
  2717. bool DefineGroupNode::SupportsPrefixSkipping(Compiler& compiler) const
  2718. {
  2719. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2720. if (scheme != Fixed)
  2721. // We can't skip over part of the match if the BeginDefineGroup must capture it's start
  2722. return false;
  2723. return body->SupportsPrefixSkipping(compiler);
  2724. }
  2725. Node* DefineGroupNode::HeadSyncronizingNode(Compiler& compiler)
  2726. {
  2727. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2728. if (scheme != Fixed)
  2729. // Can't skip BeginDefineGroup
  2730. return 0;
  2731. return body->HeadSyncronizingNode(compiler);
  2732. }
  2733. CharCount DefineGroupNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  2734. {
  2735. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2736. return body->MinSyncronizingLiteralLength(compiler, numLiterals);
  2737. }
  2738. void DefineGroupNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  2739. {
  2740. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2741. body->CollectSyncronizingLiterals(compiler, scanners);
  2742. }
  2743. void DefineGroupNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  2744. {
  2745. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2746. body->BestSyncronizingNode(compiler, bestNode);
  2747. }
  2748. void DefineGroupNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  2749. {
  2750. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  2751. if (groupId < minGroup)
  2752. minGroup = groupId;
  2753. if (groupId > maxGroup)
  2754. maxGroup = groupId;
  2755. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  2756. }
  2757. void DefineGroupNode::Emit(Compiler& compiler, CharCount& skipped)
  2758. {
  2759. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2760. switch (scheme)
  2761. {
  2762. case Chomp:
  2763. {
  2764. // Compilation scheme:
  2765. //
  2766. // Chomp(Char|Set)Group
  2767. Assert(body->tag == NodeTag::Loop);
  2768. const LoopNode *const loop = static_cast<const LoopNode *>(body);
  2769. const CharSet<Char> *const loopBodyFirstSet = loop->body->firstSet;
  2770. const CountDomain &repeats = loop->repeats;
  2771. Assert(repeats.lower <= 1 && repeats.IsUnbounded());
  2772. if(loopBodyFirstSet->IsSingleton())
  2773. {
  2774. const Char c = loopBodyFirstSet->Singleton();
  2775. if(repeats.lower == 0)
  2776. EMIT(compiler, ChompCharGroupInst<ChompMode::Star>, c, groupId, noNeedToSave);
  2777. else
  2778. EMIT(compiler, ChompCharGroupInst<ChompMode::Plus>, c, groupId, noNeedToSave);
  2779. }
  2780. else
  2781. {
  2782. Assert(repeats.lower <= 1 && repeats.IsUnbounded());
  2783. RuntimeCharSet<Char> *runtimeSet;
  2784. if(repeats.lower == 0)
  2785. runtimeSet = &EMIT(compiler, ChompSetGroupInst<ChompMode::Star>, groupId, noNeedToSave)->set;
  2786. else
  2787. runtimeSet = &EMIT(compiler, ChompSetGroupInst<ChompMode::Plus>, groupId, noNeedToSave)->set;
  2788. runtimeSet->CloneFrom(compiler.rtAllocator, *loopBodyFirstSet);
  2789. }
  2790. break;
  2791. }
  2792. case Fixed:
  2793. {
  2794. // Compilation scheme:
  2795. //
  2796. // <body>
  2797. // DefineGroup
  2798. Assert(body->thisConsumes.IsFixed());
  2799. body->Emit(compiler, skipped);
  2800. EMIT(compiler, DefineGroupFixedInst, groupId, body->thisConsumes.lower, noNeedToSave);
  2801. break;
  2802. }
  2803. case BeginEnd:
  2804. {
  2805. // Compilation scheme:
  2806. //
  2807. // BeginDefineGroup
  2808. // <body>
  2809. // EndDefineGroup
  2810. Assert(skipped == 0);
  2811. EMIT(compiler, BeginDefineGroupInst, groupId);
  2812. body->Emit(compiler, skipped);
  2813. EMIT(compiler, EndDefineGroupInst, groupId, noNeedToSave);
  2814. break;
  2815. }
  2816. }
  2817. }
  2818. CharCount DefineGroupNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  2819. {
  2820. Assert(false);
  2821. return 0;
  2822. }
  2823. bool DefineGroupNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  2824. {
  2825. return false;
  2826. }
  2827. bool DefineGroupNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  2828. {
  2829. return false;
  2830. }
  2831. bool DefineGroupNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  2832. {
  2833. Assert(false);
  2834. return false;
  2835. }
  2836. #if ENABLE_REGEX_CONFIG_OPTIONS
  2837. void DefineGroupNode::Print(DebugWriter* w, const Char* litbuf) const
  2838. {
  2839. w->PrintEOL(_u("DefineGroup(%d)"), groupId);
  2840. PrintAnnotations(w);
  2841. w->PrintEOL(_u("{"));
  2842. w->Indent();
  2843. body->Print(w, litbuf);
  2844. w->Unindent();
  2845. w->PrintEOL(_u("}"));
  2846. }
  2847. #endif
  2848. // ----------------------------------------------------------------------
  2849. // MatchGroupNode
  2850. // ----------------------------------------------------------------------
  2851. CharCount MatchGroupNode::LiteralLength() const
  2852. {
  2853. return 0;
  2854. }
  2855. bool MatchGroupNode::IsCharOrPositiveSet() const
  2856. {
  2857. return false;
  2858. }
  2859. CharCount MatchGroupNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  2860. {
  2861. Assert(groupId > 0 && groupId < compiler.program->numGroups);
  2862. return 0;
  2863. }
  2864. void MatchGroupNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  2865. {
  2866. }
  2867. bool MatchGroupNode::IsRefiningAssertion(Compiler& compiler)
  2868. {
  2869. return false;
  2870. }
  2871. void MatchGroupNode::AnnotatePass0(Compiler& compiler)
  2872. {
  2873. isWord = false;
  2874. }
  2875. void MatchGroupNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  2876. {
  2877. features = HasMatchGroup;
  2878. thisConsumes.lower = 0;
  2879. thisConsumes.upper = CharCountFlag;
  2880. firstSet = compiler.standardChars->GetFullSet();
  2881. isFirstExact = false;
  2882. isThisIrrefutable = false;
  2883. isThisWillNotProgress = true;
  2884. isThisWillNotRegress = true;
  2885. isNotInLoop = parentNotInLoop;
  2886. isAtLeastOnce = parentAtLeastOnce;
  2887. isNotSpeculative = parentNotSpeculative;
  2888. isNotNegated = parentNotNegated;
  2889. }
  2890. void MatchGroupNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  2891. {
  2892. prevConsumes = accumConsumes;
  2893. isPrevWillNotProgress = accumPrevWillNotProgress;
  2894. isPrevWillNotRegress = accumPrevWillNotRegress;
  2895. }
  2896. void MatchGroupNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  2897. {
  2898. followConsumes = accumConsumes;
  2899. followSet = accumFollow;
  2900. isFollowIrrefutable = accumFollowIrrefutable;
  2901. isFollowEOL = accumFollowEOL;
  2902. }
  2903. void MatchGroupNode::AnnotatePass4(Compiler& compiler)
  2904. {
  2905. isDeterministic = true;
  2906. }
  2907. bool MatchGroupNode::SupportsPrefixSkipping(Compiler& compiler) const
  2908. {
  2909. return false;
  2910. }
  2911. Node* MatchGroupNode::HeadSyncronizingNode(Compiler& compiler)
  2912. {
  2913. return 0;
  2914. }
  2915. CharCount MatchGroupNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  2916. {
  2917. return 0;
  2918. }
  2919. void MatchGroupNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  2920. {
  2921. Assert(false);
  2922. }
  2923. void MatchGroupNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  2924. {
  2925. }
  2926. void MatchGroupNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  2927. {
  2928. }
  2929. void MatchGroupNode::Emit(Compiler& compiler, CharCount& skipped)
  2930. {
  2931. Assert(skipped == 0);
  2932. //
  2933. // Compilation scheme:
  2934. //
  2935. // MatchGroup
  2936. //
  2937. EMIT(compiler, MatchGroupInst, groupId);
  2938. }
  2939. CharCount MatchGroupNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  2940. {
  2941. Assert(false);
  2942. return 0;
  2943. }
  2944. bool MatchGroupNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  2945. {
  2946. return false;
  2947. }
  2948. bool MatchGroupNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  2949. {
  2950. return false;
  2951. }
  2952. bool MatchGroupNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  2953. {
  2954. Assert(false);
  2955. return false;
  2956. }
  2957. #if ENABLE_REGEX_CONFIG_OPTIONS
  2958. void MatchGroupNode::Print(DebugWriter* w, const Char* litbuf) const
  2959. {
  2960. w->PrintEOL(_u("MatchGroup(%d)"), groupId);
  2961. PrintAnnotations(w);
  2962. }
  2963. #endif
  2964. // ----------------------------------------------------------------------
  2965. // LoopNode
  2966. // ----------------------------------------------------------------------
  2967. CharCount LoopNode::LiteralLength() const
  2968. {
  2969. return 0;
  2970. }
  2971. bool LoopNode::IsCharOrPositiveSet() const
  2972. {
  2973. return false;
  2974. }
  2975. CharCount LoopNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  2976. {
  2977. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2978. Assert(repeats.upper == CharCountFlag || repeats.upper > 0);
  2979. Assert(repeats.upper == CharCountFlag || repeats.upper >= repeats.lower);
  2980. Assert(!(repeats.lower == 1 && repeats.upper == 1));
  2981. return body->TransferPass0(compiler, litbuf);
  2982. }
  2983. void LoopNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  2984. {
  2985. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2986. body->TransferPass1(compiler, litbuf);
  2987. }
  2988. bool LoopNode::IsRefiningAssertion(Compiler& compiler)
  2989. {
  2990. return false;
  2991. }
  2992. void LoopNode::AnnotatePass0(Compiler& compiler)
  2993. {
  2994. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  2995. body->AnnotatePass0(compiler);
  2996. isWord = !repeats.CouldMatchEmpty() && body->isWord;
  2997. }
  2998. void LoopNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  2999. {
  3000. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3001. features = HasLoop;
  3002. isNotInLoop = parentNotInLoop;
  3003. isAtLeastOnce = parentAtLeastOnce;
  3004. isNotSpeculative = parentNotSpeculative;
  3005. isNotNegated = parentNotNegated;
  3006. body->AnnotatePass1(compiler, false, parentAtLeastOnce && repeats.lower > 0, parentNotSpeculative, isNotNegated);
  3007. features |= body->features;
  3008. thisConsumes = body->thisConsumes;
  3009. thisConsumes.Mult(repeats);
  3010. firstSet = body->firstSet;
  3011. isFirstExact = repeats.lower > 0 && body->isFirstExact;
  3012. isThisIrrefutable = repeats.CouldMatchEmpty() || body->isThisIrrefutable;
  3013. // Caution: Even if a greedy loop has a 'isThisWillNotProgress' body, if the body has choicepoints then
  3014. // a backtrack could resume execution at an earlier loop iteration, which may then continue to repeat
  3015. // the loop beyond the input offset which triggered the backtrack. Ideally we'd use the body's isDeterministic
  3016. // flag to tell us when that can't happen, but it's not available till pass 4, so we must make do with
  3017. // a simple-minded structural approximation.
  3018. isThisWillNotProgress = (isGreedy || repeats.IsExact(1)) && body->isThisWillNotProgress && body->IsObviouslyDeterministic();
  3019. isThisWillNotRegress = (!isGreedy || repeats.IsExact(1)) && body->isThisWillNotRegress && body->IsObviouslyDeterministic();
  3020. }
  3021. void LoopNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  3022. {
  3023. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3024. prevConsumes = accumConsumes;
  3025. isPrevWillNotProgress = accumPrevWillNotProgress;
  3026. isPrevWillNotRegress = accumPrevWillNotRegress;
  3027. // May have already gone through loop when starting body
  3028. CountDomain bodyConsumes = body->thisConsumes;
  3029. CharCountOrFlag prevMax = repeats.upper;
  3030. if (prevMax != CharCountFlag)
  3031. prevMax--;
  3032. CountDomain prevLoops(0, prevMax);
  3033. bodyConsumes.Mult(prevLoops);
  3034. accumConsumes.Add(bodyConsumes);
  3035. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress && isThisWillNotProgress, accumPrevWillNotRegress && isThisWillNotRegress);
  3036. }
  3037. void LoopNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  3038. {
  3039. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3040. followConsumes = accumConsumes;
  3041. followSet = accumFollow;
  3042. isFollowIrrefutable = accumFollowIrrefutable;
  3043. isFollowEOL = accumFollowEOL;
  3044. // May go through loop again when leaving body
  3045. CountDomain bodyConsumes = body->thisConsumes;
  3046. CharCountOrFlag nextMax = repeats.upper;
  3047. if (nextMax != CharCountFlag)
  3048. nextMax--;
  3049. CountDomain nextLoops(0, nextMax);
  3050. bodyConsumes.Mult(nextLoops);
  3051. accumConsumes.Add(bodyConsumes);
  3052. CharSet<Char>* innerFollow = Anew(compiler.ctAllocator, UnicodeCharSet);
  3053. innerFollow->UnionInPlace(compiler.ctAllocator, *accumFollow);
  3054. innerFollow->UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3055. if (followSet->IsSingleton())
  3056. {
  3057. Assert(followSet->IsCompact());
  3058. followFirst = followSet->GetCompactChar(0);
  3059. }
  3060. /*
  3061. All of the following must be true for the loop body's follow to be irrefutable:
  3062. The loop's follow is irrefutable.
  3063. The loop can complete the required minimum number of iterations of the body without backtracking into a completed
  3064. iteration of the body.
  3065. - If repeats.lower == 0, the required minimum number of iterations is met without executing the body
  3066. - If repeats.lower == 1
  3067. - If the first iteration of the body fails, there is no previous iteration of the body to backtrack into
  3068. - After completing the first iteration of the body, the loop cannot reject the first iteration for not
  3069. making progress because the iteration is required for the loop to succeed
  3070. - If repeats.lower >= 2
  3071. - If the second iteration of the body fails, it will backtrack into the first iteration of the body
  3072. - To prevent this, the body must be irrefutable
  3073. After completing the required minimum number of iterations of the body, the loop cannot reject a subsequent
  3074. completed iteration of the body for not making progress.
  3075. - If !isGreedy || repeats.IsFixed(), there will not be any more iterations of the body, as it will proceed to
  3076. the irrefutable follow
  3077. - If !body->thisConsumes.CouldMatchEmpty(), subsequent iterations of the body cannot complete without making
  3078. progress
  3079. */
  3080. const bool isBodyFollowIrrefutable =
  3081. accumFollowIrrefutable &&
  3082. (repeats.lower <= 1 || body->isThisIrrefutable) &&
  3083. (!isGreedy || !body->thisConsumes.CouldMatchEmpty() || repeats.IsFixed());
  3084. body->AnnotatePass3(compiler, accumConsumes, innerFollow, isBodyFollowIrrefutable, false);
  3085. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  3086. }
  3087. void LoopNode::AnnotatePass4(Compiler& compiler)
  3088. {
  3089. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3090. body->AnnotatePass4(compiler);
  3091. isDeterministic = body->isDeterministic;
  3092. //
  3093. // Loops can be defined by unfolding:
  3094. // r* === (rr*|)
  3095. // r*? === (|rr*?)
  3096. // Thus many of the optimizations for alternatives carry over to loops.
  3097. //
  3098. //
  3099. // Compilation scheme: None
  3100. //
  3101. // If overall loop is empty-only then emit nothing.
  3102. // (Parser has already eliminated loops with upper == 0, so this can only happen if the body is empty-only)
  3103. //
  3104. // If loop is non-greedy with lower 0 and follow is irrefutable, then loop body will never be executed
  3105. // no emit nothing.
  3106. //
  3107. if (body->IsEmptyOnly() ||
  3108. (!isGreedy && repeats.lower == 0 && isFollowIrrefutable))
  3109. {
  3110. // **COMMIT**
  3111. scheme = None;
  3112. return;
  3113. }
  3114. //
  3115. // Compilation scheme: Chain/Try
  3116. //
  3117. // If loop is greedy, with lower 0 and upper 1, then we'd like to treat it as body|<empty> so as to avoid all the loop
  3118. // overhead. However, if the body could match empty, then a match against empty must be treated as an 'iteration' of the
  3119. // loop body which made no progress. So we treat as a general loop in that case. Otherwise, we may inline two of
  3120. // AltNode's compilation schemes:
  3121. // Examples:
  3122. // - /(a*)?/.exec("") must leave group 1 undefined rather than empty.
  3123. // - /(?:a||b)?/.exec("b") chooses the empty alt, then must backtrack due to no progress, and match 'b'.
  3124. // This is not the same as /a||b|/, as picking the first empty alt would result in success.
  3125. //
  3126. // (cf AltNode's None/Switch/Chain/Set, isOptional compilation scheme)
  3127. // If body cannot match empty, follow cannot match empty, and the body FIRST set is disjoint from the FOLLOW
  3128. // set, then we can commit to the body using a 1 char lookahead.
  3129. //
  3130. // If body cannot match empty, and follow is irrefutable, then we can commit to the body using a 1 char
  3131. // lookahead provided:
  3132. // ** the body can't fail if given an arbitrary input starting with a character in its FIRST set **
  3133. //
  3134. // (cf AltNode's Try compilation scheme)
  3135. // Otherwise, protect body by a Try instruction.
  3136. //
  3137. if (isGreedy && repeats.lower == 0 && repeats.upper == 1 && !body->thisConsumes.CouldMatchEmpty())
  3138. {
  3139. // **COMMIT**
  3140. // Note that the FIRST of the loop is already the union of the body FIRST and the loop FOLLOW
  3141. if (!body->thisConsumes.CouldMatchEmpty() &&
  3142. ((!followConsumes.CouldMatchEmpty() && firstSet->Count() == body->firstSet->Count() + followSet->Count()) ||
  3143. (isFollowIrrefutable && body->IsSimpleOneChar())))
  3144. {
  3145. if (body->IsSimpleOneChar())
  3146. scheme = Set;
  3147. else
  3148. scheme = Chain;
  3149. }
  3150. else
  3151. {
  3152. scheme = Try;
  3153. isDeterministic = false; // NON-DETERMINISTIC
  3154. }
  3155. return;
  3156. }
  3157. //
  3158. // Compilation scheme: Chomp/ChompGroupLastChar
  3159. //
  3160. // If the body is a simple-one-char, or a group of a simple-one-char, and either:
  3161. // - follow is non-empty and FIRST and FOLLOW are disjoint
  3162. // - loop is greedy and follow is irrefutable
  3163. // - follow is EOL
  3164. // then consume up to upper number of characters in FIRST and fail if number consumed is not >= lower.
  3165. //
  3166. if (body->IsSimpleOneChar() || (body->tag == DefineGroup && ((DefineGroupNode*)body)->body->IsSimpleOneChar()))
  3167. {
  3168. if (!followConsumes.CouldMatchEmpty())
  3169. {
  3170. CharSet<Char> unionSet;
  3171. CharCount totalChars = 0;
  3172. unionSet.UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3173. totalChars += body->firstSet->Count();
  3174. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  3175. totalChars += followSet->Count();
  3176. if (totalChars == unionSet.Count())
  3177. {
  3178. // **COMMIT**
  3179. if (body->tag == DefineGroup)
  3180. {
  3181. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3182. scheme = ChompGroupLastChar;
  3183. }
  3184. else
  3185. scheme = Chomp;
  3186. return;
  3187. }
  3188. }
  3189. if ((isGreedy && isFollowIrrefutable) || isFollowEOL)
  3190. {
  3191. // **COMMIT**
  3192. if (body->tag == DefineGroup)
  3193. {
  3194. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3195. scheme = ChompGroupLastChar;
  3196. }
  3197. else
  3198. scheme = Chomp;
  3199. return;
  3200. }
  3201. }
  3202. //
  3203. // Compilation scheme: Guarded
  3204. //
  3205. // If body cannot match empty, follow cannot match empty, and FIRST of body and FOLLOW are
  3206. // disjoint then can use 1 char lookahead to decide whether to commit to another loop body.
  3207. // (If the loop body fails then we know the follow will fail even with one more/fewer iterations of the
  3208. // loop body, so we can let that failure propagate without needing to push choicepoints.)
  3209. //
  3210. if (!body->thisConsumes.CouldMatchEmpty() && !followConsumes.CouldMatchEmpty())
  3211. {
  3212. CharSet<Char> unionSet;
  3213. CharCount totalChars = 0;
  3214. unionSet.UnionInPlace(compiler.ctAllocator, *body->firstSet);
  3215. totalChars += body->firstSet->Count();
  3216. unionSet.UnionInPlace(compiler.ctAllocator, *followSet);
  3217. totalChars += followSet->Count();
  3218. if (totalChars == unionSet.Count())
  3219. {
  3220. // **COMMIT**
  3221. scheme = Guarded;
  3222. return;
  3223. }
  3224. }
  3225. //
  3226. // Compilation scheme: Fixed/FixedSet/FixedGroupLastIteration
  3227. //
  3228. // If loop is greedy, body is deterministic, non-zero fixed width, and either does not define any groups
  3229. // or has one outermost group, then we can keep track of the backtracking information in constant space.
  3230. //
  3231. // If body does have an outer group, we can avoid saving the existing group contents if the follow
  3232. // is irrefutable, we're not in an outer loop, and we're not in an assertion.
  3233. //
  3234. if (isGreedy && body->isDeterministic && !body->thisConsumes.CouldMatchEmpty() && body->thisConsumes.IsFixed())
  3235. {
  3236. if (body->tag == DefineGroup)
  3237. {
  3238. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3239. if (!bodyGroup->body->ContainsDefineGroup())
  3240. {
  3241. // **COMMIT**
  3242. scheme = FixedGroupLastIteration;
  3243. noNeedToSave = isFollowIrrefutable && isNotInLoop && isNotSpeculative;
  3244. isDeterministic = false; // NON-DETERMINISTIC;
  3245. return;
  3246. }
  3247. }
  3248. else if (body->IsSimpleOneChar())
  3249. {
  3250. // **COMMIT**
  3251. scheme = FixedSet;
  3252. isDeterministic = false; // NON-DETERMINISTIC
  3253. return;
  3254. }
  3255. else if (!body->ContainsDefineGroup())
  3256. {
  3257. // **COMMIT**
  3258. scheme = Fixed;
  3259. isDeterministic = false; // NON-DETERMINISTIC
  3260. return;
  3261. }
  3262. }
  3263. //
  3264. // Compilation scheme: GreedyNoBacktrack
  3265. //
  3266. // If loop is greedy with lower == 0 and upper == inf, the loop body is deterministic and does not define
  3267. // groups, and follow is irrefutable, then we will never have to try fewer iterations of the loop once
  3268. // entering the follow. Thus we only need one continuation record on the stack to protect against failure
  3269. // for each attempt at the loop body.
  3270. //
  3271. if (isGreedy && repeats.lower == 0 && repeats.upper == CharCountFlag && body->isDeterministic && !body->ContainsDefineGroup() && isFollowIrrefutable)
  3272. {
  3273. // **COMMIT**
  3274. scheme = GreedyNoBacktrack;
  3275. return;
  3276. }
  3277. //
  3278. // Compilation scheme: BeginEnd
  3279. //
  3280. scheme = BeginEnd;
  3281. isDeterministic = false; // NON-DETERMINISTIC
  3282. }
  3283. bool LoopNode::SupportsPrefixSkipping(Compiler& compiler) const
  3284. {
  3285. return false;
  3286. }
  3287. Node* LoopNode::HeadSyncronizingNode(Compiler& compiler)
  3288. {
  3289. return 0;
  3290. }
  3291. CharCount LoopNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  3292. {
  3293. return 0;
  3294. }
  3295. void LoopNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  3296. {
  3297. Assert(false);
  3298. }
  3299. void LoopNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  3300. {
  3301. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3302. if (repeats.lower > 0)
  3303. body->BestSyncronizingNode(compiler, bestNode);
  3304. // else: can't be sure loop will be taken
  3305. }
  3306. void LoopNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  3307. {
  3308. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  3309. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  3310. }
  3311. void LoopNode::Emit(Compiler& compiler, CharCount& skipped)
  3312. {
  3313. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3314. Assert(skipped == 0);
  3315. switch (scheme)
  3316. {
  3317. case BeginEnd:
  3318. {
  3319. //
  3320. // Compilation scheme:
  3321. //
  3322. // Lloop: BeginLoop Lexit
  3323. // <loop body>
  3324. // RepeatLoop Lloop
  3325. // Lexit:
  3326. //
  3327. int minBodyGroupId = compiler.program->numGroups;
  3328. int maxBodyGroupId = -1;
  3329. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3330. Label beginLabel = compiler.CurrentLabel();
  3331. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopInst, compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId, isGreedy)->exitLabel);
  3332. body->Emit(compiler, skipped);
  3333. EMIT(compiler, RepeatLoopInst, beginLabel);
  3334. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3335. break;
  3336. }
  3337. case None:
  3338. {
  3339. // Nothing to emit
  3340. break;
  3341. }
  3342. case Chomp:
  3343. {
  3344. //
  3345. // Compilation scheme:
  3346. //
  3347. // Chomp(Char|Set)(Star|Plus|Bounded)
  3348. //
  3349. if(body->firstSet->IsSingleton())
  3350. {
  3351. if(repeats.lower <= 1 && repeats.IsUnbounded())
  3352. {
  3353. if(repeats.lower == 0)
  3354. EMIT(compiler, ChompCharInst<ChompMode::Star>, body->firstSet->Singleton());
  3355. else
  3356. EMIT(compiler, ChompCharInst<ChompMode::Plus>, body->firstSet->Singleton());
  3357. }
  3358. else
  3359. EMIT(compiler, ChompCharBoundedInst, body->firstSet->Singleton(), repeats);
  3360. }
  3361. else
  3362. {
  3363. if(repeats.lower <= 1 && repeats.IsUnbounded())
  3364. {
  3365. if(repeats.lower == 0)
  3366. EMIT(compiler, ChompSetInst<ChompMode::Star>)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3367. else
  3368. EMIT(compiler, ChompSetInst<ChompMode::Plus>)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3369. }
  3370. else
  3371. EMIT(compiler, ChompSetBoundedInst, repeats)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3372. }
  3373. break;
  3374. }
  3375. case ChompGroupLastChar:
  3376. {
  3377. //
  3378. // Compilation scheme:
  3379. //
  3380. // ChompSetGroup
  3381. //
  3382. Assert(body->tag == DefineGroup);
  3383. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3384. EMIT(compiler, ChompSetBoundedGroupLastCharInst, repeats, bodyGroup->groupId, noNeedToSave)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3385. break;
  3386. }
  3387. case Guarded:
  3388. {
  3389. //
  3390. // Compilation scheme:
  3391. //
  3392. // Lloop: BeginLoopIf(Char|Set) Lexit
  3393. // <loop body>
  3394. // RepeatLoopIf(Char|Set) Lloop
  3395. // Lexit:
  3396. //
  3397. int minBodyGroupId = compiler.program->numGroups;
  3398. int maxBodyGroupId = -1;
  3399. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3400. Label beginLabel = compiler.CurrentLabel();
  3401. Label exitFixup;
  3402. if (body->firstSet->IsSingleton())
  3403. exitFixup = compiler.GetFixup(&EMIT(compiler, BeginLoopIfCharInst, body->firstSet->Singleton(), compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId)->exitLabel);
  3404. else
  3405. {
  3406. BeginLoopIfSetInst* i = EMIT(compiler, BeginLoopIfSetInst, compiler.NextLoopId(), repeats, !isNotInLoop, !body->isDeterministic, minBodyGroupId, maxBodyGroupId);
  3407. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3408. exitFixup = compiler.GetFixup(&i->exitLabel);
  3409. }
  3410. body->Emit(compiler, skipped);
  3411. if (body->firstSet->IsSingleton())
  3412. EMIT(compiler, RepeatLoopIfCharInst, beginLabel);
  3413. else
  3414. EMIT(compiler, RepeatLoopIfSetInst, beginLabel);
  3415. compiler.DoFixup(exitFixup, compiler.CurrentLabel());
  3416. break;
  3417. }
  3418. case Fixed:
  3419. {
  3420. //
  3421. // Compilation scheme:
  3422. //
  3423. // Lloop: BeginLoopFixed Lexit
  3424. // <loop body>
  3425. // RepeatLoopFixed Lloop
  3426. // Lexit:
  3427. //
  3428. Assert(!body->ContainsDefineGroup());
  3429. Assert(body->thisConsumes.IsFixed());
  3430. Assert(body->thisConsumes.lower > 0);
  3431. Assert(body->isDeterministic);
  3432. Label beginLabel = compiler.CurrentLabel();
  3433. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopFixedInst, compiler.NextLoopId(), repeats, !isNotInLoop, body->thisConsumes.lower)->exitLabel);
  3434. body->Emit(compiler, skipped);
  3435. EMIT(compiler, RepeatLoopFixedInst, beginLabel);
  3436. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3437. break;
  3438. }
  3439. case FixedSet:
  3440. {
  3441. //
  3442. // Compilation scheme:
  3443. //
  3444. // LoopSet
  3445. //
  3446. Assert(body->IsSimpleOneChar());
  3447. if (followFirst == MaxChar || PHASE_OFF1(Js::RegexOptBTPhase))
  3448. {
  3449. EMIT(compiler, LoopSetInst, compiler.NextLoopId(), repeats, !isNotInLoop)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3450. }
  3451. else
  3452. {
  3453. EMIT(compiler, LoopSetWithFollowFirstInst, compiler.NextLoopId(), repeats, !isNotInLoop, followFirst)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3454. }
  3455. break;
  3456. }
  3457. case FixedGroupLastIteration:
  3458. {
  3459. //
  3460. // Compilation scheme:
  3461. //
  3462. // Lloop: BeginLoopFixedGroupLastIteration Lexit
  3463. // <loop body>
  3464. // RepeatLoopFixedGroupLastIteration Lloop
  3465. // Lexit:
  3466. //
  3467. Assert(body->tag == DefineGroup);
  3468. DefineGroupNode* bodyGroup = (DefineGroupNode*)body;
  3469. Assert(body->thisConsumes.IsFixed());
  3470. Assert(body->thisConsumes.lower > 0);
  3471. Assert(body->isDeterministic);
  3472. Label beginLabel = compiler.CurrentLabel();
  3473. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginLoopFixedGroupLastIterationInst, compiler.NextLoopId(), repeats, !isNotInLoop, body->thisConsumes.lower, bodyGroup->groupId, noNeedToSave)->exitLabel);
  3474. bodyGroup->body->Emit(compiler, skipped);
  3475. EMIT(compiler, RepeatLoopFixedGroupLastIterationInst, beginLabel);
  3476. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3477. break;
  3478. }
  3479. case GreedyNoBacktrack:
  3480. {
  3481. //
  3482. // Compilation scheme:
  3483. //
  3484. // Lloop: BeginGreedyLoopNoBacktrack Lexit
  3485. // <loop body>
  3486. // RepeatGreedyLoopNoBacktrack Lloop
  3487. // Lexit:
  3488. //
  3489. Assert(!body->ContainsDefineGroup());
  3490. Assert(isGreedy);
  3491. Assert(repeats.lower == 0);
  3492. Assert(repeats.upper == CharCountFlag);
  3493. Assert(body->isDeterministic);
  3494. Label beginLabel = compiler.CurrentLabel();
  3495. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginGreedyLoopNoBacktrackInst, compiler.NextLoopId())->exitLabel);
  3496. body->Emit(compiler, skipped);
  3497. EMIT(compiler, RepeatGreedyLoopNoBacktrackInst, beginLabel);
  3498. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3499. break;
  3500. }
  3501. case Set:
  3502. {
  3503. //
  3504. // Compilation scheme:
  3505. //
  3506. // OptMatch(Char|Set)
  3507. //
  3508. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3509. if (body->firstSet->IsSingleton())
  3510. EMIT(compiler, OptMatchCharInst, body->firstSet->Singleton());
  3511. else
  3512. EMIT(compiler, OptMatchSetInst)->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3513. break;
  3514. }
  3515. case Chain:
  3516. {
  3517. //
  3518. // Compilation scheme:
  3519. //
  3520. // JumpIfNot(Char|Set) Lexit
  3521. // <body>
  3522. // Lexit:
  3523. //
  3524. //
  3525. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3526. Label jumpFixup = 0;
  3527. CharCount bodySkipped = 0;
  3528. if (body->firstSet->IsSingleton())
  3529. {
  3530. if (body->SupportsPrefixSkipping(compiler))
  3531. {
  3532. jumpFixup = compiler.GetFixup(&EMIT(compiler, MatchCharOrJumpInst, body->firstSet->Singleton())->targetLabel);
  3533. bodySkipped = 1;
  3534. }
  3535. else
  3536. jumpFixup = compiler.GetFixup(&EMIT(compiler, JumpIfNotCharInst, body->firstSet->Singleton())->targetLabel);
  3537. }
  3538. else
  3539. {
  3540. if (body->SupportsPrefixSkipping(compiler))
  3541. {
  3542. MatchSetOrJumpInst* const i = EMIT(compiler, MatchSetOrJumpInst);
  3543. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3544. jumpFixup = compiler.GetFixup(&i->targetLabel);
  3545. bodySkipped = 1;
  3546. }
  3547. else
  3548. {
  3549. JumpIfNotSetInst* const i = EMIT(compiler, JumpIfNotSetInst);
  3550. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3551. jumpFixup = compiler.GetFixup(&i->targetLabel);
  3552. }
  3553. }
  3554. body->Emit(compiler, bodySkipped);
  3555. compiler.DoFixup(jumpFixup, compiler.CurrentLabel());
  3556. break;
  3557. }
  3558. case Try:
  3559. {
  3560. //
  3561. // Compilation scheme:
  3562. //
  3563. // Try((If|Match)(Char|Set))? Lexit
  3564. // <loop body>
  3565. // Lexit:
  3566. //
  3567. Assert(!body->ContainsDefineGroup() || !body->thisConsumes.CouldMatchEmpty());
  3568. Label tryFixup = 0;
  3569. CharCount bodySkipped = 0;
  3570. // HEURISTIC: if the first set of the body is exact or small, and the
  3571. // body does not match empty, then it's probably worth using
  3572. // a Try(If|Match)(Char|Set)
  3573. if (!body->thisConsumes.CouldMatchEmpty() &&
  3574. (body->isFirstExact || body->firstSet->Count() <= maxCharsForConditionalTry))
  3575. {
  3576. if (body->SupportsPrefixSkipping(compiler))
  3577. {
  3578. if (body->firstSet->IsSingleton())
  3579. tryFixup = compiler.GetFixup(&EMIT(compiler, TryMatchCharInst, body->firstSet->Singleton())->failLabel);
  3580. else
  3581. {
  3582. TryMatchSetInst* const i = EMIT(compiler, TryMatchSetInst);
  3583. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3584. tryFixup = compiler.GetFixup(&i->failLabel);
  3585. }
  3586. bodySkipped = 1;
  3587. }
  3588. else
  3589. {
  3590. if(body->firstSet->IsSingleton())
  3591. tryFixup = compiler.GetFixup(&EMIT(compiler, TryIfCharInst, body->firstSet->Singleton())->failLabel);
  3592. else
  3593. {
  3594. TryIfSetInst* const i = EMIT(compiler, TryIfSetInst);
  3595. i->set.CloneFrom(compiler.rtAllocator, *body->firstSet);
  3596. tryFixup = compiler.GetFixup(&i->failLabel);
  3597. }
  3598. }
  3599. }
  3600. else
  3601. tryFixup = compiler.GetFixup(&EMIT(compiler, TryInst)->failLabel);
  3602. body->Emit(compiler, bodySkipped);
  3603. // Fixup Try
  3604. compiler.DoFixup(tryFixup, compiler.CurrentLabel());
  3605. break;
  3606. }
  3607. }
  3608. }
  3609. CharCount LoopNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  3610. {
  3611. Assert(false);
  3612. return 0;
  3613. }
  3614. bool LoopNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  3615. {
  3616. return false;
  3617. }
  3618. bool LoopNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  3619. {
  3620. return false;
  3621. }
  3622. bool LoopNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  3623. {
  3624. Assert(false);
  3625. return false;
  3626. }
  3627. #if ENABLE_REGEX_CONFIG_OPTIONS
  3628. void LoopNode::Print(DebugWriter* w, const Char* litbuf) const
  3629. {
  3630. w->Print(_u("Loop("));
  3631. repeats.Print(w);
  3632. w->PrintEOL(_u(", %s)"), isGreedy ? _u("greedy") : _u("non-greedy"));
  3633. PrintAnnotations(w);
  3634. w->PrintEOL(_u("{"));
  3635. w->Indent();
  3636. body->Print(w, litbuf);
  3637. w->Unindent();
  3638. w->PrintEOL(_u("}"));
  3639. }
  3640. #endif
  3641. // ----------------------------------------------------------------------
  3642. // AssertionNode
  3643. // ----------------------------------------------------------------------
  3644. CharCount AssertionNode::LiteralLength() const
  3645. {
  3646. return 0;
  3647. }
  3648. bool AssertionNode::IsCharOrPositiveSet() const
  3649. {
  3650. return false;
  3651. }
  3652. CharCount AssertionNode::TransferPass0(Compiler& compiler, const Char* litbuf)
  3653. {
  3654. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3655. return body->TransferPass0(compiler, litbuf);
  3656. }
  3657. void AssertionNode::TransferPass1(Compiler& compiler, const Char* litbuf)
  3658. {
  3659. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3660. body->TransferPass1(compiler, litbuf);
  3661. }
  3662. bool AssertionNode::IsRefiningAssertion(Compiler& compiler)
  3663. {
  3664. return !isNegation;
  3665. }
  3666. void AssertionNode::AnnotatePass0(Compiler& compiler)
  3667. {
  3668. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3669. isWord = false;
  3670. body->AnnotatePass0(compiler);
  3671. }
  3672. void AssertionNode::AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated)
  3673. {
  3674. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3675. features = HasAssertion;
  3676. body->AnnotatePass1(compiler, parentNotInLoop, parentAtLeastOnce, false, parentNotNegated && !isNegation);
  3677. features |= body->features;
  3678. thisConsumes.Exact(0);
  3679. if (isNegation)
  3680. firstSet = compiler.standardChars->GetFullSet();
  3681. else
  3682. firstSet = body->firstSet;
  3683. isFirstExact = false;
  3684. if (isNegation)
  3685. // This will always fail
  3686. isThisIrrefutable = false;
  3687. else
  3688. // If body is irrefutable overall assertion is irrefutable
  3689. isThisIrrefutable = body->isThisIrrefutable;
  3690. isThisWillNotProgress = true;
  3691. isThisWillNotRegress = true;
  3692. isNotInLoop = parentNotInLoop;
  3693. isAtLeastOnce = parentAtLeastOnce;
  3694. isNotSpeculative = parentNotSpeculative;
  3695. isNotNegated = parentNotNegated;
  3696. }
  3697. void AssertionNode::AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress)
  3698. {
  3699. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3700. prevConsumes = accumConsumes;
  3701. isPrevWillNotProgress = accumPrevWillNotProgress;
  3702. isPrevWillNotRegress = accumPrevWillNotRegress;
  3703. body->AnnotatePass2(compiler, accumConsumes, accumPrevWillNotProgress, accumPrevWillNotRegress);
  3704. }
  3705. void AssertionNode::AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL)
  3706. {
  3707. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3708. followConsumes = accumConsumes;
  3709. followSet = accumFollow;
  3710. isFollowIrrefutable = accumFollowIrrefutable;
  3711. isFollowEOL = accumFollowEOL;
  3712. // Can't say anything about what the assertion body will see at its end
  3713. CountDomain innerConsumes;
  3714. CharSet<Char>* innerFollow = compiler.standardChars->GetFullSet();
  3715. // We can never backtrack into the body of an assertion (the continuation stack is cut)
  3716. body->AnnotatePass3(compiler, innerConsumes, innerFollow, true, false);
  3717. hasInitialHardFailBOI = body->hasInitialHardFailBOI;
  3718. }
  3719. void AssertionNode::AnnotatePass4(Compiler& compiler)
  3720. {
  3721. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3722. body->AnnotatePass4(compiler);
  3723. // Even if body is non-deterministic we cut the choicepoints on exit from the assertion,
  3724. // so overall assertion is deterministic.
  3725. isDeterministic = true;
  3726. //
  3727. // Compilation scheme: Fail
  3728. //
  3729. // If body is irrefutable, assertion will always fail (and will leave groups empty).
  3730. //
  3731. if (isNegation && body->isThisIrrefutable)
  3732. {
  3733. // ***COMMIT***
  3734. scheme = Fail;
  3735. return;
  3736. }
  3737. //
  3738. // Compilation scheme: Succ
  3739. //
  3740. // If body is irrefutable, assertion will always succeed. If it does not define groups
  3741. // we can eliminate it altogether.
  3742. //
  3743. if (!isNegation && body->isThisIrrefutable && !body->ContainsDefineGroup())
  3744. {
  3745. // **COMMIT**
  3746. scheme = Succ;
  3747. return;
  3748. }
  3749. //
  3750. // Compilation scheme: BeginEnd
  3751. //
  3752. scheme = BeginEnd;
  3753. }
  3754. bool AssertionNode::SupportsPrefixSkipping(Compiler& compiler) const
  3755. {
  3756. return false;
  3757. }
  3758. Node* AssertionNode::HeadSyncronizingNode(Compiler& compiler)
  3759. {
  3760. return 0;
  3761. }
  3762. CharCount AssertionNode::MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const
  3763. {
  3764. return 0;
  3765. }
  3766. void AssertionNode::CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const
  3767. {
  3768. Assert(false);
  3769. }
  3770. void AssertionNode::BestSyncronizingNode(Compiler& compiler, Node*& bestNode)
  3771. {
  3772. }
  3773. void AssertionNode::AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup)
  3774. {
  3775. PROBE_STACK_NO_DISPOSE(scriptContext, Js::Constants::MinStackRegex);
  3776. body->AccumDefineGroups(scriptContext, minGroup, maxGroup);
  3777. }
  3778. void AssertionNode::Emit(Compiler& compiler, CharCount& skipped)
  3779. {
  3780. PROBE_STACK_NO_DISPOSE(compiler.scriptContext, Js::Constants::MinStackRegex);
  3781. Assert(skipped == 0);
  3782. switch (scheme)
  3783. {
  3784. case BeginEnd:
  3785. {
  3786. //
  3787. // Compilation scheme:
  3788. //
  3789. // BeginAssertion Lexit
  3790. // <body>
  3791. // EndAssertion
  3792. // Lexit:
  3793. //
  3794. int minBodyGroupId = compiler.program->numGroups;
  3795. int maxBodyGroupId = -1;
  3796. body->AccumDefineGroups(compiler.scriptContext, minBodyGroupId, maxBodyGroupId);
  3797. Label fixup = compiler.GetFixup(&EMIT(compiler, BeginAssertionInst, isNegation, minBodyGroupId, maxBodyGroupId)->nextLabel);
  3798. body->Emit(compiler, skipped);
  3799. EMIT(compiler, EndAssertionInst);
  3800. compiler.DoFixup(fixup, compiler.CurrentLabel());
  3801. break;
  3802. }
  3803. case Succ:
  3804. {
  3805. // Nothing to emit
  3806. break;
  3807. }
  3808. case Fail:
  3809. {
  3810. //
  3811. // Compilation scheme:
  3812. //
  3813. // Fail
  3814. //
  3815. EMIT(compiler, FailInst);
  3816. break;
  3817. }
  3818. }
  3819. }
  3820. CharCount AssertionNode::EmitScan(Compiler& compiler, bool isHeadSyncronizingNode)
  3821. {
  3822. Assert(false);
  3823. return 0;
  3824. }
  3825. bool AssertionNode::IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi)
  3826. {
  3827. return false;
  3828. }
  3829. bool AssertionNode::IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const
  3830. {
  3831. return false;
  3832. }
  3833. bool AssertionNode::BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const
  3834. {
  3835. Assert(false);
  3836. return false;
  3837. }
  3838. #if ENABLE_REGEX_CONFIG_OPTIONS
  3839. void AssertionNode::Print(DebugWriter* w, const Char* litbuf) const
  3840. {
  3841. w->PrintEOL(_u("Assertion(%s)"), isNegation ? _u("negative") : _u("positive"));
  3842. PrintAnnotations(w);
  3843. w->PrintEOL(_u("{"));
  3844. w->Indent();
  3845. body->Print(w, litbuf);
  3846. w->Unindent();
  3847. w->PrintEOL(_u("}"));
  3848. }
  3849. #endif
  3850. // ----------------------------------------------------------------------
  3851. // Compiler
  3852. // ----------------------------------------------------------------------
  3853. Compiler::Compiler
  3854. ( Js::ScriptContext* scriptContext
  3855. , ArenaAllocator* ctAllocator
  3856. , ArenaAllocator* rtAllocator
  3857. , StandardChars<Char>* standardChars
  3858. , Program* program
  3859. #if ENABLE_REGEX_CONFIG_OPTIONS
  3860. , DebugWriter* w
  3861. , RegexStats* stats
  3862. #endif
  3863. )
  3864. : scriptContext(scriptContext)
  3865. , ctAllocator(ctAllocator)
  3866. , rtAllocator(rtAllocator)
  3867. , standardChars(standardChars)
  3868. #if ENABLE_REGEX_CONFIG_OPTIONS
  3869. , w(w)
  3870. , stats(stats)
  3871. #endif
  3872. , program(program)
  3873. , instBuf(0)
  3874. , instLen(0)
  3875. , instNext(0)
  3876. , nextLoopId(0)
  3877. {}
  3878. void Compiler::CaptureNoLiterals(Program* program)
  3879. {
  3880. program->rep.insts.litbuf = nullptr;
  3881. program->rep.insts.litbufLen = 0;
  3882. }
  3883. void Compiler::CaptureLiterals(Node* root, const Char* litbuf)
  3884. {
  3885. // Program will own literal buffer. Prepare buffer and nodes for case-invariant matching if necessary.
  3886. CharCount finalLen = root->TransferPass0(*this, litbuf);
  3887. program->rep.insts.litbuf = finalLen == 0 ? 0 : RecyclerNewArrayLeaf(scriptContext->GetRecycler(), Char, finalLen);
  3888. program->rep.insts.litbufLen = 0;
  3889. root->TransferPass1(*this, litbuf);
  3890. Assert(program->rep.insts.litbufLen == finalLen);
  3891. }
  3892. void Compiler::EmitAndCaptureSuccInst(Recycler* recycler, Program* program)
  3893. {
  3894. program->rep.insts.insts = (uint8*)RecyclerNewLeaf(recycler, SuccInst);
  3895. program->rep.insts.instsLen = sizeof(SuccInst);
  3896. program->numLoops = 0;
  3897. }
  3898. void Compiler::CaptureInsts()
  3899. {
  3900. program->rep.insts.insts = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), uint8, instNext);
  3901. program->rep.insts.instsLen = instNext;
  3902. memcpy_s(program->rep.insts.insts, instNext, instBuf, instNext);
  3903. program->numLoops = nextLoopId;
  3904. }
  3905. void Compiler::FreeBody()
  3906. {
  3907. if (instBuf != 0)
  3908. {
  3909. ctAllocator->Free(instBuf, instLen);
  3910. instBuf = 0;
  3911. instLen = 0;
  3912. instNext = 0;
  3913. }
  3914. }
  3915. void Compiler::CompileEmptyRegex
  3916. ( Program* program
  3917. , RegexPattern* pattern
  3918. #if ENABLE_REGEX_CONFIG_OPTIONS
  3919. , DebugWriter* w
  3920. , RegexStats* stats
  3921. #endif
  3922. )
  3923. {
  3924. program->tag = Program::InstructionsTag;
  3925. CaptureNoLiterals(program);
  3926. EmitAndCaptureSuccInst(pattern->GetScriptContext()->GetRecycler(), program);
  3927. }
  3928. void Compiler::Compile
  3929. ( Js::ScriptContext* scriptContext
  3930. , ArenaAllocator* ctAllocator
  3931. , ArenaAllocator* rtAllocator
  3932. , StandardChars<Char>* standardChars
  3933. , Program *program
  3934. , Node* root
  3935. , const Char* litbuf
  3936. , RegexPattern* pattern
  3937. #if ENABLE_REGEX_CONFIG_OPTIONS
  3938. , DebugWriter* w
  3939. , RegexStats* stats
  3940. #endif
  3941. )
  3942. {
  3943. #if ENABLE_REGEX_CONFIG_OPTIONS
  3944. if (w != 0)
  3945. {
  3946. w->PrintEOL(_u("REGEX AST /%s/ {"), PointerValue(program->source));
  3947. w->Indent();
  3948. root->Print(w, litbuf);
  3949. w->Unindent();
  3950. w->PrintEOL(_u("}"));
  3951. w->Flush();
  3952. }
  3953. #endif
  3954. Compiler compiler
  3955. ( scriptContext
  3956. , ctAllocator
  3957. , rtAllocator
  3958. , standardChars
  3959. , program
  3960. #if ENABLE_REGEX_CONFIG_OPTIONS
  3961. , w
  3962. , stats
  3963. #endif
  3964. );
  3965. bool compiled = false;
  3966. if (REGEX_CONFIG_FLAG(RegexOptimize))
  3967. {
  3968. // SPECIAL CASE: Octoquad/trigrams
  3969. // (must handle before converting to case-insensitive form since the later interferes with octoquad pattern recognizer)
  3970. if (OctoquadIdentifier::Qualifies(program))
  3971. {
  3972. int numCodes;
  3973. char localCodeToChar[TrigramAlphabet::AlphaCount];
  3974. char localCharToCode[TrigramAlphabet::AsciiTableSize];
  3975. char (*codeToChar)[TrigramAlphabet::AlphaCount];
  3976. char (*charToCode)[TrigramAlphabet::AsciiTableSize];
  3977. TrigramAlphabet *trigramAlphabet = scriptContext->GetTrigramAlphabet();
  3978. if(trigramAlphabet)
  3979. {
  3980. numCodes = TrigramAlphabet::AlphaCount;
  3981. codeToChar = &trigramAlphabet->alpha;
  3982. charToCode = &trigramAlphabet->alphaBits;
  3983. }
  3984. else
  3985. {
  3986. numCodes = 0;
  3987. codeToChar = &localCodeToChar;
  3988. charToCode = &localCharToCode;
  3989. }
  3990. OctoquadIdentifier oi(numCodes, *codeToChar, *charToCode);
  3991. // We haven't captured literals yet: temporarily set the program's litbuf to be the parser's litbuf
  3992. Assert(program->rep.insts.litbuf == nullptr);
  3993. program->rep.insts.litbuf = (Char*)litbuf;
  3994. if (root->IsOctoquad(compiler, &oi) && oi.IsOctoquad())
  3995. {
  3996. program->rep.insts.litbuf = nullptr;
  3997. oi.InitializeTrigramInfo(scriptContext, pattern);
  3998. program->tag = Program::OctoquadTag;
  3999. program->rep.octoquad.matcher = OctoquadMatcher::New(scriptContext->GetRecycler(), standardChars, program->GetCaseMappingSource(), &oi);
  4000. compiled = true;
  4001. }
  4002. else
  4003. program->rep.insts.litbuf = nullptr;
  4004. }
  4005. }
  4006. if (!compiled)
  4007. {
  4008. if (REGEX_CONFIG_FLAG(RegexOptimize))
  4009. {
  4010. Char c = 0;
  4011. if (root->IsSingleChar(compiler, c))
  4012. {
  4013. // SPECIAL CASE: c
  4014. program->tag = Program::SingleCharTag;
  4015. program->rep.singleChar.c = c;
  4016. }
  4017. else if (root->IsBoundedWord(compiler))
  4018. {
  4019. // SPECIAL CASE: \b\w+\b
  4020. program->tag = Program::BoundedWordTag;
  4021. }
  4022. else if (root->IsLeadingTrailingSpaces(compiler,
  4023. program->rep.leadingTrailingSpaces.beginMinMatch,
  4024. program->rep.leadingTrailingSpaces.endMinMatch))
  4025. {
  4026. // SPECIAL CASE: ^\s*|\s*$
  4027. program->tag = Program::LeadingTrailingSpacesTag;
  4028. }
  4029. else if (root->IsBOILiteral2(compiler))
  4030. {
  4031. program->tag = Program::BOILiteral2Tag;
  4032. program->rep.boiLiteral2.literal = *(DWORD *)litbuf;
  4033. }
  4034. else
  4035. {
  4036. program->tag = Program::InstructionsTag;
  4037. compiler.CaptureLiterals(root, litbuf);
  4038. root->AnnotatePass0(compiler);
  4039. root->AnnotatePass1(compiler, true, true, true, true);
  4040. // Nothing comes before or after overall pattern
  4041. CountDomain consumes(0);
  4042. // Match could progress from lhs (since we try successive start positions), but can never regress
  4043. root->AnnotatePass2(compiler, consumes, false, true);
  4044. // Anything could follow an end of pattern match
  4045. CharSet<Char>* follow = standardChars->GetFullSet();
  4046. root->AnnotatePass3(compiler, consumes, follow, true, false);
  4047. root->AnnotatePass4(compiler);
  4048. #if ENABLE_REGEX_CONFIG_OPTIONS
  4049. if (w != 0)
  4050. {
  4051. w->PrintEOL(_u("REGEX ANNOTATED AST /%s/ {"), PointerValue(program->source));
  4052. w->Indent();
  4053. root->Print(w, program->rep.insts.litbuf);
  4054. w->Unindent();
  4055. w->PrintEOL(_u("}"));
  4056. w->Flush();
  4057. }
  4058. #endif
  4059. CharCount skipped = 0;
  4060. // If the root Node has a hard fail BOI, we should not emit any synchronize Nodes
  4061. // since we can easily just search from the beginning.
  4062. if (root->hasInitialHardFailBOI == false)
  4063. {
  4064. // If the root Node doesn't have hard fail BOI but sticky flag is present don't synchronize Nodes
  4065. // since we can easily just search from the beginning. Instead set to special InstructionTag
  4066. if ((program->flags & StickyRegexFlag) != 0)
  4067. {
  4068. compiler.SetBOIInstructionsProgramForStickyFlagTag();
  4069. }
  4070. else
  4071. {
  4072. Node* bestSyncronizingNode = 0;
  4073. root->BestSyncronizingNode(compiler, bestSyncronizingNode);
  4074. Node* headSyncronizingNode = root->HeadSyncronizingNode(compiler);
  4075. if ((bestSyncronizingNode == 0 && headSyncronizingNode != 0) ||
  4076. (bestSyncronizingNode != 0 && headSyncronizingNode == bestSyncronizingNode))
  4077. {
  4078. // Scan and consume the head, continue with rest assuming head has been consumed
  4079. skipped = headSyncronizingNode->EmitScan(compiler, true);
  4080. }
  4081. else if (bestSyncronizingNode != 0)
  4082. {
  4083. // Scan for the synchronizing node, then backup ready for entire pattern
  4084. skipped = bestSyncronizingNode->EmitScan(compiler, false);
  4085. Assert(skipped == 0);
  4086. // We're synchronizing to a non-head node; if we have to back up, then try to synchronize to a character
  4087. // in the first set before running the remaining instructions
  4088. if (!bestSyncronizingNode->prevConsumes.CouldMatchEmpty()) // must back up at least one character
  4089. skipped = root->EmitScanFirstSet(compiler);
  4090. }
  4091. else
  4092. {
  4093. // Optionally scan for a character in the overall pattern's FIRST set, possibly consume it,
  4094. // then match all or remainder of pattern
  4095. skipped = root->EmitScanFirstSet(compiler);
  4096. }
  4097. }
  4098. }
  4099. root->Emit(compiler, skipped);
  4100. compiler.Emit<SuccInst>();
  4101. compiler.CaptureInsts();
  4102. }
  4103. }
  4104. else
  4105. {
  4106. program->tag = Program::InstructionsTag;
  4107. compiler.CaptureLiterals(root, litbuf);
  4108. CharCount skipped = 0;
  4109. root->Emit(compiler, skipped);
  4110. compiler.Emit<SuccInst>();
  4111. compiler.CaptureInsts();
  4112. }
  4113. }
  4114. #if ENABLE_REGEX_CONFIG_OPTIONS
  4115. if (w != 0)
  4116. {
  4117. w->PrintEOL(_u("REGEX PROGRAM /%s/ "), PointerValue(program->source));
  4118. program->Print(w);
  4119. w->Flush();
  4120. }
  4121. #endif
  4122. }
  4123. }