| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
777477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- #include "ParserPch.h"
- namespace UnifiedRegex
- {
- // ----------------------------------------------------------------------
- // CountDomain
- // ----------------------------------------------------------------------
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void CountDomain::Print(DebugWriter* w) const
- {
- if (upper != CharCountFlag && lower == (CharCount)upper)
- w->Print(_u("[%u]"), lower);
- else
- {
- w->Print(_u("[%u-"), lower);
- if (upper == CharCountFlag)
- w->Print(_u("inf]"));
- else
- w->Print(_u("%u]"), (CharCount)upper);
- }
- }
- #endif
- // ----------------------------------------------------------------------
- // Matcher (inlined, called from instruction Exec methods)
- // ----------------------------------------------------------------------
// Placement-constructs a T at the address returned by contStack.Push<T>(), forwarding the
// remaining arguments to T's constructor. The expression evaluates to the new T*.
- #define PUSH(contStack, T, ...) (new (contStack.Push<T>()) T(__VA_ARGS__))
// Same as PUSH, but for the assertion stack, whose Push() is called without a template argument.
- #define PUSHA(assertionStack, T, ...) (new (assertionStack.Push()) T(__VA_ARGS__))
// Expands to LabelToInstPointer<OInst>(Inst::O, label): O is token-pasted with "Inst" to form the
// template argument and is also used as the Inst:: tag.
- #define L2I(O, label) LabelToInstPointer<O##Inst>(Inst::O, label)
// Argument list passed to Fail(). It expands to bare identifiers, so the call site must have
// variables with exactly these names in scope.
- #define FAIL_PARAMETERS input, inputOffset, instPointer, contStack, assertionStack, qcTicks
// Argument list passed to HardFail(); same naming requirement as FAIL_PARAMETERS, with the
// hard-fail mode appended as the last argument.
- #define HARDFAIL_PARAMETERS(mode) input, inputLength, matchStart, inputOffset, instPointer, contStack, assertionStack, qcTicks, mode
- // Regex QC heuristics:
- // - TicksPerQc
- // - Number of ticks from a previous QC needed to cause another QC. The value affects how often QC will be triggered, so
- // on slower machines or debug builds, the value needs to be smaller to maintain a reasonable frequency of QCs.
- // - TicksPerQcTimeCheck
- // - Number of ticks from a previous QC needed to trigger a time check. Elapsed time from the previous QC is checked to
- // see if a QC needs to be triggered. The value must be less than TicksPerQc and small enough to reasonably guarantee
- // a QC every TimePerQc milliseconds without affecting perf.
- // - TimePerQc
- // - The target time between QCs
// TicksPerQc: the initializer is assembled from preprocessor fragments. The platform branch
// below supplies "1u << N", debug builds then append ">> 2", and the lone ';' closes the
// definition. All values stay powers of two, which QueryContinue asserts.
- #if defined(_M_ARM)
// ARM: fixed at 2^19 ticks.
- const uint Matcher::TicksPerQc = 1u << 19
- #else
// Other targets: 2^17 ticks when AutoSystemInfo::ShouldQCMoreFrequently() is true, else 2^21.
- const uint Matcher::TicksPerQc = 1u << (AutoSystemInfo::ShouldQCMoreFrequently() ? 17 : 21)
- #endif
- #if DBG
// Debug builds: divide the tick budget by 4 (shifts are left-associative, so this applies to
// the whole "1u << N" expression selected above).
- >> 2
- #endif
- ;
// The time check fires four times per TicksPerQc interval.
- const uint Matcher::TicksPerQcTimeCheck = Matcher::TicksPerQc >> 2;
// Target wall-clock spacing between QCs: 50 ms when QC should run more frequently, else 100 ms.
- const uint Matcher::TimePerQc = AutoSystemInfo::ShouldQCMoreFrequently() ? 50 : 100; // milliseconds
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void Matcher::PushStats(ContStack& contStack, const Char* const input) const
- {
- if (stats != 0)
- {
- stats->numPushes++;
- if (contStack.Position() > stats->stackHWM)
- stats->stackHWM = contStack.Position();
- }
- if (w != 0)
- {
- w->Print(_u("PUSH "));
- contStack.Top()->Print(w, input);
- }
- }
- void Matcher::PopStats(ContStack& contStack, const Char* const input) const
- {
- if (stats != 0)
- stats->numPops++;
- if (w != 0)
- {
- const Cont* top = contStack.Top();
- if (top == 0)
- w->PrintEOL(_u("<empty stack>"));
- else
- {
- w->Print(_u("POP "));
- top->Print(w, input);
- }
- }
- }
- void Matcher::UnPopStats(ContStack& contStack, const Char* const input) const
- {
- if (stats != 0)
- stats->numPops--;
- if (w != 0)
- {
- const Cont* top = contStack.Top();
- if (top == 0)
- w->PrintEOL(_u("<empty stack>"));
- else
- {
- w->Print(_u("UNPOP "));
- top->Print(w, input);
- }
- }
- }
- void Matcher::CompStats() const
- {
- if (stats != 0)
- stats->numCompares++;
- }
- void Matcher::InstStats() const
- {
- if (stats != 0)
- stats->numInsts++;
- }
- #endif
- __inline void Matcher::QueryContinue(uint &qcTicks)
- {
- // See definition of TimePerQc for description of regex QC heuristics
- Assert(!(TicksPerQc & TicksPerQc - 1)); // must be a power of 2
- Assert(!(TicksPerQcTimeCheck & TicksPerQcTimeCheck - 1)); // must be a power of 2
- Assert(TicksPerQcTimeCheck < TicksPerQc);
- if(PHASE_OFF1(Js::RegexQcPhase))
- return;
- if(++qcTicks & TicksPerQcTimeCheck - 1)
- return;
- DoQueryContinue(qcTicks);
- }
- __inline bool Matcher::HardFail
- ( const Char* const input
- , const CharCount inputLength
- , CharCount &matchStart
- , CharCount &inputOffset
- , const uint8 *&instPointer
- , ContStack &contStack
- , AssertionStack &assertionStack
- , uint &qcTicks
- , HardFailMode mode )
- {
- switch (mode)
- {
- case BacktrackAndLater:
- return Fail(FAIL_PARAMETERS);
- case BacktrackOnly:
- if (Fail(FAIL_PARAMETERS))
- {
- // No use trying any more start positions
- matchStart = inputLength;
- return true; // STOP EXECUTING
- }
- else
- return false;
- case LaterOnly:
- #if ENABLE_REGEX_CONFIG_OPTIONS
- if (w != 0)
- w->PrintEOL(_u("CLEAR"));
- #endif
- contStack.Clear();
- assertionStack.Clear();
- return true; // STOP EXECUTING
- case ImmediateFail:
- // No use trying any more start positions
- matchStart = inputLength;
- return true; // STOP EXECUTING
- default:
- Assume(false);
- }
- return true;
- }
- __inline bool Matcher::PopAssertion(CharCount &inputOffset, const uint8 *&instPointer, ContStack &contStack, AssertionStack &assertionStack, bool succeeded)
- {
- AssertionInfo* info = assertionStack.Top();
- Assert(info != 0);
- assertionStack.Pop();
- BeginAssertionInst* begin = L2I(BeginAssertion, info->beginLabel);
- // Cut the existing continuations (we never backtrack into an assertion)
- // NOTE: We don't include the effective pops in the stats
- #if ENABLE_REGEX_CONFIG_OPTIONS
- if (w != 0)
- w->PrintEOL(_u("POP TO %llu"), (unsigned long long)info->contStackPosition);
- #endif
- contStack.PopTo(info->contStackPosition);
- // succeeded isNegation action
- // --------- ---------- ----------------------------------------------------------------------------------
- // false false Fail into outer continuations (inner group bindings will have been undone)
- // true false Jump to next label (inner group bindings are now frozen)
- // false true Jump to next label (inner group bindings will have been undone and are now frozen)
- // true true Fail into outer continuations (inner group binding MUST BE CLEARED)
- if (succeeded && begin->isNegation)
- ResetInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId);
- if (succeeded == begin->isNegation)
- {
- // Assertion failed
- return false;
- }
- else
- {
- // Continue with next label but from original input position
- inputOffset = info->startInputOffset;
- instPointer = LabelToInstPointer(begin->nextLabel);
- return true;
- }
- }
- __inline void Matcher::SaveInnerGroups(
- const int fromGroupId,
- const int toGroupId,
- const bool reset,
- const Char *const input,
- ContStack &contStack)
- {
- if(toGroupId >= 0)
- DoSaveInnerGroups(fromGroupId, toGroupId, reset, input, contStack);
- }
- void Matcher::DoSaveInnerGroups(
- const int fromGroupId,
- const int toGroupId,
- const bool reset,
- const Char *const input,
- ContStack &contStack)
- {
- Assert(fromGroupId >= 0);
- Assert(toGroupId >= 0);
- Assert(fromGroupId <= toGroupId);
- int undefinedRangeFromId = -1;
- int groupId = fromGroupId;
- do
- {
- GroupInfo *const groupInfo = GroupIdToGroupInfo(groupId);
- if(groupInfo->IsUndefined())
- {
- if(undefinedRangeFromId < 0)
- undefinedRangeFromId = groupId;
- continue;
- }
- if(undefinedRangeFromId >= 0)
- {
- Assert(groupId > 0);
- DoSaveInnerGroups_AllUndefined(undefinedRangeFromId, groupId - 1, input, contStack);
- undefinedRangeFromId = -1;
- }
- PUSH(contStack, RestoreGroupCont, groupId, *groupInfo);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- PushStats(contStack, input);
- #endif
- if(reset)
- groupInfo->Reset();
- } while(++groupId <= toGroupId);
- if(undefinedRangeFromId >= 0)
- {
- Assert(toGroupId >= 0);
- DoSaveInnerGroups_AllUndefined(undefinedRangeFromId, toGroupId, input, contStack);
- }
- }
- __inline void Matcher::SaveInnerGroups_AllUndefined(
- const int fromGroupId,
- const int toGroupId,
- const Char *const input,
- ContStack &contStack)
- {
- if(toGroupId >= 0)
- DoSaveInnerGroups_AllUndefined(fromGroupId, toGroupId, input, contStack);
- }
- void Matcher::DoSaveInnerGroups_AllUndefined(
- const int fromGroupId,
- const int toGroupId,
- const Char *const input,
- ContStack &contStack)
- {
- Assert(fromGroupId >= 0);
- Assert(toGroupId >= 0);
- Assert(fromGroupId <= toGroupId);
- #if DBG
- for(int groupId = fromGroupId; groupId <= toGroupId; ++groupId)
- {
- Assert(GroupIdToGroupInfo(groupId)->IsUndefined());
- }
- #endif
- if(fromGroupId == toGroupId)
- PUSH(contStack, ResetGroupCont, fromGroupId);
- else
- PUSH(contStack, ResetGroupRangeCont, fromGroupId, toGroupId);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- PushStats(contStack, input);
- #endif
- }
- __inline void Matcher::ResetGroup(int groupId)
- {
- GroupInfo* info = GroupIdToGroupInfo(groupId);
- info->Reset();
- }
- __inline void Matcher::ResetInnerGroups(int minGroupId, int maxGroupId)
- {
- for (int i = minGroupId; i <= maxGroupId; i++)
- ResetGroup(i);
- }
- // ----------------------------------------------------------------------
- // Mixins
- // ----------------------------------------------------------------------
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void BackupMixin::Print(DebugWriter* w, const char16* litbuf) const
- {
- w->Print(_u("backup: "));
- backup.Print(w);
- }
- #endif
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void CharMixin::Print(DebugWriter* w, const char16* litbuf) const
- {
- w->Print(_u("c: "));
- w->PrintQuotedChar(c);
- }
- #endif
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void Char2Mixin::Print(DebugWriter* w, const char16* litbuf) const
- {
- w->Print(_u("c0: "));
- w->PrintQuotedChar(cs[0]);
- w->Print(_u(", c1: "));
- w->PrintQuotedChar(cs[1]);
- }
- #endif
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void Char3Mixin::Print(DebugWriter* w, const char16* litbuf) const
- {
- w->Print(_u("c0: "));
- w->PrintQuotedChar(cs[0]);
- w->Print(_u(", c1: "));
- w->PrintQuotedChar(cs[1]);
- w->Print(_u(", c2: "));
- w->PrintQuotedChar(cs[2]);
- }
- #endif
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void Char4Mixin::Print(DebugWriter* w, const char16* litbuf) const
- {
- w->Print(_u("c0: "));
- w->PrintQuotedChar(cs[0]);
- w->Print(_u(", c1: "));
- w->PrintQuotedChar(cs[1]);
- w->Print(_u(", c2: "));
- w->PrintQuotedChar(cs[2]);
- w->Print(_u(", c3: "));
- w->PrintQuotedChar(cs[3]);
- }
- #endif
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void LiteralMixin::Print(DebugWriter* w, const char16* litbuf, bool isEquivClass) const
- {
- if (isEquivClass)
- {
- w->Print(_u("equivLiterals: "));
- for (int i = 0; i < CaseInsensitive::EquivClassSize; i++)
- {
- if (i > 0)
- w->Print(_u(", "));
- w->Print(_u("\""));
- for (CharCount j = 0; j < length; j++)
- w->PrintEscapedChar(litbuf[offset + j * CaseInsensitive::EquivClassSize + i]);
- w->Print(_u("\""));
- }
- }
- else
- {
- w->Print(_u("literal: "));
- w->PrintQuotedString(litbuf + offset, length);
- }
- }
- #endif
- // ----------------------------------------------------------------------
- // Char2LiteralScannerMixin
- // ----------------------------------------------------------------------
// Scans `input` forward from `inputOffset` for the two-character sequence cs[0], cs[1].
// On success, stores the index of the cs[0] character in `inputOffset` and returns true.
// Returns false, leaving `inputOffset` unchanged, when no occurrence is found.
// The scan looks at the *second* character of each candidate pair: if it is neither cs[1] nor
// cs[0], no match can begin at the current position or the one after, so the scan advances by
// two characters at a time. `matcher` is used only for comparison statistics.
- bool Char2LiteralScannerMixin::Match(Matcher& matcher, const char16* const input, const CharCount inputLength, CharCount& inputOffset) const
- {
// Guard: with an empty input, `input + inputLength - 1` below would point before the buffer.
- if (inputLength == 0)
- {
- return false;
- }
- const uint matchC0 = Chars<char16>::CTU(cs[0]);
- const uint matchC1 = Chars<char16>::CTU(cs[1]);
- const char16 * currentInput = input + inputOffset;
// endInput addresses the last character. A match must start strictly before it, which is why
// every loop condition below is `currentInput < endInput` and currentInput[1] is always readable.
- const char16 * endInput = input + inputLength - 1;
- while (currentInput < endInput)
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
// Phase 1: stride-2 scan keyed on the second character of the pair at currentInput.
- while (true)
- {
- const uint c1 = Chars<char16>::CTU(currentInput[1]);
- if (c1 != matchC1)
- {
// The second character is cs[0]: a match may start one position ahead; go to phase 2.
- if (c1 == matchC0)
- {
- break;
- }
// Neither cs[0] nor cs[1]: no match can start here or at the next position.
- currentInput += 2;
- if (currentInput >= endInput)
- {
- return false;
- }
- continue;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- // Check the first character
- const uint c0 = Chars<char16>::CTU(*currentInput);
- if (c0 == matchC0)
- {
- inputOffset = (CharCount)(currentInput - input);
- return true;
- }
// The second character equals cs[1] but the first is wrong. Only when cs[0] == cs[1] can that
// second character also begin a match, so go to phase 2 in that case.
- if (matchC0 == matchC1)
- {
- break;
- }
- currentInput +=2;
- if (currentInput >= endInput)
- {
- return false;
- }
- }
- // If the second character in the buffer matches the first in the pattern, continue
- // to see if the next character has the second in the pattern
- currentInput++;
// Phase 2: *currentInput is known to equal cs[0]; step one character at a time while that holds.
- while (currentInput < endInput)
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- const uint c1 = Chars<char16>::CTU(currentInput[1]);
- if (c1 == matchC1)
- {
- inputOffset = (CharCount)(currentInput - input);
- return true;
- }
// The following character is neither cs[1] nor cs[0]: skip past it and resume phase 1.
- if (c1 != matchC0)
- {
- currentInput += 2;
- break;
- }
// The following character is cs[0] again: it becomes the new candidate start.
- currentInput++;
- }
- }
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the Char2Mixin description, then a suffix naming this scanner.
void Char2LiteralScannerMixin::Print(DebugWriter* w, const char16 * litbuf) const
{
    Char2Mixin::Print(w, litbuf);

    w->Print(_u(" (with two character literal scanner)"));
}
#endif
- // ----------------------------------------------------------------------
- // ScannerMixinT
- // ----------------------------------------------------------------------
- template <typename ScannerT>
- void ScannerMixinT<ScannerT>::FreeBody(ArenaAllocator* rtAllocator)
- {
- scanner.FreeBody(rtAllocator, length);
- }
- template <typename ScannerT>
- __inline bool
- ScannerMixinT<ScannerT>::Match(Matcher& matcher, const char16 * const input, const CharCount inputLength, CharCount& inputOffset) const
- {
- Assert(length <= matcher.program->rep.insts.litbufLen - offset);
- return scanner.Match<1>
- ( input
- , inputLength
- , inputOffset
- , matcher.program->rep.insts.litbuf + offset
- , length
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , matcher.stats
- #endif
- );
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the literal via LiteralMixin, then the scanner's name
// as reported by ScannerT::GetName().
template <typename ScannerT>
void ScannerMixinT<ScannerT>::Print(DebugWriter* w, const char16* litbuf, bool isEquivClass) const
{
    LiteralMixin::Print(w, litbuf, isEquivClass);

    w->Print(_u(" (with %s scanner)"), ScannerT::GetName());
}
#endif
- // explicit instantiation
- template ScannerMixinT<TextbookBoyerMoore<char16>>;
- template ScannerMixinT<TextbookBoyerMooreWithLinearMap<char16>>;
- // ----------------------------------------------------------------------
- // EquivScannerMixinT
- // ----------------------------------------------------------------------
- template <uint lastPatCharEquivClassSize>
- __inline bool EquivScannerMixinT<lastPatCharEquivClassSize>::Match(Matcher& matcher, const char16* const input, const CharCount inputLength, CharCount& inputOffset) const
- {
- Assert(length * CaseInsensitive::EquivClassSize <= matcher.program->rep.insts.litbufLen - offset);
- CompileAssert(lastPatCharEquivClassSize >= 1 && lastPatCharEquivClassSize <= CaseInsensitive::EquivClassSize);
- return scanner.Match<CaseInsensitive::EquivClassSize, lastPatCharEquivClassSize>
- ( input
- , inputLength
- , inputOffset
- , matcher.program->rep.insts.litbuf + offset
- , length
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , matcher.stats
- #endif
- );
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the base description in equivalence-class mode, then the
// equivalence class size used for the last pattern character.
template <uint lastPatCharEquivClassSize>
void EquivScannerMixinT<lastPatCharEquivClassSize>::Print(DebugWriter* w, const char16* litbuf) const
{
    const bool isEquivClass = true;
    __super::Print(w, litbuf, isEquivClass);

    w->Print(_u(" (last char equiv size:%d)"), lastPatCharEquivClassSize);
}

// explicit instantiation
template struct EquivScannerMixinT<1>;
#endif
// ----------------------------------------------------------------------
// ScannerInfo
// ----------------------------------------------------------------------

#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: delegates to ScannerMixin::Print, passing this info's isEquivClass flag.
void ScannerInfo::Print(DebugWriter* w, const char16* litbuf) const
{
    ScannerMixin::Print(w, litbuf, isEquivClass);
}
#endif
- ScannerInfo* ScannersMixin::Add(Recycler *recycler, Program *program, CharCount offset, CharCount length, bool isEquivClass)
- {
- Assert(numLiterals < MaxNumSyncLiterals);
- return program->AddScannerForSyncToLiterals(recycler, numLiterals++, offset, length, isEquivClass);
- }
- void ScannersMixin::FreeBody(ArenaAllocator* rtAllocator)
- {
- for (int i = 0; i < numLiterals; i++)
- {
- infos[i]->FreeBody(rtAllocator);
- #if DBG
- infos[i] = 0;
- #endif
- }
- #if DBG
- numLiterals = 0;
- #endif
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints "literals: {a, b, ...}" with one entry per registered scanner info.
void ScannersMixin::Print(DebugWriter* w, const char16* litbuf) const
{
    w->Print(_u("literals: {"));

    for (int index = 0; index < numLiterals; index++)
    {
        // Separator goes before every entry except the first.
        if (index > 0)
        {
            w->Print(_u(", "));
        }
        infos[index]->Print(w, litbuf);
    }

    w->Print(_u("}"));
}
#endif
- template<bool IsNegation>
- void SetMixin<IsNegation>::FreeBody(ArenaAllocator* rtAllocator)
- {
- set.FreeBody(rtAllocator);
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints "set: ", then "not " for the negated instantiation,
// then the set itself. litbuf is not used.
template<bool IsNegation>
void SetMixin<IsNegation>::Print(DebugWriter* w, const char16* litbuf) const
{
    w->Print(_u("set: "));

    if (IsNegation)
    {
        w->Print(_u("not "));
    }

    set.Print(w);
}
#endif
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the canHardFail flag as "true" or "false". litbuf is not used.
void HardFailMixin::Print(DebugWriter* w, const char16* litbuf) const
{
    const char16* const flagText = canHardFail ? _u("true") : _u("false");
    w->Print(_u("hardFail: %s"), flagText);
}
#endif
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the group id. litbuf is not used.
void GroupMixin::Print(DebugWriter* w, const char16* litbuf) const
{
    w->Print(
        _u("groupId: %d"),
        groupId);
}
#endif
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints "repeats: " followed by the repeat bounds. litbuf is not used.
void ChompBoundedMixin::Print(DebugWriter* w, const char16* litbuf) const
{
    w->Print(_u("repeats: "));

    repeats.Print(w);
}
#endif
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the jump target label as four hex digits. litbuf is not used.
void JumpMixin::Print(DebugWriter* w, const char16* litbuf) const
{
    w->Print(
        _u("targetLabel: L%04x"),
        targetLabel);
}
#endif
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the minimum and maximum body group ids. litbuf is not used.
void BodyGroupsMixin::Print(DebugWriter* w, const char16* litbuf) const
{
    w->Print(
        _u("minBodyGroupId: %d, maxBodyGroupId: %d"),
        minBodyGroupId,
        maxBodyGroupId);
}
#endif
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the loop id, the repeat bounds, the exit label and the
// hasOuterLoops / hasInnerNondet flags. litbuf is not used.
void BeginLoopMixin::Print(DebugWriter* w, const char16* litbuf) const
{
    w->Print(_u("loopId: %d, repeats: "), loopId);
    repeats.Print(w);

    const char16* const outerLoopsText = hasOuterLoops ? _u("true") : _u("false");
    const char16* const innerNondetText = hasInnerNondet ? _u("true") : _u("false");
    w->Print(
        _u(", exitLabel: L%04x, hasOuterLoops: %s, hasInnerNondet: %s"),
        exitLabel,
        outerLoopsText,
        innerNondetText);
}
#endif
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the loop's begin label as four hex digits. litbuf is not used.
void RepeatLoopMixin::Print(DebugWriter* w, const char16* litbuf) const
{
    w->Print(
        _u("beginLabel: L%04x"),
        beginLabel);
}
#endif
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the fail label as four hex digits. litbuf is not used.
void TryMixin::Print(DebugWriter* w, const char16* litbuf) const
{
    w->Print(
        _u("failLabel: L%04x"),
        failLabel);
}
#endif
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the fixed length as an unsigned value. litbuf is not used.
void FixedLengthMixin::Print(DebugWriter* w, const char16* litbuf) const
{
    w->Print(
        _u("length: %u"),
        length);
}
#endif
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints the noNeedToSave flag as "true" or "false". litbuf is not used.
void NoNeedToSaveMixin::Print(DebugWriter* w, const char16* litbuf) const
{
    const char16* const flagText = noNeedToSave ? _u("true") : _u("false");
    w->Print(_u("noNeedToSave: %s"), flagText);
}
#endif
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: prints one line of the form  case <quoted char>: Jump(L<label>).
void SwitchCase::Print(DebugWriter* w) const
{
    w->Print(_u("case "));

    w->PrintQuotedChar(c);

    w->PrintEOL(_u(": Jump(L%04x)"), targetLabel);
}
#endif
- template <int n>
- void SwitchMixin<n>::AddCase(char16 c, Label targetLabel)
- {
- Assert(numCases < MaxCases);
- int i;
- __analysis_assume(numCases < MaxCases);
- for (i = 0; i < numCases; i++)
- {
- Assert(cases[i].c != c);
- if (cases[i].c > c)
- break;
- }
- __analysis_assume(numCases < MaxCases);
- for (int j = numCases; j > i; j--)
- cases[j] = cases[j - 1];
- cases[i].c = c;
- cases[i].targetLabel = targetLabel;
- numCases++;
- }
- void UnifiedRegexSwitchMixinForceAllInstantiations()
- {
- {
- SwitchMixin<10> x;
- x.AddCase(0, 0);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- x.Print(0, 0);
- #endif
- }
- {
- SwitchMixin<20> x;
- x.AddCase(0, 0);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- x.Print(0, 0);
- #endif
- }
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump: ends the current line, then prints every case on its own line
// at one extra level of indentation. litbuf is not used.
template <int n>
void SwitchMixin<n>::Print(DebugWriter* w, const char16* litbuf) const
{
    w->EOL();
    w->Indent();

    for (int index = 0; index < numCases; index++)
    {
        cases[index].Print(w);
    }

    w->Unindent();
}
#endif
- // ----------------------------------------------------------------------
- // FailInst
- // ----------------------------------------------------------------------
- __inline bool FailInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- return matcher.Fail(FAIL_PARAMETERS);
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction; returns sizeof(*this). litbuf is not used.
int FailInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->PrintEOL(_u("L%04x: Fail()"), label);

    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // SuccInst
- // ----------------------------------------------------------------------
- __inline bool SuccInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- GroupInfo* info = matcher.GroupIdToGroupInfo(0);
- info->offset = matchStart;
- info->length = inputOffset - matchStart;
- return true; // STOP MATCHING
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction; returns sizeof(*this). litbuf is not used.
int SuccInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->PrintEOL(_u("L%04x: Succ()"), label);

    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // JumpInst
- // ----------------------------------------------------------------------
- __inline bool JumpInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- instPointer = matcher.LabelToInstPointer(targetLabel);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its jump target; returns sizeof(*this).
int JumpInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: Jump("), label);

    JumpMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // JumpIfNotCharInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool JumpIfNotCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset < inputLength && input[inputOffset] == c)
- instPointer += sizeof(*this);
- else
- instPointer = matcher.LabelToInstPointer(targetLabel);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction (character, then jump target); returns sizeof(*this).
int JumpIfNotCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: JumpIfNotChar("), label);

    CharMixin::Print(w, litbuf);
    w->Print(_u(", "));
    JumpMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // MatchCharOrJumpInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool MatchCharOrJumpInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset < inputLength && input[inputOffset] == c)
- {
- inputOffset++;
- instPointer += sizeof(*this);
- }
- else
- instPointer = matcher.LabelToInstPointer(targetLabel);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction (character, then jump target); returns sizeof(*this).
int MatchCharOrJumpInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: MatchCharOrJump("), label);

    CharMixin::Print(w, litbuf);
    w->Print(_u(", "));
    JumpMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // JumpIfNotSetInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool JumpIfNotSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset < inputLength && set.Get(input[inputOffset]))
- instPointer += sizeof(*this);
- else
- instPointer = matcher.LabelToInstPointer(targetLabel);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction (set, then jump target); returns sizeof(*this).
int JumpIfNotSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: JumpIfNotSet("), label);

    SetMixin::Print(w, litbuf);
    w->Print(_u(", "));
    JumpMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // MatchSetOrJumpInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool MatchSetOrJumpInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset < inputLength && set.Get(input[inputOffset]))
- {
- inputOffset++;
- instPointer += sizeof(*this);
- }
- else
- instPointer = matcher.LabelToInstPointer(targetLabel);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction (set, then jump target); returns sizeof(*this).
int MatchSetOrJumpInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: MatchSetOrJump("), label);

    SetMixin::Print(w, litbuf);
    w->Print(_u(", "));
    JumpMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // Switch10Inst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool Switch10Inst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- if (inputOffset >= inputLength)
- return matcher.Fail(FAIL_PARAMETERS);
- #if 0
- int l = 0;
- int h = numCases - 1;
- while (l <= h)
- {
- int m = (l + h) / 2;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (cases[m].c == input[inputOffset])
- {
- instPointer = matcher.LabelToInstPointer(cases[m].targetLabel);
- return false;
- }
- else if (cases[m].c < input[inputOffset])
- l = m + 1;
- else
- h = m - 1;
- }
- #else
- const int localNumCases = numCases;
- for (int i = 0; i < localNumCases; i++)
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (cases[i].c == input[inputOffset])
- {
- instPointer = matcher.LabelToInstPointer(cases[i].targetLabel);
- return false;
- }
- else if (cases[i].c > input[inputOffset])
- break;
- }
- #endif
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and all of its cases; returns sizeof(*this).
int Switch10Inst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: Switch10("), label);

    SwitchMixin<MaxCases>::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // Switch20Inst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool Switch20Inst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- if (inputOffset >= inputLength)
- return matcher.Fail(FAIL_PARAMETERS);
- #if 0
- int l = 0;
- int h = numCases - 1;
- while (l <= h)
- {
- int m = (l + h) / 2;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (cases[m].c == input[inputOffset])
- {
- instPointer = matcher.LabelToInstPointer(cases[m].targetLabel);
- return false;
- }
- else if (cases[m].c < input[inputOffset])
- l = m + 1;
- else
- h = m - 1;
- }
- #else
- const int localNumCases = numCases;
- for (int i = 0; i < localNumCases; i++)
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (cases[i].c == input[inputOffset])
- {
- instPointer = matcher.LabelToInstPointer(cases[i].targetLabel);
- return false;
- }
- else if (cases[i].c > input[inputOffset])
- break;
- }
- #endif
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and all of its cases; returns sizeof(*this).
int Switch20Inst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: Switch20("), label);

    SwitchMixin<MaxCases>::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // SwitchAndConsume10Inst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool SwitchAndConsume10Inst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- if (inputOffset >= inputLength)
- return matcher.Fail(FAIL_PARAMETERS);
- #if 0
- int l = 0;
- int h = numCases - 1;
- while (l <= h)
- {
- int m = (l + h) / 2;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (cases[m].c == input[inputOffset])
- {
- inputOffset++;
- instPointer = matcher.LabelToInstPointer(cases[m].targetLabel);
- return false;
- }
- else if (cases[m].c < input[inputOffset])
- l = m + 1;
- else
- h = m - 1;
- }
- #else
- const int localNumCases = numCases;
- for (int i = 0; i < localNumCases; i++)
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (cases[i].c == input[inputOffset])
- {
- inputOffset++;
- instPointer = matcher.LabelToInstPointer(cases[i].targetLabel);
- return false;
- }
- else if (cases[i].c > input[inputOffset])
- break;
- }
- #endif
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and all of its cases; returns sizeof(*this).
int SwitchAndConsume10Inst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: SwitchAndConsume10("), label);

    SwitchMixin<MaxCases>::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // SwitchAndConsume20Inst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool SwitchAndConsume20Inst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- if (inputOffset >= inputLength)
- return matcher.Fail(FAIL_PARAMETERS);
- #if 0
- int l = 0;
- int h = numCases - 1;
- while (l <= h)
- {
- int m = (l + h) / 2;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (cases[m].c == input[inputOffset])
- {
- inputOffset++;
- instPointer = matcher.LabelToInstPointer(cases[m].targetLabel);
- return false;
- }
- else if (cases[m].c < input[inputOffset])
- l = m + 1;
- else
- h = m - 1;
- }
- #else
- const int localNumCases = numCases;
- for (int i = 0; i < localNumCases; i++)
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (cases[i].c == input[inputOffset])
- {
- inputOffset++;
- instPointer = matcher.LabelToInstPointer(cases[i].targetLabel);
- return false;
- }
- else if (cases[i].c > input[inputOffset])
- break;
- }
- #endif
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and all of its cases; returns sizeof(*this).
int SwitchAndConsume20Inst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: SwitchAndConsume20("), label);

    SwitchMixin<MaxCases>::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // BOITestInst
- // ----------------------------------------------------------------------
- __inline bool BOITestInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- if (inputOffset > 0)
- {
- if (canHardFail)
- // Clearly trying to start from later in the input won't help, and we know backtracking can't take us earlier in the input
- return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
- else
- return matcher.Fail(FAIL_PARAMETERS);
- }
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its hard-fail flag; returns sizeof(*this).
int BOITestInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: BOITest("), label);

    HardFailMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // EOITestInst
- // ----------------------------------------------------------------------
- __inline bool EOITestInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- if (inputOffset < inputLength)
- {
- if (canHardFail)
- // We know backtracking can never take us later in the input, but starting from later in the input could help
- return matcher.HardFail(HARDFAIL_PARAMETERS(LaterOnly));
- else
- return matcher.Fail(FAIL_PARAMETERS);
- }
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its hard-fail flag; returns sizeof(*this).
int EOITestInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: EOITest("), label);

    HardFailMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // BOLTestInst
- // ----------------------------------------------------------------------
- __inline bool BOLTestInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset > 0 && !matcher.standardChars->IsNewline(input[inputOffset - 1]))
- return matcher.Fail(FAIL_PARAMETERS);
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction; returns sizeof(*this). litbuf is not used.
int BOLTestInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->PrintEOL(_u("L%04x: BOLTest()"), label);

    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // EOLTestInst
- // ----------------------------------------------------------------------
- __inline bool EOLTestInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset < inputLength && !matcher.standardChars->IsNewline(input[inputOffset]))
- return matcher.Fail(FAIL_PARAMETERS);
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction; returns sizeof(*this). litbuf is not used.
int EOLTestInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->PrintEOL(_u("L%04x: EOLTest()"), label);

    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // WordBoundaryTestInst
- // ----------------------------------------------------------------------
- __inline bool WordBoundaryTestInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- const bool prev = inputOffset > 0 && matcher.standardChars->IsWord(input[inputOffset - 1]);
- const bool curr = inputOffset < inputLength && matcher.standardChars->IsWord(input[inputOffset]);
- if (isNegation == (prev != curr))
- return matcher.Fail(FAIL_PARAMETERS);
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its isNegation flag; returns sizeof(*this).
int WordBoundaryTestInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->PrintEOL(
        _u("L%04x: WordBoundaryTest(isNegation: %s)"),
        label,
        isNegation ? _u("true") : _u("false"));

    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // MatchCharInst
- // ----------------------------------------------------------------------
- __inline bool MatchCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset >= inputLength || input[inputOffset] != c)
- return matcher.Fail(FAIL_PARAMETERS);
- inputOffset++;
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its character; returns sizeof(*this).
int MatchCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: MatchChar("), label);

    CharMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // MatchChar2Inst
- // ----------------------------------------------------------------------
- __inline bool MatchChar2Inst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset >= inputLength || (input[inputOffset] != cs[0] && input[inputOffset] != cs[1]))
- return matcher.Fail(FAIL_PARAMETERS);
- inputOffset++;
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its characters; returns sizeof(*this).
int MatchChar2Inst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: MatchChar2("), label);

    Char2Mixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // MatchChar3Inst
- // ----------------------------------------------------------------------
- __inline bool MatchChar3Inst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset >= inputLength || (input[inputOffset] != cs[0] && input[inputOffset] != cs[1] && input[inputOffset] != cs[2]))
- return matcher.Fail(FAIL_PARAMETERS);
- inputOffset++;
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its characters; returns sizeof(*this).
int MatchChar3Inst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: MatchChar3("), label);

    Char3Mixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // MatchChar4Inst
- // ----------------------------------------------------------------------
- __inline bool MatchChar4Inst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset >= inputLength || (input[inputOffset] != cs[0] && input[inputOffset] != cs[1] && input[inputOffset] != cs[2] && input[inputOffset] != cs[3]))
- return matcher.Fail(FAIL_PARAMETERS);
- inputOffset++;
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its characters; returns sizeof(*this).
int MatchChar4Inst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: MatchChar4("), label);

    Char4Mixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // MatchSetInst
- // ----------------------------------------------------------------------
- template<bool IsNegation>
- __inline bool MatchSetInst<IsNegation>::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset >= inputLength || set.Get(input[inputOffset]) == IsNegation)
- return matcher.Fail(FAIL_PARAMETERS);
- inputOffset++;
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its set; returns sizeof(*this).
template<bool IsNegation>
int MatchSetInst<IsNegation>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: MatchSet("), label);

    SetMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // MatchLiteralInst
- // ----------------------------------------------------------------------
- __inline bool MatchLiteralInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- Assert(length <= matcher.program->rep.insts.litbufLen - offset);
- if (length > inputLength - inputOffset)
- return matcher.Fail(FAIL_PARAMETERS);
- const Char *const literalBuffer = matcher.program->rep.insts.litbuf;
- const Char * literalCurr = literalBuffer + offset;
- const Char * inputCurr = input + inputOffset;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (*literalCurr != *inputCurr)
- {
- inputOffset++;
- return matcher.Fail(FAIL_PARAMETERS);
- }
- const Char *const literalEnd = literalCurr + length;
- literalCurr++;
- inputCurr++;
- while (literalCurr < literalEnd)
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (*literalCurr != *inputCurr++)
- {
- inputOffset = (CharCount)(inputCurr - input);
- return matcher.Fail(FAIL_PARAMETERS);
- }
- literalCurr++;
- }
- inputOffset = (CharCount)(inputCurr - input);
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its literal (non-equivalence-class form);
// returns sizeof(*this).
int MatchLiteralInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: MatchLiteral("), label);

    const bool isEquivClass = false;
    LiteralMixin::Print(w, litbuf, isEquivClass);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // MatchLiteralEquivInst
- // ----------------------------------------------------------------------
- __inline bool MatchLiteralEquivInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- if (length > inputLength - inputOffset)
- return matcher.Fail(FAIL_PARAMETERS);
- const Char *const literalBuffer = matcher.program->rep.insts.litbuf;
- CharCount literalOffset = offset;
- const CharCount literalEndOffset = offset + length * CaseInsensitive::EquivClassSize;
- Assert(literalEndOffset <= matcher.program->rep.insts.litbufLen);
- CompileAssert(CaseInsensitive::EquivClassSize == 4);
- do
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (input[inputOffset] != literalBuffer[literalOffset]
- && input[inputOffset] != literalBuffer[literalOffset + 1]
- && input[inputOffset] != literalBuffer[literalOffset + 2]
- && input[inputOffset] != literalBuffer[literalOffset + 3])
- {
- return matcher.Fail(FAIL_PARAMETERS);
- }
- inputOffset++;
- literalOffset += CaseInsensitive::EquivClassSize;
- }
- while (literalOffset < literalEndOffset);
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its literal (equivalence-class form);
// returns sizeof(*this).
int MatchLiteralEquivInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: MatchLiteralEquiv("), label);

    const bool isEquivClass = true;
    LiteralMixin::Print(w, litbuf, isEquivClass);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // MatchTrieInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool MatchTrieInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- if (!trie.Match
- ( input
- , inputLength
- , inputOffset
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , matcher.stats
- #endif
- ))
- return matcher.Fail(FAIL_PARAMETERS);
- instPointer += sizeof(*this);
- return false;
- }
- void MatchTrieInst::FreeBody(ArenaAllocator* rtAllocator)
- {
- trie.FreeBody(rtAllocator);
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction followed by the trie contents;
// returns sizeof(*this). litbuf is not used.
int MatchTrieInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->PrintEOL(_u("L%04x: MatchTrie("), label);

    trie.Print(w);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // OptMatchCharInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool OptMatchCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset < inputLength && input[inputOffset] == c)
- inputOffset++;
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its character; returns sizeof(*this).
int OptMatchCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: OptMatchChar("), label);

    CharMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // OptMatchSetInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool OptMatchSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset < inputLength && set.Get(input[inputOffset]))
- inputOffset++;
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its set; returns sizeof(*this).
int OptMatchSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: OptMatchSet("), label);

    SetMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // SyncToCharAndContinueInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool SyncToCharAndContinueInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- const Char matchC = c;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- while (inputOffset < inputLength && input[inputOffset] != matchC)
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- inputOffset++;
- }
- matchStart = inputOffset;
- instPointer += sizeof(*this);
- return false;
- }
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of this instruction and its character; returns sizeof(*this).
int SyncToCharAndContinueInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    w->Print(_u("L%04x: SyncToCharAndContinue("), label);

    CharMixin::Print(w, litbuf);

    w->PrintEOL(_u(")"));
    return static_cast<int>(sizeof(*this));
}
#endif
- // ----------------------------------------------------------------------
- // SyncToChar2SetAndContinueInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Advances the input offset to the next character equal to cs[0] or cs[1]
- // (or to the end of input if neither occurs), records that position as
- // matchStart without consuming it, and continues with the next instruction.
- __inline bool SyncToChar2SetAndContinueInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     const Char first = cs[0];
-     const Char second = cs[1];
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.CompStats();
- #endif
-     for (; inputOffset < inputLength; inputOffset++)
-     {
-         const Char current = input[inputOffset];
-         if (current == first || current == second)
-         {
-             break;
-         }
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         matcher.CompStats();
- #endif
-     }
-     matchStart = inputOffset;
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: SyncToChar2SetAndContinue(" header, lets
- // Char2Mixin print its part, closes the line with ")" and returns sizeof(*this).
- int SyncToChar2SetAndContinueInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: SyncToChar2SetAndContinue("), label);
-     Char2Mixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // SyncToSetAndContinueInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Advances the input offset while set.Get(current character) equals
- // IsNegation, i.e. stops at the first character for which the set lookup
- // differs from IsNegation, or at the end of input. That position becomes
- // matchStart (nothing is consumed) and execution continues with the next
- // instruction.
- template<bool IsNegation>
- __inline bool SyncToSetAndContinueInst<IsNegation>::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     const RuntimeCharSet<Char>& charSet = set;
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.CompStats();
- #endif
-     for (; inputOffset < inputLength; inputOffset++)
-     {
-         if (charSet.Get(input[inputOffset]) != IsNegation)
-         {
-             break;
-         }
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         matcher.CompStats();
- #endif
-     }
-     matchStart = inputOffset;
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: SyncToSetAndContinue(" header, lets
- // SetMixin print its part, closes the line with ")" and returns sizeof(*this).
- template<bool IsNegation>
- int SyncToSetAndContinueInst<IsNegation>::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: SyncToSetAndContinue("), label);
-     SetMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // SyncToLiteralAndContinueInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Asks the scanner's Match to locate the literal starting from the current
- // input offset. When Match reports failure the whole match hard-fails;
- // otherwise the resulting input offset becomes matchStart (the literal is
- // not consumed) and execution continues with the next instruction.
- template <typename ScannerT>
- __inline bool SyncToLiteralAndContinueInstT<ScannerT>::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     if (Match(matcher, input, inputLength, inputOffset))
-     {
-         matchStart = inputOffset;
-         instPointer += sizeof(*this);
-         return false;
-     }
-     return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: SyncToLiteralAndContinue(" header, lets
- // the scanner mixin print its part, closes the line with ")" and returns
- // sizeof(*this).
- template <typename ScannerT>
- int SyncToLiteralAndContinueInstT<ScannerT>::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: SyncToLiteralAndContinue("), label);
-     ScannerT::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- // Explicit instantiations, one per scanner mixin listed below.
- template struct SyncToLiteralAndContinueInstT<Char2LiteralScannerMixin>;
- template struct SyncToLiteralAndContinueInstT<ScannerMixin>;
- template struct SyncToLiteralAndContinueInstT<ScannerMixin_WithLinearCharMap>;
- template struct SyncToLiteralAndContinueInstT<EquivScannerMixin>;
- template struct SyncToLiteralAndContinueInstT<EquivTrivialLastPatCharScannerMixin>;
- #endif
- // ----------------------------------------------------------------------
- // SyncToCharAndConsumeInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Scans forward for character 'c'. If the end of input is reached first the
- // match hard-fails. Otherwise matchStart is set to the position of the
- // character, the character is consumed, and execution continues with the
- // next instruction.
- __inline bool SyncToCharAndConsumeInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     const Char target = c;
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.CompStats();
- #endif
-     for (; inputOffset < inputLength; inputOffset++)
-     {
-         if (input[inputOffset] == target)
-         {
-             break;
-         }
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         matcher.CompStats();
- #endif
-     }
-     if (inputOffset >= inputLength)
-     {
-         return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
-     }
-     matchStart = inputOffset;
-     inputOffset++;
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: SyncToCharAndConsume(" header, lets
- // CharMixin print its part, closes the line with ")" and returns sizeof(*this).
- int SyncToCharAndConsumeInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: SyncToCharAndConsume("), label);
-     CharMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // SyncToChar2SetAndConsumeInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Scans forward for a character equal to cs[0] or cs[1]. If the end of input
- // is reached first the match hard-fails. Otherwise matchStart is set to the
- // position found, that character is consumed, and execution continues with
- // the next instruction.
- __inline bool SyncToChar2SetAndConsumeInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     const Char first = cs[0];
-     const Char second = cs[1];
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.CompStats();
- #endif
-     for (; inputOffset < inputLength; inputOffset++)
-     {
-         const Char current = input[inputOffset];
-         if (current == first || current == second)
-         {
-             break;
-         }
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         matcher.CompStats();
- #endif
-     }
-     if (inputOffset >= inputLength)
-     {
-         return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
-     }
-     matchStart = inputOffset;
-     inputOffset++;
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: SyncToChar2SetAndConsume(" header, lets
- // Char2Mixin print its part, closes the line with ")" and returns sizeof(*this).
- int SyncToChar2SetAndConsumeInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: SyncToChar2SetAndConsume("), label);
-     Char2Mixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // SyncToSetAndConsumeInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Scans forward while set.Get(current character) equals IsNegation. If the
- // end of input is reached the match hard-fails. Otherwise matchStart is set
- // to the position where the scan stopped, that character is consumed, and
- // execution continues with the next instruction.
- template<bool IsNegation>
- __inline bool SyncToSetAndConsumeInst<IsNegation>::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     const RuntimeCharSet<Char>& charSet = set;
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.CompStats();
- #endif
-     for (; inputOffset < inputLength; inputOffset++)
-     {
-         if (charSet.Get(input[inputOffset]) != IsNegation)
-         {
-             break;
-         }
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         matcher.CompStats();
- #endif
-     }
-     if (inputOffset >= inputLength)
-     {
-         return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
-     }
-     matchStart = inputOffset;
-     inputOffset++;
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: SyncToSetAndConsume(" header, lets
- // SetMixin print its part, closes the line with ")" and returns sizeof(*this).
- template<bool IsNegation>
- int SyncToSetAndConsumeInst<IsNegation>::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: SyncToSetAndConsume("), label);
-     SetMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // SyncToLiteralAndConsumeInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Asks the scanner's Match to locate the literal starting from the current
- // input offset. When Match reports failure the whole match hard-fails;
- // otherwise the resulting input offset becomes matchStart, the input offset
- // is advanced by ScannerT::GetLiteralLength(), and execution continues with
- // the next instruction.
- template <typename ScannerT>
- __inline bool SyncToLiteralAndConsumeInstT<ScannerT>::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     if (Match(matcher, input, inputLength, inputOffset))
-     {
-         matchStart = inputOffset;
-         inputOffset += ScannerT::GetLiteralLength();
-         instPointer += sizeof(*this);
-         return false;
-     }
-     return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: SyncToLiteralAndConsume(" header, lets
- // the scanner mixin print its part, closes the line with ")" and returns
- // sizeof(*this).
- template <typename ScannerT>
- int SyncToLiteralAndConsumeInstT<ScannerT>::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: SyncToLiteralAndConsume("), label);
-     ScannerT::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- // Explicit instantiations, one per scanner mixin listed below.
- template struct SyncToLiteralAndConsumeInstT<Char2LiteralScannerMixin>;
- template struct SyncToLiteralAndConsumeInstT<ScannerMixin>;
- template struct SyncToLiteralAndConsumeInstT<ScannerMixin_WithLinearCharMap>;
- template struct SyncToLiteralAndConsumeInstT<EquivScannerMixin>;
- template struct SyncToLiteralAndConsumeInstT<EquivTrivialLastPatCharScannerMixin>;
- #endif
- // ----------------------------------------------------------------------
- // SyncToCharAndBackupInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Synchronizes to the next occurrence of character 'c' and then moves the
- // match start back from that position by at most backup.upper characters
- // (never before the current matchStart).
- //  - Hard-fails when fewer than backup.lower characters remain after
- //    matchStart, or when 'c' does not occur in the rest of the input.
- //  - Does nothing when inputOffset is still below nextSyncInputOffset, since
- //    a new scan would find the same sync point again.
- // On a successful scan nextSyncInputOffset is set to one past the sync point
- // and inputOffset is reset to the (possibly updated) matchStart.
- __inline bool SyncToCharAndBackupInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     if (inputLength - matchStart < backup.lower)
-     {
-         // Even a sync point at the very end would not leave room for the minimum backup
-         return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
-     }
-     if (inputOffset >= nextSyncInputOffset)
-     {
-         if (inputOffset - matchStart < backup.lower)
-         {
-             // Skip ahead: a sync point is useless until the minimum backup fits
-             inputOffset = matchStart + backup.lower;
-         }
-         const Char target = c;
-         for (; inputOffset < inputLength && input[inputOffset] != target; inputOffset++)
-         {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-             matcher.CompStats();
- #endif
-         }
-         if (inputOffset >= inputLength)
-         {
-             return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
-         }
-         nextSyncInputOffset = inputOffset + 1;
-         if (backup.upper != CharCountFlag)
-         {
-             // New start lies at most backup.upper before the sync point
-             CharCount distance = inputOffset - matchStart;
-             matchStart = inputOffset - min(distance, (CharCount)backup.upper);
-         }
-         // When backup.upper is CharCountFlag the start stays where it is.
-         // Resume matching from the match start.
-         inputOffset = matchStart;
-     }
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: SyncToCharAndBackup(" header, the
- // CharMixin and BackupMixin parts separated by ", ", then ")" and end of
- // line. Returns sizeof(*this).
- int SyncToCharAndBackupInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: SyncToCharAndBackup("), label);
-     CharMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     BackupMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // SyncToSetAndBackupInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Synchronizes to the next character for which set.Get(...) differs from
- // IsNegation and then moves the match start back from that position by at
- // most backup.upper characters (never before the current matchStart).
- //  - Hard-fails when fewer than backup.lower characters remain after
- //    matchStart, or when the scan reaches the end of input.
- //  - Does nothing when inputOffset is still below nextSyncInputOffset, since
- //    a new scan would find the same sync point again.
- // On a successful scan nextSyncInputOffset is set to one past the sync point
- // and inputOffset is reset to the (possibly updated) matchStart.
- template<bool IsNegation>
- __inline bool SyncToSetAndBackupInst<IsNegation>::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     if (inputLength - matchStart < backup.lower)
-     {
-         // Even a sync point at the very end would not leave room for the minimum backup
-         return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
-     }
-     if (inputOffset >= nextSyncInputOffset)
-     {
-         if (inputOffset - matchStart < backup.lower)
-         {
-             // Skip ahead: a sync point is useless until the minimum backup fits
-             inputOffset = matchStart + backup.lower;
-         }
-         const RuntimeCharSet<Char>& charSet = set;
-         for (; inputOffset < inputLength && charSet.Get(input[inputOffset]) == IsNegation; inputOffset++)
-         {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-             matcher.CompStats();
- #endif
-         }
-         if (inputOffset >= inputLength)
-         {
-             return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
-         }
-         nextSyncInputOffset = inputOffset + 1;
-         if (backup.upper != CharCountFlag)
-         {
-             // New start lies at most backup.upper before the sync point
-             CharCount distance = inputOffset - matchStart;
-             matchStart = inputOffset - min(distance, (CharCount)backup.upper);
-         }
-         // When backup.upper is CharCountFlag the start stays where it is.
-         // Resume matching from the match start.
-         inputOffset = matchStart;
-     }
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: SyncToSetAndBackup(" header, the
- // SetMixin and BackupMixin parts separated by ", ", then ")" and end of
- // line. Returns sizeof(*this).
- template<bool IsNegation>
- int SyncToSetAndBackupInst<IsNegation>::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: SyncToSetAndBackup("), label);
-     SetMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     BackupMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // SyncToLiteralAndBackupInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Synchronizes to the next occurrence of the scanner's literal (located via
- // Match) and then moves the match start back from the start of that literal
- // by at most backup.upper characters (never before the current matchStart).
- //  - Hard-fails when fewer than backup.lower characters remain after
- //    matchStart, or when Match reports failure.
- //  - Does nothing when inputOffset is still below nextSyncInputOffset, since
- //    a new scan would find the same sync point again.
- // On a successful scan nextSyncInputOffset is set to one past the sync point
- // and inputOffset is reset to the (possibly updated) matchStart.
- template <typename ScannerT>
- __inline bool SyncToLiteralAndBackupInstT<ScannerT>::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     if (inputLength - matchStart < backup.lower)
-     {
-         // Even a sync point at the very end would not leave room for the minimum backup
-         return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
-     }
-     if (inputOffset >= nextSyncInputOffset)
-     {
-         if (inputOffset - matchStart < backup.lower)
-         {
-             // Skip ahead: a sync point is useless until the minimum backup fits
-             inputOffset = matchStart + backup.lower;
-         }
-         if (!Match(matcher, input, inputLength, inputOffset))
-         {
-             return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
-         }
-         nextSyncInputOffset = inputOffset + 1;
-         if (backup.upper != CharCountFlag)
-         {
-             // New start lies at most backup.upper before the start of the literal
-             CharCount distance = inputOffset - matchStart;
-             matchStart = inputOffset - min(distance, (CharCount)backup.upper);
-         }
-         // When backup.upper is CharCountFlag the start stays where it is.
-         // Resume matching from the match start.
-         inputOffset = matchStart;
-     }
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: SyncToLiteralAndBackup(" header, the
- // scanner mixin and BackupMixin parts separated by ", ", then ")" and end of
- // line. Returns sizeof(*this).
- template <typename ScannerT>
- int SyncToLiteralAndBackupInstT<ScannerT>::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: SyncToLiteralAndBackup("), label);
-     ScannerT::Print(w, litbuf);
-     w->Print(_u(", "));
-     BackupMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- // Explicit instantiations, one per scanner mixin listed below.
- template struct SyncToLiteralAndBackupInstT<Char2LiteralScannerMixin>;
- template struct SyncToLiteralAndBackupInstT<ScannerMixin>;
- template struct SyncToLiteralAndBackupInstT<ScannerMixin_WithLinearCharMap>;
- template struct SyncToLiteralAndBackupInstT<EquivScannerMixin>;
- template struct SyncToLiteralAndBackupInstT<EquivTrivialLastPatCharScannerMixin>;
- #endif
- // ----------------------------------------------------------------------
- // SyncToLiteralsAndBackupInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Synchronizes to the earliest occurrence of any of the 'numLiterals'
- // literals and then moves the match start back from that position by at most
- // backup.upper characters (never before the current matchStart).
- //  - Hard-fails when fewer than backup.lower characters remain after
- //    matchStart, or when none of the literals is found.
- //  - Does nothing when inputOffset is still below nextSyncInputOffset.
- // The per-literal result of the last scan is remembered in
- // matcher.literalNextSyncInputOffsets (allocated on first use) so a later scan
- // for that literal starts no earlier than its previously found position;
- // literals that were not found are parked at inputLength.
- __inline bool SyncToLiteralsAndBackupInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     if (inputLength - matchStart < backup.lower)
-     {
-         // Even a sync point at the very end would not leave room for the minimum backup
-         return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
-     }
-     if (inputOffset >= nextSyncInputOffset)
-     {
-         if (inputOffset - matchStart < backup.lower)
-         {
-             // Skip ahead: a sync point is useless until the minimum backup fits
-             inputOffset = matchStart + backup.lower;
-         }
-         int winner = -1;
-         CharCount winnerOffset = 0;
-         if (matcher.literalNextSyncInputOffsets == nullptr)
-         {
-             Assert(numLiterals <= MaxNumSyncLiterals);
-             matcher.literalNextSyncInputOffsets =
-                 RecyclerNewArrayLeaf(matcher.recycler, CharCount, ScannersMixin::MaxNumSyncLiterals);
-         }
-         CharCount* nextOffsets = matcher.literalNextSyncInputOffsets;
-         if (firstIteration)
-         {
-             // Seed every literal's scan position with the current input offset
-             for (int k = 0; k < numLiterals; k++)
-             {
-                 nextOffsets[k] = inputOffset;
-             }
-         }
-         for (int i = 0; i < numLiterals; i++)
-         {
-             // Start from the remembered position, but never before the current input offset
-             CharCount candidate = nextOffsets[i];
-             if (candidate < inputOffset)
-             {
-                 candidate = inputOffset;
-             }
-             bool found;
-             if (infos[i]->isEquivClass)
-             {
-                 found = infos[i]->scanner.Match<CaseInsensitive::EquivClassSize>
-                     ( input
-                     , inputLength
-                     , candidate
-                     , matcher.program->rep.insts.litbuf + infos[i]->offset
-                     , infos[i]->length
- #if ENABLE_REGEX_CONFIG_OPTIONS
-                     , matcher.stats
- #endif
-                     );
-             }
-             else
-             {
-                 found = infos[i]->scanner.Match<1>
-                     ( input
-                     , inputLength
-                     , candidate
-                     , matcher.program->rep.insts.litbuf + infos[i]->offset
-                     , infos[i]->length
- #if ENABLE_REGEX_CONFIG_OPTIONS
-                     , matcher.stats
- #endif
-                     );
-             }
-             if (!found)
-             {
-                 nextOffsets[i] = inputLength;
-                 continue;
-             }
-             if (winner < 0 || candidate < winnerOffset)
-             {
-                 winner = i;
-                 winnerOffset = candidate;
-             }
-             nextOffsets[i] = candidate;
-         }
-         if (winner < 0)
-         {
-             // No literals matched
-             return matcher.HardFail(HARDFAIL_PARAMETERS(ImmediateFail));
-         }
-         nextSyncInputOffset = winnerOffset + 1;
-         if (backup.upper != CharCountFlag)
-         {
-             // New start lies at most backup.upper before the start of the literal
-             CharCount distance = winnerOffset - matchStart;
-             matchStart = winnerOffset - min(distance, (CharCount)backup.upper);
-         }
-         // When backup.upper is CharCountFlag the start stays where it is.
-         // Resume matching from the match start.
-         inputOffset = matchStart;
-     }
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: SyncToLiteralsAndBackup(" header, the
- // ScannersMixin and BackupMixin parts separated by ", ", then ")" and end of
- // line. Returns sizeof(*this).
- int SyncToLiteralsAndBackupInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: SyncToLiteralsAndBackup("), label);
-     ScannersMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     BackupMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // MatchGroupInst
- // ----------------------------------------------------------------------
- // Matches a back-reference: the text captured by group 'groupId' must
- // reappear in the input at the current offset.
- //  - An undefined or empty group matches trivially without consuming input.
- //  - If fewer input characters remain than the group holds, the match fails.
- //  - Otherwise the comparison is exact, or case-insensitive per code unit
- //    (IgnoreCase flag only), or case-insensitive per code point (IgnoreCase
- //    and Unicode flags both set).
- // On success inputOffset has advanced past the compared text and execution
- // continues with the next instruction; on mismatch matcher.Fail is invoked.
- __inline bool MatchGroupInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     GroupInfo* const info = matcher.GroupIdToGroupInfo(groupId);
-     if (!info->IsUndefined() && info->length > 0)
-     {
-         if (info->length > inputLength - inputOffset)
-             return matcher.Fail(FAIL_PARAMETERS);
-         CharCount groupOffset = info->offset;
-         const CharCount groupEndOffset = groupOffset + info->length;
-         bool isCaseInsensitiveMatch = (matcher.program->flags & IgnoreCaseRegexFlag) != 0;
-         bool isCodePointList = (matcher.program->flags & UnicodeRegexFlag) != 0;
-         // This is the only place in the runtime machinery we need to convert characters to their equivalence class
-         if (isCaseInsensitiveMatch && isCodePointList)
-         {
-             // Reads the code point at 'offset' (combining a surrogate pair when both halves lie before
-             // 'endOffset') and advances 'offset' past it. Returns false when nothing is left to read.
-             auto getNextCodePoint = [=](CharCount &offset, CharCount endOffset, codepoint_t &codePoint) {
-                 if (endOffset <= offset)
-                 {
-                     return false;
-                 }
-                 Char lowerPart = input[offset];
-                 if (!Js::NumberUtilities::IsSurrogateLowerPart(lowerPart) || offset + 1 == endOffset)
-                 {
-                     codePoint = lowerPart;
-                     offset += 1;
-                     return true;
-                 }
-                 Char upperPart = input[offset + 1];
-                 if (!Js::NumberUtilities::IsSurrogateUpperPart(upperPart))
-                 {
-                     codePoint = lowerPart;
-                     offset += 1;
-                 }
-                 else
-                 {
-                     codePoint = Js::NumberUtilities::SurrogatePairAsCodePoint(lowerPart, upperPart);
-                     offset += 2;
-                 }
-                 return true;
-             };
-             codepoint_t equivs[CaseInsensitive::EquivClassSize];
-             while (true)
-             {
-                 codepoint_t groupCodePoint;
-                 bool hasGroupCodePoint = getNextCodePoint(groupOffset, groupEndOffset, groupCodePoint);
-                 if (!hasGroupCodePoint)
-                 {
-                     break;
-                 }
-                 // The length check at the top of the function counts code units, so the input normally
-                 // has a code point available here. Check the result anyway so that inputCodePoint is
-                 // never read uninitialized.
-                 codepoint_t inputCodePoint;
-                 if (!getNextCodePoint(inputOffset, inputLength, inputCodePoint))
-                 {
-                     return matcher.Fail(FAIL_PARAMETERS);
-                 }
-                 bool doesMatch = false;
-                 if (!Js::NumberUtilities::IsInSupplementaryPlane(groupCodePoint))
-                 {
-                     // ToCanonical is called with a 16-bit code unit. A supplementary-plane input code point
-                     // must not be narrowed to char16: e.g. U+10041 would be truncated to U+0041 and wrongly
-                     // compare equal to 'A'. Such a pair is therefore treated as a mismatch.
-                     if (!Js::NumberUtilities::IsInSupplementaryPlane(inputCodePoint))
-                     {
-                         auto toCanonical = [&](codepoint_t c) {
-                             return matcher.standardChars->ToCanonical(
-                                 CaseInsensitive::MappingSource::CaseFolding,
-                                 static_cast<char16>(c));
-                         };
-                         doesMatch = (toCanonical(groupCodePoint) == toCanonical(inputCodePoint));
-                     }
-                 }
-                 else
-                 {
-                     uint tblidx = 0;
-                     uint acth = 0;
-                     CaseInsensitive::RangeToEquivClass(tblidx, groupCodePoint, groupCodePoint, acth, equivs);
-                     CompileAssert(CaseInsensitive::EquivClassSize == 4);
-                     doesMatch =
-                         inputCodePoint == equivs[0]
-                         || inputCodePoint == equivs[1]
-                         || inputCodePoint == equivs[2]
-                         || inputCodePoint == equivs[3];
-                 }
-                 if (!doesMatch)
-                 {
-                     return matcher.Fail(FAIL_PARAMETERS);
-                 }
-             }
-         }
-         else if (isCaseInsensitiveMatch)
-         {
-             // Compare code unit by code unit after canonicalization
-             do
-             {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-                 matcher.CompStats();
- #endif
-                 auto toCanonical = [&](CharCount &offset) {
-                     return matcher.standardChars->ToCanonical(CaseInsensitive::MappingSource::UnicodeData, input[offset++]);
-                 };
-                 if (toCanonical(groupOffset) != toCanonical(inputOffset))
-                 {
-                     return matcher.Fail(FAIL_PARAMETERS);
-                 }
-             }
-             while (groupOffset < groupEndOffset);
-         }
-         else
-         {
-             // Exact comparison, code unit by code unit
-             do
-             {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-                 matcher.CompStats();
- #endif
-                 if (input[groupOffset++] != input[inputOffset++])
-                     return matcher.Fail(FAIL_PARAMETERS);
-             }
-             while (groupOffset < groupEndOffset);
-         }
-     }
-     // else: trivially match empty string
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: MatchGroup(" header, lets GroupMixin
- // print its part, closes the line with ")" and returns sizeof(*this).
- int MatchGroupInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: MatchGroup("), label);
-     GroupMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // BeginDefineGroupInst
- // ----------------------------------------------------------------------
- // Records the current input offset as the start of group 'groupId' and
- // continues with the next instruction. The group is asserted to be undefined
- // both before and after the offset is stored.
- __inline bool BeginDefineGroupInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     GroupInfo *const info = matcher.GroupIdToGroupInfo(groupId);
-     Assert(info->IsUndefined());
-     info->offset = inputOffset;
-     // Storing the offset alone must not make the group count as defined
-     Assert(info->IsUndefined());
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: BeginDefineGroup(" header, lets GroupMixin
- // print its part, closes the line with ")" and returns sizeof(*this).
- int BeginDefineGroupInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: BeginDefineGroup("), label);
-     GroupMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // EndDefineGroupInst
- // ----------------------------------------------------------------------
- // Completes the definition of group 'groupId': its length becomes the
- // distance from the recorded group offset to the current input offset.
- // Unless noNeedToSave is set, a ResetGroupCont is pushed first so that
- // backtracking restores the group. Continues with the next instruction.
- __inline bool EndDefineGroupInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     const bool mustSave = !noNeedToSave;
-     if (mustSave)
-     {
-         // UNDO ACTION: Restore group on backtrack
-         PUSH(contStack, ResetGroupCont, groupId);
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         matcher.PushStats(contStack, input);
- #endif
-     }
-     GroupInfo *const info = matcher.GroupIdToGroupInfo(groupId);
-     Assert(info->IsUndefined());
-     Assert(inputOffset >= info->offset);
-     info->length = inputOffset - info->offset;
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: EndDefineGroup(" header, the GroupMixin
- // and NoNeedToSaveMixin parts separated by ", ", then ")" and end of line.
- // Returns sizeof(*this).
- int EndDefineGroupInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: EndDefineGroup("), label);
-     GroupMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     NoNeedToSaveMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // DefineGroupFixedInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Defines group 'groupId' in one step as the 'length' characters that end at
- // the current input offset. Unless noNeedToSave is set, a ResetGroupCont is
- // pushed first so that backtracking restores the group. Continues with the
- // next instruction.
- __inline bool DefineGroupFixedInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     const bool mustSave = !noNeedToSave;
-     if (mustSave)
-     {
-         // UNDO ACTION: Restore group on backtrack
-         PUSH(contStack, ResetGroupCont, groupId);
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         matcher.PushStats(contStack, input);
- #endif
-     }
-     GroupInfo *const info = matcher.GroupIdToGroupInfo(groupId);
-     Assert(info->IsUndefined());
-     info->offset = inputOffset - length;
-     info->length = length;
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: DefineGroupFixed(" header, the GroupMixin,
- // FixedLengthMixin and NoNeedToSaveMixin parts separated by ", ", then ")"
- // and end of line. Returns sizeof(*this).
- int DefineGroupFixedInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: DefineGroupFixed("), label);
-     GroupMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     FixedLengthMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     NoNeedToSaveMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // BeginLoopInst: resets the loop's LoopInfo (iteration count 0, start offset = current input offset), then either enters the loop body or, for a non-greedy loop whose lower bound is 0, first jumps to exitLabel and leaves a RepeatLoopCont behind. Always returns false.
- // ----------------------------------------------------------------------
- __inline bool BeginLoopInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(loopId);
- // If loop has outer loops, the continuation stack may have choicepoints from an earlier "run" of this loop
- // which, when backtracked to, may expect the loopInfo state to be as it was at the time the choicepoint was
- // pushed.
- // - If the loop is greedy with deterministic body, there may be Resumes into the follow of the loop, but
- // they won't look at the loopInfo state so there's nothing to do.
- // - If the loop is greedy, or if it is non-greedy with lower > 0, AND it has a non-deterministic body,
- // we may have Resume entries which will resume inside the loop body, which may then run to a
- // RepeatLoop, which will then look at the loopInfo state. However, each iteration is protected by
- // a RestoreLoop by RepeatLoopInst below. (****)
- // - If the loop is non-greedy there may be a RepeatLoop on the stack, so we must restore the loopInfo
- // state before backtracking to it.
- if (!isGreedy && hasOuterLoops)
- {
- PUSH(contStack, RestoreLoopCont, loopId, *loopInfo); // snapshot of the loop state as it is before the reset below
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- // The loop body must always begin with empty inner groups
- // - if the loop is not in an outer loop they will be empty due to the reset when the match began
- // - if the loop is in an outer loop, they will have been reset by the outer loop's RepeatLoop instruction
- #if DBG
- for (int i = minBodyGroupId; i <= maxBodyGroupId; i++)
- {
- Assert(matcher.GroupIdToGroupInfo(i)->IsUndefined());
- }
- #endif
- loopInfo->number = 0; // no iterations completed yet
- loopInfo->startInputOffset = inputOffset; // where the first iteration starts
- if (repeats.lower == 0)
- {
- if (isGreedy)
- {
- // CHOICEPOINT: Try one iteration of body, if backtrack continue from here with no iterations
- PUSH(contStack, ResumeCont, inputOffset, exitLabel);
- instPointer += sizeof(*this); // fall into the loop body
- }
- else
- {
- // CHOICEPOINT: Try no iterations of body, if backtrack do one iteration of body from here
- Assert(instPointer == (uint8*)this);
- PUSH(contStack, RepeatLoopCont, matcher.InstPointerToLabel(instPointer), inputOffset);
- instPointer = matcher.LabelToInstPointer(exitLabel); // skip the body for now
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- else
- {
- // Must match minimum iterations, so continue to loop body
- instPointer += sizeof(*this);
- }
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: BeginLoop(" header, the BeginLoopMixin
- // and BodyGroupsMixin parts separated by ", ", then ", greedy: true)" or
- // ", greedy: false)" and end of line. Returns sizeof(*this).
- int BeginLoopInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: BeginLoop("), label);
-     BeginLoopMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     BodyGroupsMixin::Print(w, litbuf);
-     w->PrintEOL(_u(", greedy: %s)"), isGreedy ? _u("true") : _u("false"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // RepeatLoopInst: runs after a loop-body iteration; increments the loop's iteration count and then either starts another iteration, fails (an iteration past the minimum that consumed nothing), or proceeds to the loop's exit label, pushing choicepoints as needed. Returns false except on the Fail path.
- // ----------------------------------------------------------------------
- __inline bool RepeatLoopInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- BeginLoopInst* begin = matcher.L2I(BeginLoop, beginLabel); // the BeginLoopInst this instruction belongs to
- LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId);
- // See comment (****) in BeginLoopInst::Exec: with a non-deterministic body, save the loop state so it is restored on backtracking.
- if (begin->hasInnerNondet)
- {
- PUSH(contStack, RestoreLoopCont, begin->loopId, *loopInfo);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- loopInfo->number++; // count the iteration just completed
- if (loopInfo->number < begin->repeats.lower)
- {
- // Must match another iteration of body.
- loopInfo->startInputOffset = inputOffset;
- if(begin->hasInnerNondet)
- {
- // If it backtracks into the loop body of an earlier iteration, it must restore inner groups for that iteration.
- // Save the inner groups and reset them for the next iteration.
- matcher.SaveInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId, true, input, contStack);
- }
- else
- {
- // If it backtracks, the entire loop will fail, so no need to restore groups. Just reset the inner groups for
- // the next iteration.
- matcher.ResetInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId);
- }
- instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopInst)); // first instruction after the BeginLoopInst, i.e. the loop body
- }
- else if (inputOffset == loopInfo->startInputOffset && loopInfo->number > begin->repeats.lower)
- {
- // The minimum number of iterations has been satisfied but the last iteration made no progress.
- // - With greedy & deterministic body, FAIL so as to undo that iteration and restore group bindings.
- // - With greedy & non-deterministic body, FAIL so as to try another body alternative
- // - With non-greedy, we're trying an additional iteration because the follow failed. But
- // since we didn't consume anything the follow will fail again, so fail
- //
- return matcher.Fail(FAIL_PARAMETERS);
- }
- else if (begin->repeats.upper != CharCountFlag && loopInfo->number >= (CharCount)begin->repeats.upper)
- {
- // Success: the upper bound on iterations has been reached, proceed to remainder.
- instPointer = matcher.LabelToInstPointer(begin->exitLabel);
- }
- else if (begin->isGreedy)
- {
- // CHOICEPOINT: Try one more iteration of body, if backtrack continue from here with no more iterations
- PUSH(contStack, ResumeCont, inputOffset, begin->exitLabel);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- loopInfo->startInputOffset = inputOffset; // the next iteration starts here
- // If backtrack, we must continue with previous group bindings
- matcher.SaveInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId, true, input, contStack);
- instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopInst)); // back to the loop body
- }
- else
- {
- // CHOICEPOINT: Try no more iterations of body, if backtrack do one more iteration of body from here
- PUSH(contStack, RepeatLoopCont, beginLabel, inputOffset);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- instPointer = matcher.LabelToInstPointer(begin->exitLabel);
- }
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: RepeatLoop(" header, lets RepeatLoopMixin
- // print its part, closes the line with ")" and returns sizeof(*this).
- int RepeatLoopInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: RepeatLoop("), label);
-     RepeatLoopMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // BeginLoopIfCharInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Loop entry guarded by a single character test.
- //  - If the current input character equals 'c', the loop's iteration count
- //    is reset to 0 and execution falls into the loop body.
- //  - Otherwise the match fails when at least one iteration is required
- //    (repeats.lower > 0), or execution jumps to exitLabel when it is not.
- // NOTE(review): BeginLoopIfSetInst::Exec also records
- // loopInfo->startInputOffset when entering the loop; this variant does not.
- // Confirm that the difference is intentional.
- __inline bool BeginLoopIfCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.CompStats();
- #endif
-     const bool entersBody = inputOffset < inputLength && input[inputOffset] == c;
-     if (!entersBody)
-     {
-         if (repeats.lower > 0)
-         {
-             return matcher.Fail(FAIL_PARAMETERS);
-         }
-         instPointer = matcher.LabelToInstPointer(exitLabel);
-         return false;
-     }
-     // Commit to at least one iteration of loop
-     LoopInfo* info = matcher.LoopIdToLoopInfo(loopId);
-     // Every inner group is expected to be undefined on entry
- #if DBG
-     for (int groupIndex = minBodyGroupId; groupIndex <= maxBodyGroupId; groupIndex++)
-     {
-         Assert(matcher.GroupIdToGroupInfo(groupIndex)->IsUndefined());
-     }
- #endif
-     info->number = 0;
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug dump: writes the "L<label>: BeginLoopIfChar(" header, the CharMixin,
- // BeginLoopMixin and BodyGroupsMixin parts separated by ", ", then ")" and
- // end of line. Returns sizeof(*this).
- int BeginLoopIfCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     w->Print(_u("L%04x: BeginLoopIfChar("), label);
-     CharMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     BeginLoopMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     BodyGroupsMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     return static_cast<int>(sizeof(*this));
- }
- #endif
- // ----------------------------------------------------------------------
- // BeginLoopIfSetInst (optimized instruction)
- // ----------------------------------------------------------------------
- // Loop entry guarded by a character-set test.
- //  - If the current input character is a member of 'set', the loop state is
- //    initialized (start offset = current input offset, iteration count 0)
- //    and execution falls into the loop body.
- //  - Otherwise the match fails when at least one iteration is required
- //    (repeats.lower > 0), or execution jumps to exitLabel when it is not.
- __inline bool BeginLoopIfSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.CompStats();
- #endif
-     const bool entersBody = inputOffset < inputLength && set.Get(input[inputOffset]);
-     if (!entersBody)
-     {
-         if (repeats.lower > 0)
-         {
-             return matcher.Fail(FAIL_PARAMETERS);
-         }
-         instPointer = matcher.LabelToInstPointer(exitLabel);
-         return false;
-     }
-     // Commit to at least one iteration of loop
-     LoopInfo* info = matcher.LoopIdToLoopInfo(loopId);
-     // Every inner group is expected to be undefined on entry
- #if DBG
-     for (int groupIndex = minBodyGroupId; groupIndex <= maxBodyGroupId; groupIndex++)
-     {
-         Assert(matcher.GroupIdToGroupInfo(groupIndex)->IsUndefined());
-     }
- #endif
-     info->startInputOffset = inputOffset;
-     info->number = 0;
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: BeginLoopIfSet(" followed by the SetMixin,
- // BeginLoopMixin and BodyGroupsMixin fields separated by ", ", then ")" and end-of-line.
- // Returns the size in bytes of this instruction.
- int BeginLoopIfSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: BeginLoopIfSet("), label);
- SetMixin::Print(w, litbuf);
- w->Print(_u(", "));
- BeginLoopMixin::Print(w, litbuf);
- w->Print(_u(", "));
- BodyGroupsMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(BeginLoopIfSetInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // RepeatLoopIfCharInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool RepeatLoopIfCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const // Runs after a loop body iteration: counts it, then re-enters the body, fails, or jumps to the loop exit.
- {
- BeginLoopIfCharInst* begin = matcher.L2I(BeginLoopIfChar, beginLabel); // paired begin instruction; it holds c, repeats, loopId, exitLabel and the body group range
- LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId);
- if (begin->hasInnerNondet)
- {
- // May end up backtracking into loop body for iteration just completed: see above.
- PUSH(contStack, RestoreLoopCont, begin->loopId, *loopInfo); // *loopInfo is handed over before number is incremented below
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- loopInfo->number++; // one more iteration completed
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset < inputLength && input[inputOffset] == begin->c) // same one-character lookahead as BeginLoopIfCharInst::Exec
- {
- if (begin->repeats.upper != CharCountFlag && loopInfo->number >= (CharCount)begin->repeats.upper) // bounded loop that has already reached its upper limit
- {
- // If the loop body's first set and the loop's follow set are disjoint, we can just fail here since
- // we know the next character in the input is in the loop body's first set.
- return matcher.Fail(FAIL_PARAMETERS);
- }
- // Commit to one more iteration
- if(begin->hasInnerNondet)
- {
- // If it backtracks into the loop body of an earlier iteration, it must restore inner groups for that iteration.
- // Save the inner groups and reset them for the next iteration.
- matcher.SaveInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId, true, input, contStack);
- }
- else
- {
- // If it backtracks, the entire loop will fail, so no need to restore groups. Just reset the inner groups for
- // the next iteration.
- matcher.ResetInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId);
- }
- instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopIfCharInst)); // label just past the begin instruction, i.e. where its Exec continues when it enters the body
- return false;
- }
- if (loopInfo->number < begin->repeats.lower) // lookahead failed before the minimum iteration count was reached
- return matcher.Fail(FAIL_PARAMETERS);
- // Proceed to exit
- instPointer = matcher.LabelToInstPointer(begin->exitLabel);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: RepeatLoopIfChar(", delegates the fields to
- // RepeatLoopMixin::Print, then closes the parenthesis and ends the line.
- // The header format string consumes exactly one argument (label); it must not contain any further
- // conversion specifiers because no other argument is passed.
- // Returns the size in bytes of this instruction.
- int RepeatLoopIfCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: RepeatLoopIfChar("), label);
- RepeatLoopMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(*this);
- }
- #endif
- // ----------------------------------------------------------------------
- // RepeatLoopIfSetInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool RepeatLoopIfSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const // Runs after a loop body iteration: counts it, then re-enters the body, fails, or jumps to the loop exit.
- {
- BeginLoopIfSetInst* begin = matcher.L2I(BeginLoopIfSet, beginLabel); // paired begin instruction; it holds set, repeats, loopId, exitLabel and the body group range
- LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId);
- if (begin->hasInnerNondet)
- {
- // May end up backtracking into loop body for iteration just completed: see above.
- PUSH(contStack, RestoreLoopCont, begin->loopId, *loopInfo); // *loopInfo is handed over before number is incremented below
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- loopInfo->number++; // one more iteration completed
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset < inputLength && begin->set.Get(input[inputOffset])) // same one-character lookahead as BeginLoopIfSetInst::Exec
- {
- if (begin->repeats.upper != CharCountFlag && loopInfo->number >= (CharCount)begin->repeats.upper) // bounded loop that has already reached its upper limit
- {
- // If the loop body's first set and the loop's follow set are disjoint, we can just fail here since
- // we know the next character in the input is in the loop body's first set.
- return matcher.Fail(FAIL_PARAMETERS);
- }
- // Commit to one more iteration
- if(begin->hasInnerNondet)
- {
- // If it backtracks into the loop body of an earlier iteration, it must restore inner groups for that iteration.
- // Save the inner groups and reset them for the next iteration.
- matcher.SaveInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId, true, input, contStack);
- }
- else
- {
- // If it backtracks, the entire loop will fail, so no need to restore groups. Just reset the inner groups for
- // the next iteration.
- matcher.ResetInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId);
- }
- instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopIfSetInst)); // label just past the begin instruction, i.e. where its Exec continues when it enters the body
- return false;
- }
- if (loopInfo->number < begin->repeats.lower) // lookahead failed before the minimum iteration count was reached
- return matcher.Fail(FAIL_PARAMETERS);
- // Proceed to exit
- instPointer = matcher.LabelToInstPointer(begin->exitLabel);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: RepeatLoopIfSet(", delegates the fields to
- // RepeatLoopMixin::Print, then closes the parenthesis and ends the line.
- // Returns the size in bytes of this instruction.
- int RepeatLoopIfSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: RepeatLoopIfSet("), label);
- RepeatLoopMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(RepeatLoopIfSetInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // BeginLoopFixedInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool BeginLoopFixedInst::Exec(REGEX_INST_EXEC_PARAMETERS) const // Loop entry: resets this loop's LoopInfo, pushes continuations as needed, then falls through to the loop body.
- {
- LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(loopId);
- // If loop is contained in an outer loop, continuation stack may already have a RewindLoopFixed entry for
- // this loop. We must make sure its state is preserved on backtrack.
- if (hasOuterLoops)
- {
- PUSH(contStack, RestoreLoopCont, loopId, *loopInfo); // *loopInfo is handed over before it is overwritten below
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- // startInputOffset will stay here for all iterations, and we'll use number and length (presumably the
- // iteration count and the fixed body length -- confirm) to figure out where in the input to rewind to
- loopInfo->number = 0;
- loopInfo->startInputOffset = inputOffset;
- if (repeats.lower == 0)
- {
- // CHOICEPOINT: Try one iteration of body. Failure of body will rewind input to here and resume with follow.
- Assert(instPointer == (uint8*)this);
- PUSH(contStack, RewindLoopFixedCont, matcher.InstPointerToLabel(instPointer), true); // continuation records this instruction's label; the trailing true is the initial tryingBody flag (cf. RepeatLoopFixedInst::Exec clearing rewind->tryingBody)
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- // else: Must match minimum iterations, so continue to loop body. Failure of body signals failure of entire loop.
- instPointer += sizeof(*this); // the body starts immediately after this instruction
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: BeginLoopFixed(" followed by the BeginLoopMixin
- // and FixedLengthMixin fields separated by ", ", then ")" and end-of-line.
- // Returns the size in bytes of this instruction.
- int BeginLoopFixedInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: BeginLoopFixed("), label);
- BeginLoopMixin::Print(w, litbuf);
- w->Print(_u(", "));
- FixedLengthMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(BeginLoopFixedInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // RepeatLoopFixedInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool RepeatLoopFixedInst::Exec(REGEX_INST_EXEC_PARAMETERS) const // Runs after a loop body iteration: counts it and decides between another iteration and the loop's follow. Always returns false.
- {
- BeginLoopFixedInst* begin = matcher.L2I(BeginLoopFixed, beginLabel); // paired begin instruction; it holds repeats, loopId and exitLabel
- LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId);
- loopInfo->number++; // one more iteration completed
- if (loopInfo->number < begin->repeats.lower)
- {
- // Must match another iteration of body. Failure of body signals failure of the entire loop.
- instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopFixedInst)); // label just past the begin instruction (start of body)
- }
- else if (begin->repeats.upper != CharCountFlag && loopInfo->number >= (CharCount)begin->repeats.upper) // bounded loop that has reached its upper limit
- {
- // Matched maximum number of iterations. Continue with follow.
- if (begin->repeats.lower < begin->repeats.upper)
- {
- // Failure of follow will try one fewer iterations (subject to repeats.lower).
- // Since loop body is non-deterministic and does not define groups the rewind continuation must be on top of the stack.
- Cont *top = contStack.Top(); // NOTE(review): the comment above says "non-deterministic"; the continuation staying on top suggests a deterministic body was meant -- confirm
- Assert(top != 0);
- Assert(top->tag == Cont::RewindLoopFixed);
- RewindLoopFixedCont* rewind = (RewindLoopFixedCont*)top;
- rewind->tryingBody = false; // from here on the continuation guards the follow, not a body iteration
- }
- // else: we never pushed a rewind continuation
- instPointer = matcher.LabelToInstPointer(begin->exitLabel);
- }
- else
- {
- // CHOICEPOINT: Try one more iteration of body. Failure of body will rewind input to here and
- // try follow.
- if (loopInfo->number == begin->repeats.lower)
- {
- // i.e. begin->repeats.lower > 0, so continuation won't have been pushed in BeginLoopFixed
- PUSH(contStack, RewindLoopFixedCont, beginLabel, true);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopFixedInst)); // label just past the begin instruction (start of body)
- }
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: RepeatLoopFixed(", delegates the fields to
- // RepeatLoopMixin::Print, then closes the parenthesis and ends the line.
- // Returns the size in bytes of this instruction.
- int RepeatLoopFixedInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: RepeatLoopFixed("), label);
- RepeatLoopMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(RepeatLoopFixedInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // LoopSetInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool LoopSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const // Greedily consumes up to repeats.upper characters that are members of set, fails below repeats.lower, and leaves a rewind choicepoint when more than the minimum was consumed.
- {
- LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(loopId);
- // If loop is contained in an outer loop, continuation stack may already have a RewindLoopFixed entry for
- // this loop. We must make sure its state is preserved on backtrack.
- if (hasOuterLoops)
- {
- PUSH(contStack, RestoreLoopCont, loopId, *loopInfo); // *loopInfo is handed over before it is overwritten below
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- // startInputOffset will stay here for all iterations, and we'll use number (the count of consumed
- // characters, set below) to figure out where in the input to rewind to
- loopInfo->startInputOffset = inputOffset;
- // Consume as many elements of set as possible
- const RuntimeCharSet<Char>& matchSet = set;
- const CharCount loopMatchStart = inputOffset;
- const CharCountOrFlag repeatsUpper = repeats.upper;
- const CharCount inputEndOffset =
- static_cast<CharCount>(repeatsUpper) >= inputLength - inputOffset // compares the limit against the remaining input; presumably phrased this way so inputOffset + upper cannot wrap -- confirm
- ? inputLength
- : inputOffset + static_cast<CharCount>(repeatsUpper);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- while (inputOffset < inputEndOffset && matchSet.Get(input[inputOffset]))
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- inputOffset++;
- }
- loopInfo->number = inputOffset - loopMatchStart; // number of characters consumed by this loop
- if (loopInfo->number < repeats.lower)
- return matcher.Fail(FAIL_PARAMETERS);
- if (loopInfo->number > repeats.lower)
- {
- // CHOICEPOINT: If follow fails, try consuming one fewer characters
- Assert(instPointer == (uint8*)this);
- PUSH(contStack, RewindLoopSetCont, matcher.InstPointerToLabel(instPointer)); // continuation records this instruction's label
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- // else: failure of follow signals failure of entire loop
- // Continue with follow
- instPointer += sizeof(*this);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: LoopSet(loopId: <id>, ", then the repeat bounds,
- // the hasOuterLoops flag as "true"/"false", the SetMixin fields, and finally ")" and end-of-line.
- // Returns the size in bytes of this instruction.
- int LoopSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: LoopSet(loopId: %d, "), label, loopId);
- repeats.Print(w);
- w->Print(_u(", hasOuterLoops: %s, "), hasOuterLoops ? _u("true") : _u("false"));
- SetMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(LoopSetInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // BeginLoopFixedGroupLastIterationInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool BeginLoopFixedGroupLastIterationInst::Exec(REGEX_INST_EXEC_PARAMETERS) const // Loop entry for a loop that binds groupId on exit: resets LoopInfo, pushes continuations as needed, then falls through to the loop body.
- {
- Assert(matcher.GroupIdToGroupInfo(groupId)->IsUndefined()); // the group must not be bound on entry
- LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(loopId);
- // If loop is contained in an outer loop, continuation stack may already have a RewindLoopFixedGroupLastIteration entry
- // for this loop. We must make sure its state is preserved on backtrack.
- if (hasOuterLoops)
- {
- PUSH(contStack, RestoreLoopCont, loopId, *loopInfo); // *loopInfo is handed over before it is overwritten below
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- // If loop is contained in an outer loop or assertion, we must reset the group binding if we backtrack all the way out
- if (!noNeedToSave)
- {
- PUSH(contStack, ResetGroupCont, groupId);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- // startInputOffset will stay here for all iterations, and we'll use number and length (presumably the
- // iteration count and the fixed body length -- confirm) to figure out where in the input to rewind to
- loopInfo->number = 0;
- loopInfo->startInputOffset = inputOffset;
- if (repeats.lower == 0)
- {
- // CHOICEPOINT: Try one iteration of body. Failure of body will rewind input to here and resume with follow.
- Assert(instPointer == (uint8*)this);
- PUSH(contStack, RewindLoopFixedGroupLastIterationCont, matcher.InstPointerToLabel(instPointer), true); // continuation records this instruction's label; the trailing true is the initial tryingBody flag (cf. the repeat instruction clearing rewind->tryingBody)
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- // else: Must match minimum iterations, so continue to loop body. Failure of body signals failure of entire loop.
- instPointer += sizeof(*this); // the body starts immediately after this instruction
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: BeginLoopFixedGroupLastIteration(" followed by the
- // BeginLoopMixin, FixedLengthMixin, GroupMixin and NoNeedToSaveMixin fields separated by ", ",
- // then ")" and end-of-line. Returns the size in bytes of this instruction.
- int BeginLoopFixedGroupLastIterationInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: BeginLoopFixedGroupLastIteration("), label);
- BeginLoopMixin::Print(w, litbuf);
- w->Print(_u(", "));
- FixedLengthMixin::Print(w, litbuf);
- w->Print(_u(", "));
- GroupMixin::Print(w, litbuf);
- w->Print(_u(", "));
- NoNeedToSaveMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(BeginLoopFixedGroupLastIterationInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // RepeatLoopFixedGroupLastIterationInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool RepeatLoopFixedGroupLastIterationInst::Exec(REGEX_INST_EXEC_PARAMETERS) const // Runs after a loop body iteration: counts it and decides between another iteration and the follow; binds the group when the upper limit is reached. Always returns false.
- {
- BeginLoopFixedGroupLastIterationInst* begin = matcher.L2I(BeginLoopFixedGroupLastIteration, beginLabel); // paired begin instruction; it holds repeats, loopId, exitLabel, groupId and length
- LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId);
- loopInfo->number++; // one more iteration completed
- if (loopInfo->number < begin->repeats.lower)
- {
- // Must match another iteration of body. Failure of body signals failure of the entire loop.
- instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopFixedGroupLastIterationInst)); // label just past the begin instruction (start of body)
- }
- else if (begin->repeats.upper != CharCountFlag && loopInfo->number >= (CharCount)begin->repeats.upper) // bounded loop that has reached its upper limit
- {
- // Matched maximum number of iterations. Continue with follow.
- if (begin->repeats.lower < begin->repeats.upper)
- {
- // Failure of follow will try one fewer iterations (subject to repeats.lower).
- // Since loop body is non-deterministic and does not define groups the rewind continuation must be on top of the stack.
- Cont *top = contStack.Top(); // NOTE(review): the comment above says "non-deterministic"; the continuation staying on top suggests a deterministic body was meant -- confirm
- Assert(top != 0);
- Assert(top->tag == Cont::RewindLoopFixedGroupLastIteration);
- RewindLoopFixedGroupLastIterationCont* rewind = (RewindLoopFixedGroupLastIterationCont*)top;
- rewind->tryingBody = false; // from here on the continuation guards the follow, not a body iteration
- }
- // else: we never pushed a rewind continuation
- // Bind group
- GroupInfo* groupInfo = matcher.GroupIdToGroupInfo(begin->groupId);
- groupInfo->offset = inputOffset - begin->length; // the group covers the last begin->length characters before the current offset
- groupInfo->length = begin->length;
- instPointer = matcher.LabelToInstPointer(begin->exitLabel);
- }
- else
- {
- // CHOICEPOINT: Try one more iteration of body. Failure of body will rewind input to here and
- // try follow.
- if (loopInfo->number == begin->repeats.lower)
- {
- // i.e. begin->repeats.lower > 0, so continuation won't have been pushed in BeginLoopFixedGroupLastIteration
- PUSH(contStack, RewindLoopFixedGroupLastIterationCont, beginLabel, true);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopFixedGroupLastIterationInst)); // label just past the begin instruction (start of body)
- }
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: RepeatLoopFixedGroupLastIteration(", delegates the
- // fields to RepeatLoopMixin::Print, then closes the parenthesis and ends the line.
- // Returns the size in bytes of this instruction.
- int RepeatLoopFixedGroupLastIterationInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: RepeatLoopFixedGroupLastIteration("), label);
- RepeatLoopMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(RepeatLoopFixedGroupLastIterationInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // BeginGreedyLoopNoBacktrackInst
- // ----------------------------------------------------------------------
- __inline bool BeginGreedyLoopNoBacktrackInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- // Loop entry: remember where the loop starts, zero the iteration count, and leave a Resume
- // continuation so that a failing first iteration continues at exitLabel from this input offset.
- LoopInfo* const info = matcher.LoopIdToLoopInfo(loopId);
- info->startInputOffset = inputOffset;
- info->number = 0;
- // CHOICEPOINT: Try one iteration of body, if backtrack continue from here with no iterations
- PUSH(contStack, ResumeCont, inputOffset, exitLabel);
- // The body starts immediately after this instruction.
- instPointer += sizeof(BeginGreedyLoopNoBacktrackInst);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes a single line "L<label as 4 hex digits>: BeginGreedyLoopNoBacktrack(loopId: <id>)".
- // Returns the size in bytes of this instruction.
- int BeginGreedyLoopNoBacktrackInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->PrintEOL(_u("L%04x: BeginGreedyLoopNoBacktrack(loopId: %d)"), label, loopId);
- return sizeof(BeginGreedyLoopNoBacktrackInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // RepeatGreedyLoopNoBacktrackInst
- // ----------------------------------------------------------------------
- __inline bool RepeatGreedyLoopNoBacktrackInst::Exec(REGEX_INST_EXEC_PARAMETERS) const // Runs after a loop body iteration: fails if the iteration consumed no input, otherwise retargets the Resume continuation at the current offset and repeats the body.
- {
- BeginGreedyLoopNoBacktrackInst* begin = matcher.L2I(BeginGreedyLoopNoBacktrack, beginLabel); // paired begin instruction; only its loopId is read here
- LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId);
- loopInfo->number++; // one more iteration completed
- if (inputOffset == loopInfo->startInputOffset) // startInputOffset holds the offset at the start of the iteration just completed (it is refreshed below)
- {
- // No progress
- return matcher.Fail(FAIL_PARAMETERS);
- }
- else
- {
- // CHOICEPOINT: Try one more iteration of body, if backtrack, continue from here with no more iterations.
- // Since the loop body is deterministic and group free, it wouldn't have left any continuation records.
- // Therefore we can simply update the Resume continuation still on the top of the stack with the current
- // input pointer.
- Cont* top = contStack.Top();
- Assert(top != 0 && top->tag == Cont::Resume);
- ResumeCont* resume = (ResumeCont*)top;
- resume->origInputOffset = inputOffset; // a later backtrack now resumes from the end of the iteration just completed
- loopInfo->startInputOffset = inputOffset; // baseline for the next iteration's progress check
- instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginGreedyLoopNoBacktrackInst)); // label just past the begin instruction (start of body)
- }
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: RepeatGreedyLoopNoBacktrack(", delegates the
- // fields to RepeatLoopMixin::Print, then closes the parenthesis and ends the line.
- // Returns the size in bytes of this instruction.
- int RepeatGreedyLoopNoBacktrackInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: RepeatGreedyLoopNoBacktrack("), label);
- RepeatLoopMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(RepeatGreedyLoopNoBacktrackInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // ChompCharInst (optimized instruction)
- // ----------------------------------------------------------------------
- template<ChompMode Mode>
- __inline bool ChompCharInst<Mode>::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- // Consumes the longest run of the character c at the current offset. In Star mode an empty run
- // is accepted; in any other mode the first character must match or the instruction fails.
- const Char wanted = c;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- // Star mode skips the initial test entirely (and therefore never reads the input here).
- if(Mode != ChompMode::Star && !(inputOffset < inputLength && input[inputOffset] == wanted))
- return matcher.Fail(FAIL_PARAMETERS);
- for(;;)
- {
- // Non-Star: step over the character that was just verified.
- if(Mode != ChompMode::Star)
- ++inputOffset;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if(inputOffset >= inputLength || input[inputOffset] != wanted)
- break;
- // Star: step over the character only after it has been verified.
- if(Mode == ChompMode::Star)
- ++inputOffset;
- }
- instPointer += sizeof(*this);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: ChompChar<Star|Plus>(", the CharMixin fields,
- // then ")" and end-of-line. Returns the size in bytes of this instruction.
- template<ChompMode Mode>
- int ChompCharInst<Mode>::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- // Any mode other than Star is reported as "Plus".
- const char* const modeName = Mode == ChompMode::Star ? "Star" : "Plus";
- w->Print(_u("L%04x: ChompChar<%S>("), label, modeName);
- CharMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(ChompCharInst<Mode>);
- }
- #endif
- // ----------------------------------------------------------------------
- // ChompSetInst (optimized instruction)
- // ----------------------------------------------------------------------
- template<ChompMode Mode>
- __inline bool ChompSetInst<Mode>::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- // Consumes the longest run of characters belonging to set at the current offset. In Star mode an
- // empty run is accepted; in any other mode the first character must match or the instruction fails.
- const RuntimeCharSet<Char>& members = set;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- // Star mode skips the initial test entirely (and therefore never reads the input here).
- if(Mode != ChompMode::Star && !(inputOffset < inputLength && members.Get(input[inputOffset])))
- return matcher.Fail(FAIL_PARAMETERS);
- for(;;)
- {
- // Non-Star: step over the character that was just verified.
- if(Mode != ChompMode::Star)
- ++inputOffset;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if(!(inputOffset < inputLength && members.Get(input[inputOffset])))
- break;
- // Star: step over the character only after it has been verified.
- if(Mode == ChompMode::Star)
- ++inputOffset;
- }
- instPointer += sizeof(*this);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: ChompSet<Star|Plus>(", the SetMixin fields,
- // then ")" and end-of-line. Returns the size in bytes of this instruction.
- template<ChompMode Mode>
- int ChompSetInst<Mode>::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- // Any mode other than Star is reported as "Plus".
- const char* const modeName = Mode == ChompMode::Star ? "Star" : "Plus";
- w->Print(_u("L%04x: ChompSet<%S>("), label, modeName);
- SetMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(ChompSetInst<Mode>);
- }
- #endif
- // ----------------------------------------------------------------------
- // ChompCharGroupInst (optimized instruction)
- // ----------------------------------------------------------------------
- template<ChompMode Mode>
- __inline bool ChompCharGroupInst<Mode>::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- // Consumes the longest run of the character c and binds group groupId to the consumed span.
- // In Star mode an empty run is accepted; otherwise the first character must match or we fail.
- Assert(matcher.GroupIdToGroupInfo(groupId)->IsUndefined());
- const CharCount runStart = inputOffset;
- const Char wanted = c;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- // Star mode skips the initial test entirely (and therefore never reads the input here).
- if(Mode != ChompMode::Star && !(inputOffset < inputLength && input[inputOffset] == wanted))
- return matcher.Fail(FAIL_PARAMETERS);
- for(;;)
- {
- // Non-Star: step over the character that was just verified.
- if(Mode != ChompMode::Star)
- ++inputOffset;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if(inputOffset >= inputLength || input[inputOffset] != wanted)
- break;
- // Star: step over the character only after it has been verified.
- if(Mode == ChompMode::Star)
- ++inputOffset;
- }
- if(!noNeedToSave)
- {
- // UNDO ACTION: Restore group on backtrack
- PUSH(contStack, ResetGroupCont, groupId);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- // Bind the group to [runStart, inputOffset).
- GroupInfo *const boundGroup = matcher.GroupIdToGroupInfo(groupId);
- boundGroup->offset = runStart;
- boundGroup->length = inputOffset - runStart;
- instPointer += sizeof(*this);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: ChompCharGroup<Star|Plus>(" followed by the
- // CharMixin, GroupMixin and NoNeedToSaveMixin fields separated by ", ", then ")" and end-of-line.
- // Returns the size in bytes of this instruction.
- template<ChompMode Mode>
- int ChompCharGroupInst<Mode>::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- // Any mode other than Star is reported as "Plus".
- const char* const modeName = Mode == ChompMode::Star ? "Star" : "Plus";
- w->Print(_u("L%04x: ChompCharGroup<%S>("), label, modeName);
- CharMixin::Print(w, litbuf);
- w->Print(_u(", "));
- GroupMixin::Print(w, litbuf);
- w->Print(_u(", "));
- NoNeedToSaveMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(ChompCharGroupInst<Mode>);
- }
- #endif
- // ----------------------------------------------------------------------
- // ChompSetGroupInst (optimized instruction)
- // ----------------------------------------------------------------------
- template<ChompMode Mode>
- __inline bool ChompSetGroupInst<Mode>::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- // Consumes the longest run of characters belonging to set and binds group groupId to the consumed
- // span. In Star mode an empty run is accepted; otherwise the first character must match or we fail.
- Assert(matcher.GroupIdToGroupInfo(groupId)->IsUndefined());
- const CharCount runStart = inputOffset;
- const RuntimeCharSet<Char>& members = set;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- // Star mode skips the initial test entirely (and therefore never reads the input here).
- if(Mode != ChompMode::Star && !(inputOffset < inputLength && members.Get(input[inputOffset])))
- return matcher.Fail(FAIL_PARAMETERS);
- for(;;)
- {
- // Non-Star: step over the character that was just verified.
- if(Mode != ChompMode::Star)
- ++inputOffset;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if(!(inputOffset < inputLength && members.Get(input[inputOffset])))
- break;
- // Star: step over the character only after it has been verified.
- if(Mode == ChompMode::Star)
- ++inputOffset;
- }
- if(!noNeedToSave)
- {
- // UNDO ACTION: Restore group on backtrack
- PUSH(contStack, ResetGroupCont, groupId);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- // Bind the group to [runStart, inputOffset).
- GroupInfo *const boundGroup = matcher.GroupIdToGroupInfo(groupId);
- boundGroup->offset = runStart;
- boundGroup->length = inputOffset - runStart;
- instPointer += sizeof(*this);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: ChompSetGroup<Star|Plus>(" followed by the
- // SetMixin, GroupMixin and NoNeedToSaveMixin fields separated by ", ", then ")" and end-of-line.
- // Returns the size in bytes of this instruction.
- template<ChompMode Mode>
- int ChompSetGroupInst<Mode>::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- // Any mode other than Star is reported as "Plus".
- const char* const modeName = Mode == ChompMode::Star ? "Star" : "Plus";
- w->Print(_u("L%04x: ChompSetGroup<%S>("), label, modeName);
- SetMixin::Print(w, litbuf);
- w->Print(_u(", "));
- GroupMixin::Print(w, litbuf);
- w->Print(_u(", "));
- NoNeedToSaveMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(ChompSetGroupInst<Mode>);
- }
- #endif
- // ----------------------------------------------------------------------
- // ChompCharBoundedInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool ChompCharBoundedInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- // Consumes at most repeats.upper consecutive occurrences of the character c, and fails when fewer
- // than repeats.lower were found.
- const Char wanted = c;
- const CharCount runStart = inputOffset;
- const CharCountOrFlag upperBound = repeats.upper;
- // Stop at the end of the input or after upperBound characters, whichever comes first. The limit
- // is compared against the remaining input before any addition is performed.
- CharCount stopOffset;
- if (static_cast<CharCount>(upperBound) >= inputLength - inputOffset)
- stopOffset = inputLength;
- else
- stopOffset = inputOffset + static_cast<CharCount>(upperBound);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- while (inputOffset < stopOffset && input[inputOffset] == wanted)
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- ++inputOffset;
- }
- // Too few repetitions consumed.
- if (inputOffset - runStart < repeats.lower)
- return matcher.Fail(FAIL_PARAMETERS);
- instPointer += sizeof(*this);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: ChompCharBounded(" followed by the CharMixin and
- // ChompBoundedMixin fields separated by ", ", then ")" and end-of-line.
- // Returns the size in bytes of this instruction.
- int ChompCharBoundedInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: ChompCharBounded("), label);
- CharMixin::Print(w, litbuf);
- w->Print(_u(", "));
- ChompBoundedMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(ChompCharBoundedInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // ChompSetBoundedInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool ChompSetBoundedInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- // Consumes at most repeats.upper consecutive characters belonging to set, and fails when fewer
- // than repeats.lower were found.
- const RuntimeCharSet<Char>& members = set;
- const CharCount runStart = inputOffset;
- const CharCountOrFlag upperBound = repeats.upper;
- // Stop at the end of the input or after upperBound characters, whichever comes first. The limit
- // is compared against the remaining input before any addition is performed.
- CharCount stopOffset;
- if (static_cast<CharCount>(upperBound) >= inputLength - inputOffset)
- stopOffset = inputLength;
- else
- stopOffset = inputOffset + static_cast<CharCount>(upperBound);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- while (inputOffset < stopOffset && members.Get(input[inputOffset]))
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- ++inputOffset;
- }
- // Too few repetitions consumed.
- if (inputOffset - runStart < repeats.lower)
- return matcher.Fail(FAIL_PARAMETERS);
- instPointer += sizeof(*this);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: ChompSetBounded(" followed by the SetMixin and
- // ChompBoundedMixin fields separated by ", ", then ")" and end-of-line.
- // Returns the size in bytes of this instruction.
- int ChompSetBoundedInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: ChompSetBounded("), label);
- SetMixin::Print(w, litbuf);
- w->Print(_u(", "));
- ChompBoundedMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(ChompSetBoundedInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // ChompSetBoundedGroupLastCharInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool ChompSetBoundedGroupLastCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- // Consumes at most repeats.upper consecutive characters belonging to set, fails when fewer than
- // repeats.lower were found, and binds group groupId to the last consumed character (if any).
- Assert(matcher.GroupIdToGroupInfo(groupId)->IsUndefined());
- const RuntimeCharSet<Char>& members = set;
- const CharCount runStart = inputOffset;
- const CharCountOrFlag upperBound = repeats.upper;
- // Stop at the end of the input or after upperBound characters, whichever comes first. The limit
- // is compared against the remaining input before any addition is performed.
- CharCount stopOffset;
- if (static_cast<CharCount>(upperBound) >= inputLength - inputOffset)
- stopOffset = inputLength;
- else
- stopOffset = inputOffset + static_cast<CharCount>(upperBound);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- while (inputOffset < stopOffset && members.Get(input[inputOffset]))
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- ++inputOffset;
- }
- // Too few repetitions consumed.
- if (inputOffset - runStart < repeats.lower)
- return matcher.Fail(FAIL_PARAMETERS);
- // The group is only bound when at least one character was consumed.
- if (inputOffset > runStart)
- {
- if (!noNeedToSave)
- {
- PUSH(contStack, ResetGroupCont, groupId);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- }
- // Single-character binding at the final consumed position.
- GroupInfo *const boundGroup = matcher.GroupIdToGroupInfo(groupId);
- boundGroup->offset = inputOffset - 1;
- boundGroup->length = 1;
- }
- instPointer += sizeof(*this);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: ChompSetBoundedGroupLastChar(" followed by the
- // SetMixin, ChompBoundedMixin, GroupMixin and NoNeedToSaveMixin fields separated by ", ",
- // then ")" and end-of-line. Returns the size in bytes of this instruction.
- int ChompSetBoundedGroupLastCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: ChompSetBoundedGroupLastChar("), label);
- SetMixin::Print(w, litbuf);
- w->Print(_u(", "));
- ChompBoundedMixin::Print(w, litbuf);
- w->Print(_u(", "));
- GroupMixin::Print(w, litbuf);
- w->Print(_u(", "));
- NoNeedToSaveMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(ChompSetBoundedGroupLastCharInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // TryInst
- // ----------------------------------------------------------------------
- __inline bool TryInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- // Unconditional choicepoint: pushes a Resume continuation carrying the current input offset and
- // failLabel, then continues with the instruction that follows this one.
- // CHOICEPOINT: Resume at fail label on backtrack
- PUSH(contStack, ResumeCont, inputOffset, failLabel);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- instPointer += sizeof(TryInst);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: Try(", delegates the fields to TryMixin::Print,
- // then closes the parenthesis and ends the line. Returns the size in bytes of this instruction.
- int TryInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: Try("), label);
- TryMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(TryInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // TryIfCharInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool TryIfCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- // Choicepoint guarded by a one-character lookahead: the alternative is only attempted when the
- // next input character equals c; otherwise control goes straight to failLabel. No input is consumed.
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset >= inputLength || input[inputOffset] != c)
- {
- // Proceed directly to exit
- instPointer = matcher.LabelToInstPointer(failLabel);
- return false;
- }
- // CHOICEPOINT: Resume at fail label on backtrack
- PUSH(contStack, ResumeCont, inputOffset, failLabel);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- instPointer += sizeof(TryIfCharInst);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Debug printer: writes "L<label as 4 hex digits>: TryIfChar(" followed by the CharMixin and
- // TryMixin fields separated by ", ", then ")" and end-of-line.
- // Returns the size in bytes of this instruction.
- int TryIfCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
- w->Print(_u("L%04x: TryIfChar("), label);
- CharMixin::Print(w, litbuf);
- w->Print(_u(", "));
- TryMixin::Print(w, litbuf);
- w->PrintEOL(_u(")"));
- return sizeof(TryIfCharInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // TryMatchCharInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool TryMatchCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- // Combined choicepoint and single-character match: when the next input character equals c, a
- // Resume continuation is pushed for the pre-match offset and the character is consumed;
- // otherwise control goes straight to failLabel.
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.CompStats();
- #endif
- if (inputOffset >= inputLength || input[inputOffset] != c)
- {
- // Proceed directly to exit
- instPointer = matcher.LabelToInstPointer(failLabel);
- return false;
- }
- // CHOICEPOINT: Resume at fail label on backtrack
- PUSH(contStack, ResumeCont, inputOffset, failLabel);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- matcher.PushStats(contStack, input);
- #endif
- // Consume the matched character only after the continuation captured the original offset.
- ++inputOffset;
- instPointer += sizeof(TryMatchCharInst);
- return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int TryMatchCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     // Debug dump: "L<label>: TryMatchChar(<CharMixin fields>, <TryMixin fields>)".
-     w->Print(_u("L%04x: TryMatchChar("), label);
-     CharMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     TryMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     // Report the size of this instruction in bytes.
-     return sizeof(TryMatchCharInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // TryIfSetInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool TryIfSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.CompStats();
- #endif
-     // Peek at the current character and test set membership without consuming it.
-     const bool inSetHere = inputOffset < inputLength && set.Get(input[inputOffset]);
-     if (!inSetHere)
-     {
-         // Not in the set (or at end of input): go straight to the fail label.
-         instPointer = matcher.LabelToInstPointer(failLabel);
-         return false;
-     }
-
-     // CHOICEPOINT: on backtrack, resume at the fail label from this same offset.
-     PUSH(contStack, ResumeCont, inputOffset, failLabel);
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.PushStats(contStack, input);
- #endif
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int TryIfSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     // Debug dump: "L<label>: TryIfSet(<SetMixin fields>, <TryMixin fields>)".
-     w->Print(_u("L%04x: TryIfSet("), label);
-     SetMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     TryMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     // Report the size of this instruction in bytes.
-     return sizeof(TryIfSetInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // TryMatchSetInst (optimized instruction)
- // ----------------------------------------------------------------------
- __inline bool TryMatchSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.CompStats();
- #endif
-     const bool inSetHere = inputOffset < inputLength && set.Get(input[inputOffset]);
-     if (!inSetHere)
-     {
-         // Not in the set (or at end of input): go straight to the fail label.
-         instPointer = matcher.LabelToInstPointer(failLabel);
-         return false;
-     }
-
-     // CHOICEPOINT: the continuation records the offset *before* the character is consumed.
-     PUSH(contStack, ResumeCont, inputOffset, failLabel);
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.PushStats(contStack, input);
- #endif
-     // Consume the matched character and move on to the next instruction.
-     ++inputOffset;
-     instPointer += sizeof(*this);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int TryMatchSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     // Debug dump: "L<label>: TryMatchSet(<SetMixin fields>, <TryMixin fields>)".
-     w->Print(_u("L%04x: TryMatchSet("), label);
-     SetMixin::Print(w, litbuf);
-     w->Print(_u(", "));
-     TryMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     // Report the size of this instruction in bytes.
-     return sizeof(TryMatchSetInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // BeginAssertionInst
- // ----------------------------------------------------------------------
- __inline bool BeginAssertionInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     Assert(instPointer == (uint8*)this);
-
-     if (!isNegation)
-     {
-         // A successful positive assertion cuts the RestoreGroup continuations pushed inside its
-         // body, so the current bindings of the body's groups are saved up front; they can then
-         // be restored if the whole assertion is later backtracked over.
-         matcher.SaveInnerGroups(minBodyGroupId, maxBodyGroupId, false, input, contStack);
-     }
-
-     // Record where the assertion began (label, input offset, continuation stack position) ...
-     PUSHA(assertionStack, AssertionInfo, matcher.InstPointerToLabel(instPointer), inputOffset, contStack.Position());
-     // ... and leave a continuation that pops that record if the body fails.
-     PUSH(contStack, PopAssertionCont);
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     matcher.PushStats(contStack, input);
- #endif
-
-     instPointer += sizeof(BeginAssertionInst);
-     return false;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int BeginAssertionInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     // Debug dump: label, negation flag, next label, then the body group range.
-     w->Print(
-         _u("L%04x: BeginAssertion(isNegation: %s, nextLabel: L%04x, "),
-         label,
-         isNegation ? _u("true") : _u("false"),
-         nextLabel);
-     BodyGroupsMixin::Print(w, litbuf);
-     w->PrintEOL(_u(")"));
-     // Report the size of this instruction in bytes.
-     return sizeof(BeginAssertionInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // EndAssertionInst
- // ----------------------------------------------------------------------
- __inline bool EndAssertionInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
- {
-     // The assertion body ran to completion; pop its record with "succeeded" = true.
-     const bool positiveBodySucceeded = matcher.PopAssertion(inputOffset, instPointer, contStack, assertionStack, true);
-     if (positiveBodySucceeded)
-     {
-         // Positive assertion holds; the instruction pointer is already at the next instruction.
-         return false;
-     }
-     // Body of a negative assertion succeeded, so the assertion itself fails: backtrack.
-     return matcher.Fail(FAIL_PARAMETERS);
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int EndAssertionInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
- {
-     // Debug dump; this instruction has no operands to show.
-     w->PrintEOL(_u("L%04x: EndAssertion()"), label);
-     // Report the size of this instruction in bytes.
-     return sizeof(EndAssertionInst);
- }
- #endif
- // ----------------------------------------------------------------------
- // Matcher state
- // ----------------------------------------------------------------------
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void LoopInfo::Print(DebugWriter* w) const
- {
-     // Debug dump of the loop's iteration count and the input offset where it started.
-     w->Print(
-         _u("number: %u, startInputOffset: %u"),
-         number,
-         startInputOffset);
- }
- #endif
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void GroupInfo::Print(DebugWriter* w, const Char* const input) const
- {
-     // Undefined groups only show their raw offset value.
-     if (IsUndefined())
-     {
-         w->Print(_u("<undefined> (%u)"), offset);
-         return;
-     }
-
-     // Defined groups show the captured text followed by "(offset+length)".
-     const CharCount capturedLength = (CharCount)length;
-     w->PrintQuotedString(input + offset, capturedLength);
-     w->Print(_u(" (%u+%u)"), offset, capturedLength);
- }
- #endif
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void AssertionInfo::Print(DebugWriter* w) const
- {
-     // Debug dump of the assertion record: the label of its BeginAssertion instruction, the
-     // input offset at which it started, and the continuation stack position captured then.
-     // contStackPosition is widened to unsigned long long so that it matches the %llu
-     // conversion (the previous "$llu" was a typo that left the argument unformatted).
-     w->PrintEOL(
-         _u("beginLabel: L%04x, startInputOffset: %u, contStackPosition: %llu"),
-         beginLabel,
-         startInputOffset,
-         static_cast<unsigned long long>(contStackPosition));
- }
- #endif
- // ----------------------------------------------------------------------
- // ResumeCont
- // ----------------------------------------------------------------------
- __inline bool ResumeCont::Exec(REGEX_CONT_EXEC_PARAMETERS)
- {
-     // Restore the instruction pointer and input offset captured when this choicepoint was pushed.
-     instPointer = matcher.LabelToInstPointer(origInstLabel);
-     inputOffset = origInputOffset;
-     return true; // STOP BACKTRACKING
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int ResumeCont::Print(DebugWriter* w, const Char* const input) const
- {
-     // Debug dump of the saved input offset and resume label.
-     w->PrintEOL(
-         _u("Resume(origInputOffset: %u, origInstLabel: L%04x)"),
-         origInputOffset,
-         origInstLabel);
-     // Report the size of this continuation in bytes.
-     return sizeof(ResumeCont);
- }
- #endif
- // ----------------------------------------------------------------------
- // RestoreLoopCont
- // ----------------------------------------------------------------------
- __inline bool RestoreLoopCont::Exec(REGEX_CONT_EXEC_PARAMETERS)
- {
-     matcher.QueryContinue(qcTicks);
-     // Put back the loop state that was saved when this continuation was pushed.
-     LoopInfo* const target = matcher.LoopIdToLoopInfo(loopId);
-     *target = origLoopInfo;
-     return false; // KEEP BACKTRACKING
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int RestoreLoopCont::Print(DebugWriter* w, const Char* const input) const
- {
-     // Debug dump: loop id followed by the saved LoopInfo.
-     w->Print(_u("RestoreLoop(loopId: %d, "), loopId);
-     origLoopInfo.Print(w);
-     w->PrintEOL(_u(")"));
-     // Report the size of this continuation in bytes.
-     return sizeof(RestoreLoopCont);
- }
- #endif
- // ----------------------------------------------------------------------
- // RestoreGroupCont
- // ----------------------------------------------------------------------
- __inline bool RestoreGroupCont::Exec(REGEX_CONT_EXEC_PARAMETERS)
- {
-     // Put back the group binding that was saved when this continuation was pushed.
-     GroupInfo* const target = matcher.GroupIdToGroupInfo(groupId);
-     *target = origGroupInfo;
-     return false; // KEEP BACKTRACKING
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int RestoreGroupCont::Print(DebugWriter* w, const Char* const input) const
- {
-     // Debug dump: group id followed by the saved GroupInfo.
-     w->Print(_u("RestoreGroup(groupId: %d, "), groupId);
-     origGroupInfo.Print(w, input);
-     w->PrintEOL(_u(")"));
-     // Report the size of this continuation in bytes.
-     return sizeof(RestoreGroupCont);
- }
- #endif
- // ----------------------------------------------------------------------
- // ResetGroupCont
- // ----------------------------------------------------------------------
- __inline bool ResetGroupCont::Exec(REGEX_CONT_EXEC_PARAMETERS)
- {
-     // Reset the single group this continuation was pushed for.
-     matcher.ResetGroup(groupId);
-
-     return false; // KEEP BACKTRACKING
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int ResetGroupCont::Print(DebugWriter* w, const Char* const input) const
- {
-     // Debug dump of the group id to reset.
-     w->PrintEOL(_u("ResetGroup(groupId: %d)"), groupId);
-     // Report the size of this continuation in bytes.
-     return sizeof(ResetGroupCont);
- }
- #endif
- // ----------------------------------------------------------------------
- // ResetGroupRangeCont
- // ----------------------------------------------------------------------
- __inline bool ResetGroupRangeCont::Exec(REGEX_CONT_EXEC_PARAMETERS)
- {
-     // Reset the range of groups [fromGroupId, toGroupId] this continuation was pushed for.
-     matcher.ResetInnerGroups(fromGroupId, toGroupId);
-
-     return false; // KEEP BACKTRACKING
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int ResetGroupRangeCont::Print(DebugWriter* w, const Char* const input) const
- {
-     // Debug dump of the group id range to reset.
-     w->PrintEOL(
-         _u("ResetGroupRange(fromGroupId: %d, toGroupId: %d)"),
-         fromGroupId,
-         toGroupId);
-     // Report the size of this continuation in bytes.
-     return sizeof(ResetGroupRangeCont);
- }
- #endif
- // ----------------------------------------------------------------------
- // RepeatLoopCont
- // ----------------------------------------------------------------------
- __inline bool RepeatLoopCont::Exec(REGEX_CONT_EXEC_PARAMETERS)
- {
-     matcher.QueryContinue(qcTicks);
-
-     // The follow of a non-greedy loop failed: attempt one more iteration of the body.
-     BeginLoopInst* const loopInst = matcher.L2I(BeginLoop, beginLabel);
-     LoopInfo* const info = matcher.LoopIdToLoopInfo(loopInst->loopId);
-
-     // Rewind the input to where the follow was tried and start the new iteration there.
-     inputOffset = origInputOffset;
-     info->startInputOffset = inputOffset;
-     instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopInst));
-
-     if (!loopInst->hasInnerNondet)
-     {
-         // Backtracking from here fails the entire loop, so nothing needs restoring later;
-         // simply clear the inner groups for the upcoming iteration.
-         matcher.ResetInnerGroups(loopInst->minBodyGroupId, loopInst->maxBodyGroupId);
-     }
-     else
-     {
-         // Backtracking may re-enter the body of an earlier iteration, which needs that
-         // iteration's inner groups back: save them, and reset them for the upcoming iteration.
-         matcher.SaveInnerGroups(loopInst->minBodyGroupId, loopInst->maxBodyGroupId, true, input, contStack);
-     }
-     return true; // STOP BACKTRACKING
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int RepeatLoopCont::Print(DebugWriter* w, const Char* const input) const
- {
-     // Debug dump of the loop's begin label and the saved input offset.
-     w->PrintEOL(
-         _u("RepeatLoop(beginLabel: L%04x, origInputOffset: %u)"),
-         beginLabel,
-         origInputOffset);
-     // Report the size of this continuation in bytes.
-     return sizeof(RepeatLoopCont);
- }
- #endif
- // ----------------------------------------------------------------------
- // PopAssertionCont
- // ----------------------------------------------------------------------
- __inline bool PopAssertionCont::Exec(REGEX_CONT_EXEC_PARAMETERS)
- {
-     Assert(!assertionStack.IsEmpty());
-
-     // The assertion body failed; pop its record with "succeeded" = false.
-     const bool negativeBodyFailed = matcher.PopAssertion(inputOffset, instPointer, contStack, assertionStack, false);
-
-     // true  -> body of a negative assertion failed: STOP BACKTRACKING.
-     // false -> body of a positive assertion failed: CONTINUE BACKTRACKING.
-     return negativeBodyFailed;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int PopAssertionCont::Print(DebugWriter* w, const Char* const input) const
- {
-     // Debug dump; this continuation carries no fields to show.
-     w->PrintEOL(_u("PopAssertion()"));
-     // Report the size of this continuation in bytes.
-     return sizeof(PopAssertionCont);
- }
- #endif
- // ----------------------------------------------------------------------
- // RewindLoopFixedCont
- // ----------------------------------------------------------------------
- __inline bool RewindLoopFixedCont::Exec(REGEX_CONT_EXEC_PARAMETERS)
- {
-     matcher.QueryContinue(qcTicks);
-
-     BeginLoopFixedInst* const loopInst = matcher.L2I(BeginLoopFixed, beginLabel);
-     LoopInfo* const info = matcher.LoopIdToLoopInfo(loopInst->loopId);
-
-     if (!tryingBody)
-     {
-         // info->number counts the iterations completed before the follow was tried;
-         // retry the follow with one fewer iteration.
-         Assert(info->number > loopInst->repeats.lower);
-         info->number--;
-     }
-     else
-     {
-         // The body attempt failed; info->number already counts only the completed iterations.
-         tryingBody = false;
-         Assert(info->number >= loopInst->repeats.lower);
-     }
-
-     // Rewind the input to the end of the last iteration that is kept.
-     inputOffset = info->startInputOffset + info->number * loopInst->length;
-
-     const bool canGiveUpMore = info->number > loopInst->repeats.lower;
-     if (canGiveUpMore)
-     {
-         // Un-pop this continuation so it is ready for the next backtrack.
-         contStack.UnPop<RewindLoopFixedCont>();
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         matcher.UnPopStats(contStack, input);
- #endif
-     }
-     // Otherwise no fewer iterations are possible: leave the continuation popped so that a
-     // failure of the follow propagates.
-
-     instPointer = matcher.LabelToInstPointer(loopInst->exitLabel);
-     return true; // STOP BACKTRACKING
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int RewindLoopFixedCont::Print(DebugWriter* w, const Char* const input) const
- {
-     // Debug dump of the loop's begin label and the tryingBody flag.
-     w->PrintEOL(
-         _u("RewindLoopFixed(beginLabel: L%04x, tryingBody: %s)"),
-         beginLabel,
-         tryingBody ? _u("true") : _u("false"));
-     // Report the size of this continuation in bytes.
-     return sizeof(RewindLoopFixedCont);
- }
- #endif
- // ----------------------------------------------------------------------
- // RewindLoopSetCont
- // ----------------------------------------------------------------------
- __inline bool RewindLoopSetCont::Exec(REGEX_CONT_EXEC_PARAMETERS)
- {
-     matcher.QueryContinue(qcTicks);
-
-     LoopSetInst* const loopInst = matcher.L2I(LoopSet, beginLabel);
-     LoopInfo* const info = matcher.LoopIdToLoopInfo(loopInst->loopId);
-
-     // info->number is the number of iterations completed before trying the follow;
-     // retry the follow with one fewer iteration.
-     Assert(info->number > loopInst->repeats.lower);
-     info->number--;
-
-     // Rewind the input: the offset advances by one per kept iteration.
-     inputOffset = info->startInputOffset + info->number;
-
-     const bool canGiveUpMore = info->number > loopInst->repeats.lower;
-     if (canGiveUpMore)
-     {
-         // Un-pop this continuation so it is ready for the next backtrack.
-         contStack.UnPop<RewindLoopSetCont>();
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         matcher.UnPopStats(contStack, input);
- #endif
-     }
-     // Otherwise no fewer iterations are possible: leave the continuation popped so that a
-     // failure of the follow propagates.
-
-     // Continue with the instruction immediately after the LoopSet instruction.
-     instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(LoopSetInst));
-     return true; // STOP BACKTRACKING
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int RewindLoopSetCont::Print(DebugWriter* w, const Char* const input) const
- {
-     // Debug dump of the loop's begin label.
-     w->PrintEOL(_u("RewindLoopSet(beginLabel: L%04x)"), beginLabel);
-     // Report the size of this continuation in bytes.
-     return sizeof(RewindLoopSetCont);
- }
- #endif
- // ----------------------------------------------------------------------
- // RewindLoopFixedGroupLastIterationCont
- // ----------------------------------------------------------------------
- __inline bool RewindLoopFixedGroupLastIterationCont::Exec(REGEX_CONT_EXEC_PARAMETERS)
- {
-     matcher.QueryContinue(qcTicks);
-
-     BeginLoopFixedGroupLastIterationInst* const loopInst = matcher.L2I(BeginLoopFixedGroupLastIteration, beginLabel);
-     LoopInfo* const info = matcher.LoopIdToLoopInfo(loopInst->loopId);
-     GroupInfo* const group = matcher.GroupIdToGroupInfo(loopInst->groupId);
-
-     if (!tryingBody)
-     {
-         // info->number counts the iterations completed before the follow was tried;
-         // retry the follow with one fewer iteration.
-         Assert(info->number > loopInst->repeats.lower);
-         info->number--;
-     }
-     else
-     {
-         // The current body attempt failed; info->number counts the iterations completed before it.
-         tryingBody = false;
-         Assert(info->number >= loopInst->repeats.lower);
-     }
-
-     // Rewind the input to the end of the last iteration that is kept.
-     inputOffset = info->startInputOffset + info->number * loopInst->length;
-
-     if (info->number == 0)
-     {
-         // No iteration remains, so the group is unbound.
-         group->Reset();
-     }
-     else
-     {
-         // Bind the group to the body of the last remaining iteration.
-         group->offset = inputOffset - loopInst->length;
-         group->length = loopInst->length;
-     }
-
-     const bool canGiveUpMore = info->number > loopInst->repeats.lower;
-     if (canGiveUpMore)
-     {
-         // Un-pop this continuation so it is ready for the next backtrack.
-         contStack.UnPop<RewindLoopFixedGroupLastIterationCont>();
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         matcher.UnPopStats(contStack, input);
- #endif
-     }
-     // Otherwise no fewer iterations are possible: leave the continuation popped so that a
-     // failure of the follow propagates.
-
-     instPointer = matcher.LabelToInstPointer(loopInst->exitLabel);
-     return true; // STOP BACKTRACKING
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- int RewindLoopFixedGroupLastIterationCont::Print(DebugWriter* w, const Char* const input) const
- {
-     // Debug dump of the loop's begin label and the tryingBody flag.
-     w->PrintEOL(
-         _u("RewindLoopFixedGroupLastIteration(beginLabel: L%04x, tryingBody: %s)"),
-         beginLabel,
-         tryingBody ? _u("true") : _u("false"));
-     // Report the size of this continuation in bytes.
-     return sizeof(RewindLoopFixedGroupLastIterationCont);
- }
- #endif
- // ----------------------------------------------------------------------
- // Matcher
- // ----------------------------------------------------------------------
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void ContStack::Print(DebugWriter* w, const Char* const input) const
- {
-     // Walk the stack, printing each continuation prefixed by its stack position.
-     Iterator it(*this);
-     while (it)
-     {
-         w->Print(_u("%4llu: "), static_cast<unsigned long long>(it.Position()));
-         it->Print(w, input);
-         ++it;
-     }
- }
- #endif
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void AssertionStack::Print(DebugWriter* w, const Matcher* matcher) const
- {
-     // Walk the stack, printing each assertion record. The matcher argument is not used here.
-     Iterator it(*this);
-     while (it)
-     {
-         it->Print(w);
-         ++it;
-     }
- }
- #endif
- Matcher::Matcher(Js::ScriptContext* scriptContext, RegexPattern* pattern)
-     : pattern(pattern)
-     , standardChars(scriptContext->GetThreadContext()->GetStandardChars((char16*)0))
-     , program(pattern->rep.unified.program)
-     , groupInfos(nullptr)
-     , loopInfos(nullptr)
-     , literalNextSyncInputOffsets(nullptr)
-     , recycler(scriptContext->GetRecycler())
-     , previousQcTime(0)
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     , stats(nullptr)
-     , w(nullptr)
- #endif
- {
-     // One GroupInfo per group in the program. No explicit zeroing here: the GroupInfo
-     // constructor should take care of initialization.
-     groupInfos = RecyclerNewArrayLeaf(recycler, GroupInfo, program->numGroups);
-
-     // Loop state is allocated (zeroed) only for programs that actually contain loops.
-     const bool hasLoops = program->numLoops > 0;
-     if (hasLoops)
-     {
-         loopInfos = RecyclerNewArrayLeafZ(recycler, LoopInfo, program->numLoops);
-     }
- }
- Matcher *Matcher::New(Js::ScriptContext* scriptContext, RegexPattern* pattern)
- {
-     // Allocate the matcher from the script context's recycler.
-     Matcher *const created = RecyclerNew(scriptContext->GetRecycler(), Matcher, scriptContext, pattern);
-     return created;
- }
- Matcher *Matcher::CloneToScriptContext(Js::ScriptContext *scriptContext, RegexPattern *pattern)
- {
-     // Build a fresh matcher in the target script context, then copy this matcher's group and
-     // loop state into it. Sizes are taken from this matcher's program.
-     Matcher *const clone = New(scriptContext, pattern);
-
-     if (groupInfos != nullptr)
-     {
-         const size_t groupBytes = program->numGroups * sizeof(GroupInfo);
-         js_memcpy_s(clone->groupInfos, groupBytes, groupInfos, groupBytes);
-     }
-
-     if (loopInfos != nullptr)
-     {
-         const size_t loopBytes = program->numLoops * sizeof(LoopInfo);
-         js_memcpy_s(clone->loopInfos, loopBytes, loopInfos, loopBytes);
-     }
-
-     return clone;
- }
- #if DBG
- const uint32 contTags[] = { // debug-only table with one entry per M(...) line of RegexContcodes.h
- #define M(O) Cont::O,
- #include "RegexContcodes.h"
- #undef M
- };
- const uint32 minContTag = contTags[0]; // first listed tag; used as the lower bound in the tag range Assert in RunContStack
- const uint32 maxContTag = contTags[(sizeof(contTags) / sizeof(uint32)) - 1]; // last listed tag; used as the upper bound in that Assert
- #endif
- void Matcher::DoQueryContinue(const uint qcTicks)
- {
-     // See definition of TimePerQc for description of regex QC heuristics
-     const uint lastQcTime = previousQcTime;
-     const uint currentTime = GetTickCount();
-     const bool withinTimeBudget = !lastQcTime || (currentTime - lastQcTime < TimePerQc);
-     const bool offTickBoundary = (qcTicks & (TicksPerQc - 1)) != 0;
-     if (withinTimeBudget && offTickBoundary)
-     {
-         return;
-     }
-
-     previousQcTime = currentTime;
-     TraceQueryContinue(currentTime);
-
-     // Query-continue can be reentrant and run the same regex again. To keep the matcher and
-     // other persistent objects from being reused reentrantly, they are detached for the
-     // duration of the QC call and reattached afterwards by this scope guard.
-     class AutoCleanup
-     {
-     private:
-         RegexPattern *const pattern;
-         Matcher *const matcher;
-         RegexStacks * regexStacks;
-
-     public:
-         AutoCleanup(RegexPattern *const pattern, Matcher *const matcher) : pattern(pattern), matcher(matcher)
-         {
-             Assert(pattern);
-             Assert(matcher);
-             Assert(pattern->rep.unified.matcher == matcher);
-
-             // Detach the matcher from the pattern and set the regex stacks aside.
-             pattern->rep.unified.matcher = nullptr;
-             regexStacks = pattern->GetScriptContext()->SaveRegexStacks();
-         }
-
-         ~AutoCleanup()
-         {
-             // Reattach the matcher and hand the saved stacks back.
-             pattern->rep.unified.matcher = matcher;
-             pattern->GetScriptContext()->RestoreRegexStacks(regexStacks);
-         }
-     } autoCleanup(pattern, this);
-
-     pattern->GetScriptContext()->GetThreadContext()->CheckScriptInterrupt();
- }
- void Matcher::TraceQueryContinue(const uint now)
- {
-     // Emits a "Regex QC" trace line when the RegexQc phase trace is enabled. From the second
-     // traced call on, the line also carries the average QC frequency (calls per 1000 ticks of
-     // `now`) measured since the first traced call.
-     if(!PHASE_TRACE1(Js::RegexQcPhase))
-         return;
-     Output::Print(_u("Regex QC"));
-     static uint n = 0;
-     static uint firstQcTime = 0;
-     ++n;
-     if(firstQcTime)
-     {
-         const uint elapsed = now - firstQcTime;
-         // Skip the figure when no time has elapsed yet (it would print as inf), and do the
-         // scaling in double so that n * 1000 cannot wrap around in 32-bit arithmetic.
-         if(elapsed != 0)
-             Output::Print(_u(" - frequency: %0.1f"), (static_cast<double>(n) * 1000.0) / elapsed);
-     }
-     else
-         firstQcTime = now;
-     Output::Print(_u("\n"));
-     Output::Flush();
- }
- bool Matcher::Fail(const Char* const input, CharCount &inputOffset, const uint8 *&instPointer, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks)
- {
-     // Backtrack through any pending continuations. RunContStack returns false when one of
-     // them found a resume point, in which case execution carries on from there.
-     const bool resumed = !contStack.IsEmpty()
-         && !RunContStack(input, inputOffset, instPointer, contStack, assertionStack, qcTicks);
-     if (resumed)
-     {
-         return false;
-     }
-
-     // Nothing left to try: the match attempt has failed, so clear group 0.
-     Assert(assertionStack.IsEmpty());
-     groupInfos[0].Reset();
-     return true; // STOP EXECUTION
- }
- __inline bool Matcher::RunContStack(const Char* const input, CharCount &inputOffset, const uint8 *&instPointer, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks)
- { // Pops and executes continuations until one asks to stop backtracking (returns false) or the stack is exhausted (returns true).
- while (true)
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- PopStats(contStack, input);
- #endif
- Cont* cont = contStack.Pop();
- if (cont == 0) // Pop() returned null: no continuation left to run
- break;
- Assert(cont->tag >= minContTag && cont->tag <= maxContTag);
- // All these cases RESUME EXECUTION if backtracking finds a stop point
- const Cont::ContTag tag = cont->tag;
- switch (tag)
- {
- #define M(O) case Cont::O: if (((O##Cont*)cont)->Exec(*this, input, inputOffset, instPointer, contStack, assertionStack, qcTicks)) return false; break;
- #include "RegexContcodes.h"
- #undef M
- default:
- Assert(false); // should never be reached
- return false; // however, can't use compiler optimization if we want to return false here
- }
- }
- return true; // every continuation was run and none stopped the backtracking
- }
- #if DBG
- const uint32 instTags[] = { // debug-only table with one entry per M/MTemplate line of RegexOpCodes.h
- #define M(TagName) Inst::TagName,
- #define MTemplate(TagName, ...) M(TagName)
- #include "RegexOpCodes.h"
- #undef M
- #undef MTemplate
- };
- const uint32 minInstTag = instTags[0]; // first listed tag; used as the lower bound in the tag range Assert in Run
- const uint32 maxInstTag = instTags[(sizeof(instTags) / sizeof(uint32)) - 1]; // last listed tag; used as the upper bound in that Assert
- #endif
- __inline void Matcher::Run(const Char* const input, const CharCount inputLength, CharCount &matchStart, CharCount &nextSyncInputOffset, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks, bool firstIteration)
- { // Interpreter loop: runs the program's instructions from matchStart until some instruction's Exec returns true.
- CharCount inputOffset = matchStart; // current position in the input
- const uint8 *instPointer = program->rep.insts.insts; // start at the first instruction of the program
- Assert(instPointer != 0);
- while (true)
- {
- Assert(inputOffset >= matchStart && inputOffset <= inputLength);
- Assert(instPointer >= program->rep.insts.insts && instPointer < program->rep.insts.insts + program->rep.insts.instsLen);
- Assert(((Inst*)instPointer)->tag >= minInstTag && ((Inst*)instPointer)->tag <= maxInstTag);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- if (w != 0) // dump the matcher state before each instruction when a debug writer is attached
- Print(w, input, inputLength, inputOffset, instPointer, contStack, assertionStack);
- InstStats();
- #endif
- const Inst *inst = (const Inst*)instPointer;
- const Inst::InstTag tag = inst->tag;
- switch (tag) // one case per opcode, generated from RegexOpCodes.h via the macros below
- {
- #define MBase(TagName, ClassName) \
- case Inst::TagName: \
- if (((const ClassName *)inst)->Exec(*this, input, inputLength, matchStart, inputOffset, nextSyncInputOffset, instPointer, contStack, assertionStack, qcTicks, firstIteration)) \
- return; \
- break;
- #define M(TagName) MBase(TagName, TagName##Inst)
- #define MTemplate(TagName, TemplateDeclaration, GenericClassName, SpecializedClassName) MBase(TagName, SpecializedClassName)
- #include "RegexOpCodes.h"
- #undef MBase
- #undef M
- #undef MTemplate
- default:
- Assert(false); // unknown tag
- __assume(false);
- }
- }
- }
- #if DBG
- void Matcher::ResetLoopInfos()
- {
-     // Debug-only helper: reset the state of every loop in the program.
-     int loopIndex = 0;
-     while (loopIndex < program->numLoops)
-     {
-         loopInfos[loopIndex].Reset();
-         loopIndex++;
-     }
- }
- #endif
- __inline bool Matcher::MatchHere(const Char* const input, const CharCount inputLength, CharCount &matchStart, CharCount &nextSyncInputOffset, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks, bool firstIteration)
- {
-     // Start every attempt from clean stacks. This is done up front rather than after Run()
-     // so that an unusual exit from the matcher cannot skip the clears.
-     contStack.Clear();
-     // The assertion stack can be left non-empty by a hard failure that exits the matcher
-     // without popping an assertion.
-     assertionStack.Clear();
-     Assert(contStack.IsEmpty());
-     Assert(assertionStack.IsEmpty());
-
-     // Unbind all groups of the program, including group 0.
-     const int lastGroupId = program->numGroups - 1;
-     ResetInnerGroups(0, lastGroupId);
- #if DBG
-     ResetLoopInfos();
- #endif
-
-     Run(input, inputLength, matchStart, nextSyncInputOffset, contStack, assertionStack, qcTicks, firstIteration);
-
-     // The stacks' memory is deliberately kept so the next attempt does not have to allocate.
-     return WasLastMatchSuccessful();
- }
- __inline bool Matcher::MatchSingleCharCaseInsensitive(const Char* const input, const CharCount inputLength, CharCount offset, const Char c)
- {
-     // Looks for a character equal to c (after canonicalizing both with the program's case
-     // mapping source) at or after offset. On success binds group 0 to that single character
-     // and returns true; otherwise resets group 0 and returns false.
-     CaseInsensitive::MappingSource mappingSource = program->GetCaseMappingSource();
-
-     // With the sticky flag only the character at offset itself may match; there is no scan.
-     if ((program->flags & StickyRegexFlag) != 0)
-     {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         CompStats();
- #endif
-         // Match() only guarantees offset <= inputLength, so check the bound before reading
-         // input[offset]; at end of input there is no character to match.
-         if (offset < inputLength && MatchSingleCharCaseInsensitiveHere(mappingSource, input, offset, c))
-         {
-             GroupInfo* const info = GroupIdToGroupInfo(0);
-             info->offset = offset;
-             info->length = 1;
-             return true;
-         }
-         else
-         {
-             ResetGroup(0);
-             return false;
-         }
-     }
-
-     // Non-sticky: scan forward for the first matching character.
-     while (offset < inputLength)
-     {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         CompStats();
- #endif
-         if (MatchSingleCharCaseInsensitiveHere(mappingSource, input, offset, c))
-         {
-             GroupInfo* const info = GroupIdToGroupInfo(0);
-             info->offset = offset;
-             info->length = 1;
-             return true;
-         }
-         offset++;
-     }
-
-     ResetGroup(0);
-     return false;
- }
- __inline bool Matcher::MatchSingleCharCaseInsensitiveHere(
-     CaseInsensitive::MappingSource mappingSource,
-     const Char* const input,
-     const CharCount offset,
-     const Char c)
- {
-     // Compare the canonical forms of the input character at offset and of c.
-     // No bounds check is done here; the caller must pass a readable offset.
-     const auto canonicalInput = standardChars->ToCanonical(mappingSource, input[offset]);
-     const auto canonicalPattern = standardChars->ToCanonical(mappingSource, c);
-     return canonicalInput == canonicalPattern;
- }
- __inline bool Matcher::MatchSingleCharCaseSensitive(const Char* const input, const CharCount inputLength, CharCount offset, const Char c)
- {
-     // Looks for the exact character c at or after offset. On success binds group 0 to that
-     // single character and returns true; otherwise resets group 0 and returns false.
-
-     // With the sticky flag only the character at offset itself may match; there is no scan.
-     if ((program->flags & StickyRegexFlag) != 0)
-     {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         CompStats();
- #endif
-         // Match() only guarantees offset <= inputLength, so check the bound before reading
-         // input[offset]; at end of input there is no character to match.
-         if (offset < inputLength && input[offset] == c)
-         {
-             GroupInfo* const info = GroupIdToGroupInfo(0);
-             info->offset = offset;
-             info->length = 1;
-             return true;
-         }
-         else
-         {
-             ResetGroup(0);
-             return false;
-         }
-     }
-
-     // Non-sticky: scan forward for the first occurrence of c.
-     while (offset < inputLength)
-     {
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         CompStats();
- #endif
-         if (input[offset] == c)
-         {
-             GroupInfo* const info = GroupIdToGroupInfo(0);
-             info->offset = offset;
-             info->length = 1;
-             return true;
-         }
-         offset++;
-     }
-
-     ResetGroup(0);
-     return false;
- }
- __inline bool Matcher::MatchBoundedWord(const Char* const input, const CharCount inputLength, CharCount offset)
- {
-     // Finds the next whole word (maximal run of word characters that starts at a word
-     // boundary) at or after offset and binds group 0 to it. Returns false, with group 0
-     // reset, when there is none.
-     const StandardChars<Char>& stdchrs = *standardChars;
-
-     if (offset >= inputLength)
-     {
-         ResetGroup(0);
-         return false;
-     }
-
- #if ENABLE_REGEX_CONFIG_OPTIONS
-     CompStats();
- #endif
-     const bool atWordStart =
-         (offset == 0 && stdchrs.IsWord(input[0])) ||
-         (offset > 0 && (!stdchrs.IsWord(input[offset - 1]) && stdchrs.IsWord(input[offset])));
-
-     if (!atWordStart)
-     {
-         // With the sticky flag the word must begin exactly at offset.
-         if ((program->flags & StickyRegexFlag) == StickyRegexFlag)
-         {
-             ResetGroup(0);
-             return false;
-         }
-
-         if (stdchrs.IsWord(input[offset]))
-         {
-             // We are in the middle of a word: skip to the first non-word character.
-             do
-             {
-                 offset++;
-                 if (offset >= inputLength)
-                 {
-                     ResetGroup(0);
-                     return false;
-                 }
- #if ENABLE_REGEX_CONFIG_OPTIONS
-                 CompStats();
- #endif
-             }
-             while (stdchrs.IsWord(input[offset]));
-         }
-
-         // Skip forward to the first character of the next word.
-         do
-         {
-             offset++;
-             if (offset >= inputLength)
-             {
-                 ResetGroup(0);
-                 return false;
-             }
- #if ENABLE_REGEX_CONFIG_OPTIONS
-             CompStats();
- #endif
-         }
-         while (!stdchrs.IsWord(input[offset]));
-     }
-
-     // offset is now the first character of the word; extend to its end.
-     GroupInfo* const info = GroupIdToGroupInfo(0);
-     info->offset = offset;
-     for (;;)
-     {
-         offset++;
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         CompStats();
- #endif
-         if (!(offset < inputLength && stdchrs.IsWord(input[offset])))
-             break;
-     }
-     info->length = offset - info->offset;
-     return true;
- }
- __inline bool Matcher::MatchLeadingTrailingSpaces(const Char* const input, const CharCount inputLength, CharCount offset)
- {
-     // Matches a run of whitespace/newline characters either at the very start of the input
-     // (only when offset == 0) or at its very end, subject to the program's beginMinMatch and
-     // endMinMatch. Binds group 0 on success, resets it on failure.
-     GroupInfo* const info = GroupIdToGroupInfo(0);
-     Assert(offset <= inputLength);
-     Assert((program->flags & MultilineRegexFlag) == 0);
-
-     if (offset >= inputLength)
-     {
-         // At end of input only an empty match is possible.
-         Assert(offset == inputLength);
-         const bool emptyMatchAllowed =
-             program->rep.leadingTrailingSpaces.endMinMatch == 0 ||
-             (offset == 0 && program->rep.leadingTrailingSpaces.beginMinMatch == 0);
-         if (!emptyMatchAllowed)
-         {
-             info->Reset();
-             return false;
-         }
-         info->offset = offset;
-         info->length = 0;
-         return true;
-     }
-
-     const StandardChars<Char> &stdchrs = *standardChars;
-
-     if (offset == 0)
-     {
-         // Leading run: advance over whitespace from the start of the input.
-         for (; offset < inputLength && stdchrs.IsWhitespaceOrNewline(input[offset]); )
-         {
-             offset++;
- #if ENABLE_REGEX_CONFIG_OPTIONS
-             CompStats();
- #endif
-         }
-         if (offset >= program->rep.leadingTrailingSpaces.beginMinMatch)
-         {
-             info->offset = 0;
-             info->length = offset;
-             return true;
-         }
-     }
-
-     // Trailing run: walk backwards from the last character, never past the starting point.
-     Assert(inputLength > 0);
-     const CharCount initOffset = offset;
-     offset = inputLength - 1;
-     for (;;)
-     {
-         if (!(offset >= initOffset && stdchrs.IsWhitespaceOrNewline(input[offset])))
-             break;
-         // This can never underflow since initOffset > 0
-         Assert(offset > 0);
-         offset--;
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         CompStats();
- #endif
-     }
-     // Step forward again onto the first character of the trailing run.
-     offset++;
-
-     CharCount length = inputLength - offset;
-     if (length < program->rep.leadingTrailingSpaces.endMinMatch)
-     {
-         info->Reset();
-         return false;
-     }
-     info->offset = offset;
-     info->length = length;
-     return true;
- }
- __inline bool Matcher::MatchOctoquad(const Char* const input, const CharCount inputLength, CharCount offset, OctoquadMatcher* matcher)
- {
-     // Delegate the search to the octoquad matcher. offset is passed through and afterwards
-     // used as the match position.
-     const bool found = matcher->Match
-         ( input
-         , inputLength
-         , offset
- #if ENABLE_REGEX_CONFIG_OPTIONS
-         , stats
- #endif
-         );
-
-     if (!found)
-     {
-         ResetGroup(0);
-         return false;
-     }
-
-     // Bind group 0 to the fixed-length match.
-     GroupInfo* const info = GroupIdToGroupInfo(0);
-     info->offset = offset;
-     info->length = TrigramInfo::PatternLength;
-     return true;
- }
- __inline bool Matcher::MatchBOILiteral2(const Char* const input, const CharCount inputLength, CharCount offset, DWORD literal2)
- {
-     // Matches a two-character literal anchored at the beginning of the input by comparing the
-     // first two characters as a single DWORD. The literal2 argument is not read; the
-     // comparison uses the literal stored in the program.
-     const bool canMatch = offset == 0 && inputLength >= 2;
-     if (canMatch)
-     {
-         CompileAssert(sizeof(Char) == 2);
-         const DWORD firstTwoChars = *(DWORD *)input;
-         if (this->program->rep.boiLiteral2.literal == firstTwoChars)
-         {
-             GroupInfo* const info = GroupIdToGroupInfo(0);
-             info->offset = 0;
-             info->length = 2;
-             return true;
-         }
-     }
-
-     ResetGroup(0);
-     return false;
- }
- bool Matcher::Match // Entry point: dispatches on the program tag to the interpreter or to one of the specialized matchers; returns whether a match was found.
- ( const Char* const input
- , const CharCount inputLength
- , CharCount offset
- , Js::ScriptContext * scriptContext
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , RegexStats* stats
- , DebugWriter* w
- #endif
- )
- {
- #if ENABLE_REGEX_CONFIG_OPTIONS
- this->stats = stats; // attached only for the duration of this call; cleared again before returning
- this->w = w;
- #endif
- Assert(offset <= inputLength);
- bool res;
- bool loopMatchHere = true; // whether to retry at later offsets after a failed attempt
- Program const *prog = this->program;
- bool isStickyPresent = this->pattern->IsSticky();
- switch (prog->tag)
- {
- case Program::BOIInstructionsTag:
- if (offset != 0) // this program kind is only attempted at offset 0
- {
- groupInfos[0].Reset();
- res = false;
- break;
- }
- // fall through
- case Program::BOIInstructionsForStickyFlagTag:
- AssertMsg(prog->tag == Program::BOIInstructionsTag || isStickyPresent, "prog->tag should be BOIInstructionsForStickyFlagTag if sticky = true.");
- loopMatchHere = false; // a single attempt at the given offset only
- // fall through
- case Program::InstructionsTag:
- {
- previousQcTime = 0;
- uint qcTicks = 0;
- // This is the next offset in the input from where we will try to sync. For sync instructions that back up, this
- // is used to avoid trying to sync when we have not yet reached the offset in the input we last synced to before
- // backing up.
- CharCount nextSyncInputOffset = offset;
- RegexStacks * regexStacks = scriptContext->RegexStacks();
- // Need to continue matching even if matchStart == inputLim since some patterns may match an empty string at the end
- // of the input. For instance: /a*$/.exec("b")
- bool firstIteration = true;
- do
- {
- // Let there be only one call to MatchHere(), as that call expands the interpreter loop in-place. Having
- // multiple calls to MatchHere() would bloat the code.
- res = MatchHere(input, inputLength, offset, nextSyncInputOffset, regexStacks->contStack, regexStacks->assertionStack, qcTicks, firstIteration);
- firstIteration = false;
- } while(!res && loopMatchHere && ++offset <= inputLength); // advance the start offset by one and retry until a match or past the end
- break;
- }
- case Program::SingleCharTag:
- if (this->pattern->IsIgnoreCase())
- {
- res = MatchSingleCharCaseInsensitive(input, inputLength, offset, prog->rep.singleChar.c);
- }
- else
- {
- res = MatchSingleCharCaseSensitive(input, inputLength, offset, prog->rep.singleChar.c);
- }
- break;
- case Program::BoundedWordTag:
- res = MatchBoundedWord(input, inputLength, offset);
- break;
- case Program::LeadingTrailingSpacesTag:
- res = MatchLeadingTrailingSpaces(input, inputLength, offset);
- break;
- case Program::OctoquadTag:
- res = MatchOctoquad(input, inputLength, offset, prog->rep.octoquad.matcher);
- break;
- case Program::BOILiteral2Tag:
- res = MatchBOILiteral2(input, inputLength, offset, prog->rep.boiLiteral2.literal);
- break;
- default:
- Assert(false); // unknown program tag
- __assume(false);
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- this->stats = 0; // detach the per-call stats and debug writer
- this->w = 0;
- #endif
- return res;
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Writes a snapshot of the matcher to the debug writer.
- //
- //   w              - destination for the dump
- //   input          - text being matched; echoed only when 0 < inputLength <= 1024
- //   inputLength    - length of 'input' in characters
- //   inputOffset    - current position in 'input'; the character there is bracketed with <<< >>>
- //   instPointer    - current instruction, printed for instruction-based programs
- //   contStack      - continuation stack to dump
- //   assertionStack - assertion stack to dump
- //
- // Interpreter state (current instruction, groups, loops and both stacks) is printed for every tag that
- // Match() executes through the instruction interpreter, including BOIInstructionsForStickyFlagTag.
- void Matcher::Print(DebugWriter* w, const Char* const input, const CharCount inputLength, CharCount inputOffset, const uint8* instPointer, ContStack &contStack, AssertionStack &assertionStack) const
- {
-     w->PrintEOL(_u("Matcher {"));
-     w->Indent();
-
-     w->Print(_u("program: "));
-     w->PrintQuotedString(program->source, program->sourceLen);
-     w->EOL();
-
-     w->Print(_u("inputPointer: "));
-     if (inputLength == 0)
-         w->PrintEOL(_u("<empty input>"));
-     else if (inputLength > 1024)
-         w->PrintEOL(_u("<string too large>"));
-     else
-     {
-         // Text before the cursor, then the cursor character in <<< >>>, then the remainder.
-         w->PrintEscapedString(input, inputOffset);
-         if (inputOffset >= inputLength)
-             w->Print(_u("<<<>>>"));
-         else
-         {
-             w->Print(_u("<<<"));
-             w->PrintEscapedChar(input[inputOffset]);
-             w->Print(_u(">>>"));
-             w->PrintEscapedString(input + inputOffset + 1, inputLength - inputOffset - 1);
-         }
-         w->EOL();
-     }
-
-     // All three instruction tags are run by the same interpreter, so all three have interpreter state
-     // worth dumping. The sticky BOI tag was previously left out and produced a truncated trace.
-     const bool usesInterpreter =
-         program->tag == Program::InstructionsTag ||
-         program->tag == Program::BOIInstructionsTag ||
-         program->tag == Program::BOIInstructionsForStickyFlagTag;
-     if (usesInterpreter)
-     {
-         w->Print(_u("instPointer: "));
-         ((const Inst*)instPointer)->Print(w, InstPointerToLabel(instPointer), program->rep.insts.litbuf);
-
-         w->PrintEOL(_u("groups:"));
-         w->Indent();
-         for (int i = 0; i < program->numGroups; i++)
-         {
-             w->Print(_u("%d: "), i);
-             groupInfos[i].Print(w, input);
-             w->EOL();
-         }
-         w->Unindent();
-
-         w->PrintEOL(_u("loops:"));
-         w->Indent();
-         for (int i = 0; i < program->numLoops; i++)
-         {
-             w->Print(_u("%d: "), i);
-             loopInfos[i].Print(w);
-             w->EOL();
-         }
-         w->Unindent();
-
-         w->PrintEOL(_u("contStack: (top to bottom)"));
-         w->Indent();
-         contStack.Print(w, input);
-         w->Unindent();
-
-         w->PrintEOL(_u("assertionStack: (top to bottom)"));
-         w->Indent();
-         assertionStack.Print(w, this);
-         w->Unindent();
-     }
-
-     w->Unindent();
-     w->PrintEOL(_u("}"));
-     w->Flush();
- }
- #endif
- // ----------------------------------------------------------------------
- // Program
- // ----------------------------------------------------------------------
- // Builds an empty program: no source text, no groups or loops, tagged as a plain instruction program
- // whose instruction buffer, literal buffer and sync-to-literals scanner array are all unset.
- Program::Program(RegexFlags flags)
-     : source(0)
-     , sourceLen(0)
-     , flags(flags)
-     , numGroups(0)
-     , numLoops(0)
- {
-     tag = InstructionsTag;
-
-     auto &body = rep.insts;
-     body.insts = 0;
-     body.instsLen = 0;
-     body.litbuf = 0;
-     body.litbufLen = 0;
-     body.scannersForSyncToLiterals = 0;
- }
- // Allocates a fresh, empty Program with the given flags from 'recycler' and returns it.
- Program *Program::New(Recycler *recycler, RegexFlags flags)
- {
-     Program *const created = RecyclerNew(recycler, Program, flags);
-     return created;
- }
- // Allocates the zero-initialised array of ScannersMixin::MaxNumSyncLiterals scanner slots from 'recycler',
- // stores it on the program and returns it. Asserts that the program is tagged InstructionsTag and that
- // no array has been created yet.
- ScannerInfo **Program::CreateScannerArrayForSyncToLiterals(Recycler *const recycler)
- {
-     Assert(tag == InstructionsTag);
-     Assert(!rep.insts.scannersForSyncToLiterals);
-     Assert(recycler);
-
-     ScannerInfo **const scannerSlots =
-         RecyclerNewArrayZ(recycler, ScannerInfo *, ScannersMixin::MaxNumSyncLiterals);
-     rep.insts.scannersForSyncToLiterals = scannerSlots;
-     return scannerSlots;
- }
- // Creates a ScannerInfo for (offset, length, isEquivClass), stores it in slot 'scannerIndex' of the
- // sync-to-literals scanner array and returns it.
- //
- // Asserts that the program is tagged InstructionsTag, that the scanner array already exists, that
- // 'scannerIndex' lies in [0, ScannersMixin::MaxNumSyncLiterals) and that the slot is still empty.
- ScannerInfo *Program::AddScannerForSyncToLiterals(
-     Recycler *const recycler,
-     const int scannerIndex,
-     const CharCount offset,
-     const CharCount length,
-     const bool isEquivClass)
- {
-     Assert(tag == InstructionsTag);
-     Assert(rep.insts.scannersForSyncToLiterals);
-     Assert(recycler);
-     Assert(scannerIndex >= 0);
-     Assert(scannerIndex < ScannersMixin::MaxNumSyncLiterals);
-     Assert(!rep.insts.scannersForSyncToLiterals[scannerIndex]);
-
-     ScannerInfo *const scanner = RecyclerNewLeaf(recycler, ScannerInfo, offset, length, isEquivClass);
-     rep.insts.scannersForSyncToLiterals[scannerIndex] = scanner;
-     return scanner;
- }
- // Walks the instruction buffer from start to end and calls FreeBody(rtAllocator) on every instruction.
- // Returns immediately unless the program is tagged InstructionsTag and has an instruction buffer.
- //
- // NOTE(review): the buffer pointer and length are cleared only in DBG builds, so the early-out below
- // does not guard against a second call in non-DBG builds - confirm callers never call this twice.
- void Program::FreeBody(ArenaAllocator* rtAllocator)
- {
-     const bool hasInstructionBuffer = (tag == InstructionsTag) && rep.insts.insts;
-     if (!hasInstructionBuffer)
-     {
-         return;
-     }
-
-     Inst *cursor = reinterpret_cast<Inst *>(rep.insts.insts);
-     Inst *const limit = reinterpret_cast<Inst *>(reinterpret_cast<uint8 *>(cursor) + rep.insts.instsLen);
-     Assert(cursor < limit);
-
-     do
-     {
-         // One case per opcode listed in RegexOpCodes.h. Each case downcasts to the concrete instruction
-         // class, frees it, and steps over it using that class's size.
-         switch (cursor->tag)
-         {
- #define MBase(TagName, ClassName) \
-         case Inst::TagName: \
-         { \
-             ClassName *const typedInst = static_cast<ClassName *>(cursor); \
-             typedInst->FreeBody(rtAllocator); \
-             cursor = typedInst + 1; \
-             break; \
-         }
- #define M(TagName) MBase(TagName, TagName##Inst)
- #define MTemplate(TagName, TemplateDeclaration, GenericClassName, SpecializedClassName) MBase(TagName, SpecializedClassName)
- #include "RegexOpCodes.h"
- #undef MBase
- #undef M
- #undef MTemplate
-         default:
-             Assert(false);
-             __assume(false);
-         }
-     } while (cursor < limit);
-
-     // The walk must land exactly on the end of the buffer.
-     Assert(cursor == limit);
-
- #if DBG
-     rep.insts.insts = 0;
-     rep.insts.instsLen = 0;
- #endif
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- // Writes a description of the program to the debug writer 'w': source text, flags, group and loop
- // counts, and then either the instruction listing or the name of the special form chosen by the tag.
- //
- // Every flag name is followed by a space so that several flags stay separated in the output, and
- // every tag that Matcher::Match dispatches on has a matching case here.
- void Program::Print(DebugWriter* w)
- {
-     w->PrintEOL(_u("Program {"));
-     w->Indent();
-     w->PrintEOL(_u("source: %s"), source);
-
-     w->Print(_u("flags: "));
-     if ((flags & GlobalRegexFlag) != 0) w->Print(_u("global "));
-     if ((flags & MultilineRegexFlag) != 0) w->Print(_u("multiline "));
-     if ((flags & IgnoreCaseRegexFlag) != 0) w->Print(_u("ignorecase "));
-     if ((flags & UnicodeRegexFlag) != 0) w->Print(_u("unicode "));
-     if ((flags & StickyRegexFlag) != 0) w->Print(_u("sticky "));
-     w->EOL();
-
-     w->PrintEOL(_u("numGroups: %d"), numGroups);
-     w->PrintEOL(_u("numLoops: %d"), numLoops);
-
-     switch (tag)
-     {
-     case BOIInstructionsTag:
-     case BOIInstructionsForStickyFlagTag:
-     case InstructionsTag:
-         {
-             w->PrintEOL(_u("instructions: {"));
-             w->Indent();
-             if (tag == BOIInstructionsTag)
-             {
-                 // Shown as a pseudo-instruction: for this tag the beginning-of-input test is carried out
-                 // by the matcher rather than by an instruction in the buffer.
-                 w->PrintEOL(_u(" BOITest(hardFail: true)"));
-             }
-             uint8* instsLim = rep.insts.insts + rep.insts.instsLen;
-             uint8* curr = rep.insts.insts;
-             // Inst::Print returns the size of the instruction it printed, which advances the cursor.
-             while (curr != instsLim)
-                 curr += ((Inst*)curr)->Print(w, (Label)(curr - rep.insts.insts), rep.insts.litbuf);
-             w->Unindent();
-             w->PrintEOL(_u("}"));
-         }
-         break;
-     case SingleCharTag:
-         w->Print(_u("special form: <match single char "));
-         w->PrintQuotedChar(rep.singleChar.c);
-         w->PrintEOL(_u(">"));
-         break;
-     case BoundedWordTag:
-         w->PrintEOL(_u("special form: <match bounded word>"));
-         break;
-     case LeadingTrailingSpacesTag:
-         w->PrintEOL(_u("special form: <match leading/trailing spaces: minBegin=%d minEnd=%d>"),
-             rep.leadingTrailingSpaces.beginMinMatch, rep.leadingTrailingSpaces.endMinMatch);
-         break;
-     case OctoquadTag:
-         w->Print(_u("special form: <octoquad "));
-         rep.octoquad.matcher->Print(w);
-         w->PrintEOL(_u(">"));
-         break;
-     case BOILiteral2Tag:
-         // The literal is the packed value the matcher compares against the start of the input;
-         // print it in hex.
-         w->PrintEOL(_u("special form: <match BOI literal2 0x%08x>"),
-             static_cast<unsigned int>(rep.boiLiteral2.literal));
-         break;
-     }
-
-     w->Unindent();
-     w->PrintEOL(_u("}"));
- }
- #endif
- // Explicitly instantiate every template instruction class listed in RegexOpCodes.h.
- // Entries declared with M(...) expand to nothing here.
- #define MTemplate(TagName, TemplateDeclaration, GenericClassName, SpecializedClassName) template struct SpecializedClassName;
- #define M(...)
- #include "RegexOpCodes.h"
- #undef MTemplate
- #undef M
- }
|