RegexRuntime.h 61 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. //
  6. // Regex programs and their execution context
  7. //
  8. #pragma once
  9. namespace UnifiedRegex
  10. {
  11. typedef CharCount Label;
  12. // FORWARD
  13. struct ScannerInfo;
  14. class ContStack;
  15. class AssertionStack;
  16. class OctoquadMatcher;
  17. enum class ChompMode : uint8
  18. {
  19. Star, // min = 0, max = infinite
  20. Plus // min = 1, max = infinite
  21. };
  22. // ----------------------------------------------------------------------
  23. // Programs
  24. // ----------------------------------------------------------------------
  25. struct Program : private Chars<char16>
  26. {
  27. friend class Compiler;
  28. friend struct MatchLiteralNode;
  29. friend struct AltNode;
  30. friend class Matcher;
  31. friend struct LoopInfo;
  32. template <typename ScannerT>
  33. friend struct SyncToLiteralAndConsumeInstT;
  34. template <typename ScannerT>
  35. friend struct SyncToLiteralAndContinueInstT;
  36. template <typename ScannerT>
  37. friend struct SyncToLiteralAndBackupInstT;
  38. template <typename ScannerT>
  39. friend struct ScannerMixinT;
  40. template <uint lastPatCharEquivClassSize>
  41. friend struct EquivScannerMixinT;
  42. #define M(TagName) friend struct TagName##Inst;
  43. #define MTemplate(TagName, TemplateDeclaration, GenericClassName, ...) TemplateDeclaration friend struct GenericClassName;
  44. #include "RegexOpCodes.h"
  45. #undef M
  46. #undef MTemplate
  47. public:
  48. // Copy of original text of regex (without delimiting '/'s or trailing flags), null terminated.
  49. // In run-time allocator, owned by program
  50. Field(Char*) source;
  51. Field(CharCount) sourceLen; // length in char16's, NOT including terminating null
  52. // Number of capturing groups (including implicit overall group at index 0)
  53. Field(int) numGroups;
  54. Field(int) numLoops;
  55. Field(RegexFlags) flags;
  56. private:
  57. enum ProgramTag : uint8
  58. {
  59. InstructionsTag,
  60. BOIInstructionsTag,
  61. BOIInstructionsForStickyFlagTag,
  62. SingleCharTag,
  63. BoundedWordTag,
  64. LeadingTrailingSpacesTag,
  65. OctoquadTag,
  66. BOILiteral2Tag
  67. };
  68. Field(ProgramTag) tag;
  69. struct Instructions
  70. {
  71. // Instruction array, in run-time allocator, owned by program, never null
  72. Field(uint8*) insts;
  73. Field(CharCount) instsLen; // in bytes
  74. // Literals
  75. // In run-time allocator, owned by program, may be 0
  76. Field(CharCount) litbufLen; // length of litbuf in char16's, no terminating null
  77. Field(Char*) litbuf;
  78. // These scanner infos are used by ScannersMixin, which is used by only SyncToLiteralsAndBackupInst. There will only
  79. // ever be only one of those instructions per program. Since scanners are large (> 1 KB), for that instruction they
  80. // are allocated on the recycler with pointers stored here to reference them.
  81. Field(Field(ScannerInfo *)*) scannersForSyncToLiterals;
  82. };
  83. struct SingleChar
  84. {
  85. Field(Char) c;
  86. Field(uint8) padding[sizeof(Instructions) - sizeof(Char)];
  87. };
  88. struct Octoquad
  89. {
  90. Field(OctoquadMatcher*) matcher;
  91. Field(uint8) padding[sizeof(Instructions) - sizeof(void*)];
  92. };
  93. struct BOILiteral2
  94. {
  95. Field(DWORD) literal;
  96. Field(uint8) padding[sizeof(Instructions) - sizeof(DWORD)];
  97. };
  98. struct LeadingTrailingSpaces
  99. {
  100. Field(CharCount) beginMinMatch;
  101. Field(CharCount) endMinMatch;
  102. Field(uint8) padding[sizeof(Instructions) - (sizeof(CharCount) * 2)];
  103. };
  104. struct Other
  105. {
  106. Field(uint8) padding[sizeof(Instructions)];
  107. };
  108. union RepType
  109. {
  110. Field(Instructions) insts;
  111. Field(SingleChar) singleChar;
  112. Field(Octoquad) octoquad;
  113. Field(BOILiteral2) boiLiteral2;
  114. Field(LeadingTrailingSpaces) leadingTrailingSpaces;
  115. Field(Other) other;
  116. RepType() {}
  117. };
  118. Field(RepType) rep;
  119. public:
  120. Program(RegexFlags flags);
  121. static Program *New(Recycler *recycler, RegexFlags flags);
  122. static size_t GetOffsetOfTag() { return offsetof(Program, tag); }
  123. static size_t GetOffsetOfRep() { return offsetof(Program, rep); }
  124. static size_t GetOffsetOfBOILiteral2Literal() { return offsetof(BOILiteral2, literal); }
  125. static ProgramTag GetBOILiteral2Tag() { return ProgramTag::BOILiteral2Tag; }
  126. Field(ScannerInfo *)*CreateScannerArrayForSyncToLiterals(Recycler *const recycler);
  127. ScannerInfo *AddScannerForSyncToLiterals(
  128. Recycler *const recycler,
  129. const int scannerIndex,
  130. const CharCount offset,
  131. const CharCount length,
  132. const bool isEquivClass);
  133. void FreeBody(ArenaAllocator* rtAllocator);
  134. inline CaseInsensitive::MappingSource GetCaseMappingSource() const
  135. {
  136. return (flags & UnicodeRegexFlag) != 0
  137. ? CaseInsensitive::MappingSource::CaseFolding
  138. : CaseInsensitive::MappingSource::UnicodeData;
  139. }
  140. #if ENABLE_REGEX_CONFIG_OPTIONS
  141. void Print(DebugWriter* w);
  142. #endif
  143. };
  144. class Matcher;
  145. // ----------------------------------------------------------------------
  146. // CountDomain
  147. // ----------------------------------------------------------------------
  148. struct CountDomain : private Chars<char16>
  149. {
  150. CharCount lower;
  151. CharCountOrFlag upper; // CharCountFlag => unbounded
  152. inline CountDomain() : lower(0), upper(CharCountFlag) {}
  153. inline CountDomain(CharCount exact) : lower(exact), upper(exact) {}
  154. inline CountDomain(CharCount lower, CharCountOrFlag upper) : lower(lower), upper(upper) {}
  155. inline void Exact(CharCount n)
  156. {
  157. lower = upper = n;
  158. }
  159. inline void Unknown()
  160. {
  161. lower = 0;
  162. upper = CharCountFlag;
  163. }
  164. inline void Lub(const CountDomain& other)
  165. {
  166. lower = min(lower, other.lower);
  167. upper = upper == CharCountFlag || other.upper == CharCountFlag ? CharCountFlag : max(upper, other.upper);
  168. }
  169. inline void Add(const CountDomain& other)
  170. {
  171. lower = lower + other.lower;
  172. upper = upper == CharCountFlag || other.upper == CharCountFlag ? CharCountFlag : upper + other.upper;
  173. }
  174. inline void Sub(const CountDomain& other)
  175. {
  176. lower = other.upper == CharCountFlag || other.upper > lower ? 0 : lower - other.upper;
  177. upper = upper == CharCountFlag ? CharCountFlag : (other.lower > upper ? 0 : upper - other.lower);
  178. }
  179. inline void Mult(const CountDomain& other)
  180. {
  181. if (lower != 0)
  182. {
  183. CharCount maxOther = MaxCharCount / lower;
  184. if (other.lower > maxOther)
  185. // Clip to maximum
  186. lower = MaxCharCount;
  187. else
  188. lower *= other.lower;
  189. }
  190. if (upper != 0 && upper != CharCountFlag)
  191. {
  192. if (other.upper == CharCountFlag)
  193. upper = CharCountFlag;
  194. else
  195. {
  196. CharCount maxOther = MaxCharCount / upper;
  197. if (other.upper > maxOther)
  198. // Clip to 'unbounded'
  199. upper = CharCountFlag;
  200. else
  201. upper *= other.upper;
  202. }
  203. }
  204. }
  205. inline bool CouldMatchEmpty() const
  206. {
  207. return lower == 0;
  208. }
  209. inline bool IsUnbounded() const
  210. {
  211. return upper == CharCountFlag;
  212. }
  213. inline bool IsFixed() const
  214. {
  215. return lower == upper;
  216. }
  217. inline bool IsExact(CharCount n) const
  218. {
  219. return lower == n && upper == n;
  220. }
  221. inline bool IsGreaterThan(const CountDomain& other) const
  222. {
  223. return other.upper != CharCountFlag && lower > other.upper;
  224. }
  225. inline bool IsLessThan(const CountDomain& other) const
  226. {
  227. return upper != CharCountFlag && upper < other.lower;
  228. }
  229. #if ENABLE_REGEX_CONFIG_OPTIONS
  230. void Print(DebugWriter* w) const;
  231. #endif
  232. };
  233. // ----------------------------------------------------------------------
  234. // Mix-in types
  235. // ----------------------------------------------------------------------
  236. // Contains information about how much to back up after syncing to a literal (for the SyncTo... instructions)
  237. struct BackupMixin
  238. {
  239. const CountDomain backup; // range of characters to backup, if upper is CharCountFlag then backup to existing matchStart
  240. inline BackupMixin(const CountDomain& backup) : backup(backup) {}
  241. #if ENABLE_REGEX_CONFIG_OPTIONS
  242. void Print(DebugWriter* w, const char16* litbuf) const;
  243. #endif
  244. };
  245. struct CharMixin
  246. {
  247. char16 c;
  248. inline CharMixin(char16 c) : c(c) {}
  249. #if ENABLE_REGEX_CONFIG_OPTIONS
  250. void Print(DebugWriter* w, const char16* litbuf) const;
  251. #endif
  252. };
  253. struct Char2Mixin
  254. {
  255. char16 cs[2];
  256. inline Char2Mixin(char16 c0, char16 c1) { cs[0] = c0; cs[1] = c1; }
  257. #if ENABLE_REGEX_CONFIG_OPTIONS
  258. void Print(DebugWriter* w, const char16* litbuf) const;
  259. #endif
  260. };
  261. struct Char3Mixin
  262. {
  263. char16 cs[3];
  264. inline Char3Mixin(char16 c0, char16 c1, char16 c2) { cs[0] = c0; cs[1] = c1; cs[2] = c2; }
  265. #if ENABLE_REGEX_CONFIG_OPTIONS
  266. void Print(DebugWriter* w, const char16* litbuf) const;
  267. #endif
  268. };
  269. struct Char4Mixin
  270. {
  271. char16 cs[4];
  272. inline Char4Mixin(char16 c0, char16 c1, char16 c2, char16 c3) { cs[0] = c0; cs[1] = c1; cs[2] = c2; cs[3] = c3; }
  273. #if ENABLE_REGEX_CONFIG_OPTIONS
  274. void Print(DebugWriter* w, const char16* litbuf) const;
  275. #endif
  276. };
  277. struct LiteralMixin
  278. {
  279. Field(CharCount) offset; // into program's literal buffer
  280. Field(CharCount) length; // in char16's
  281. inline LiteralMixin(CharCount offset, CharCount length) : offset(offset), length(length) {}
  282. #if ENABLE_REGEX_CONFIG_OPTIONS
  283. void Print(DebugWriter* w, const char16* litbuf, bool isEquivClass) const;
  284. #endif
  285. };
  286. template<bool IsNegation>
  287. struct SetMixin
  288. {
  289. RuntimeCharSet<char16> set; // contents always lives in run-time allocator
  290. // set must always be cloned from source
  291. void FreeBody(ArenaAllocator* rtAllocator);
  292. #if ENABLE_REGEX_CONFIG_OPTIONS
  293. void Print(DebugWriter* w, const char16* litbuf) const;
  294. #endif
  295. };
  296. struct Char2LiteralScannerMixin : Char2Mixin
  297. {
  298. // scanner must be setup
  299. Char2LiteralScannerMixin(CharCount offset, CharCount length) : Char2Mixin(0, 0) { Assert(length == 2); }
  300. void Setup(char16 c0, char16 c1) { cs[0] = c0; cs[1] = c1; }
  301. CharCount GetLiteralLength() const { return 2; }
  302. bool Match(Matcher& matcher, const char16* const input, const CharCount inputLength, CharCount& inputOffset) const;
  303. #if ENABLE_REGEX_CONFIG_OPTIONS
  304. void Print(DebugWriter* w, const char16* litbuf) const;
  305. #endif
  306. };
  307. template <typename ScannerT>
  308. struct ScannerMixinT : LiteralMixin
  309. {
  310. Field(ScannerT) scanner;
  311. // scanner must be setup
  312. ScannerMixinT(CharCount offset, CharCount length) : LiteralMixin(offset, length) {}
  313. CharCount GetLiteralLength() const { return length; }
  314. bool Match(Matcher& matcher, const char16* const input, const CharCount inputLength, CharCount& inputOffset) const;
  315. void FreeBody(ArenaAllocator* rtAllocator);
  316. #if ENABLE_REGEX_CONFIG_OPTIONS
  317. void Print(DebugWriter* w, const char16* litbuf, bool isEquivClass = false) const;
  318. #endif
  319. };
  320. typedef ScannerMixinT<TextbookBoyerMoore<char16>> ScannerMixin;
  321. typedef ScannerMixinT<TextbookBoyerMooreWithLinearMap<char16>> ScannerMixin_WithLinearCharMap;
  322. template <uint lastPatCharEquivCLassSize>
  323. struct EquivScannerMixinT : ScannerMixin
  324. {
  325. // scanner must be setup
  326. EquivScannerMixinT(CharCount offset, CharCount length) : ScannerMixin(offset, length) {}
  327. bool Match(Matcher& matcher, const char16* const input, const CharCount inputLength, CharCount& inputOffset) const;
  328. #if ENABLE_REGEX_CONFIG_OPTIONS
  329. void Print(DebugWriter* w, const char16* litbuf) const;
  330. #endif
  331. };
  332. typedef EquivScannerMixinT<CaseInsensitive::EquivClassSize> EquivScannerMixin;
  333. typedef EquivScannerMixinT<1> EquivTrivialLastPatCharScannerMixin;
  334. struct ScannerInfo : ScannerMixin
  335. {
  336. Field(bool) isEquivClass;
  337. // scanner must be setup
  338. inline ScannerInfo(CharCount offset, CharCount length, bool isEquivClass) : ScannerMixin(offset, length), isEquivClass(isEquivClass) {}
  339. #if ENABLE_REGEX_CONFIG_OPTIONS
  340. void Print(DebugWriter* w, const char16* litbuf) const;
  341. #endif
  342. };
  343. struct ScannersMixin
  344. {
  345. static const int MaxNumSyncLiterals = 4;
  346. int numLiterals;
  347. Field(ScannerInfo*)* infos;
  348. // scanner mixins must be added
  349. inline ScannersMixin(Recycler *const recycler, Program *const program)
  350. : numLiterals(0), infos(program->CreateScannerArrayForSyncToLiterals(recycler))
  351. {
  352. }
  353. // Only used at compile time
  354. ScannerInfo* Add(Recycler *recycler, Program *program, CharCount offset, CharCount length, bool isEquivClass);
  355. void FreeBody(ArenaAllocator* rtAllocator);
  356. #if ENABLE_REGEX_CONFIG_OPTIONS
  357. void Print(DebugWriter* w, const char16* litbuf) const;
  358. #endif
  359. };
  360. struct HardFailMixin
  361. {
  362. bool canHardFail;
  363. inline HardFailMixin(bool canHardFail) : canHardFail(canHardFail) {}
  364. #if ENABLE_REGEX_CONFIG_OPTIONS
  365. void Print(DebugWriter* w, const char16* litbuf) const;
  366. #endif
  367. };
  368. struct GroupMixin
  369. {
  370. const int groupId;
  371. inline GroupMixin(int groupId) : groupId(groupId) {}
  372. #if ENABLE_REGEX_CONFIG_OPTIONS
  373. void Print(DebugWriter* w, const char16* litbuf) const;
  374. #endif
  375. };
  376. struct ChompBoundedMixin
  377. {
  378. const CountDomain repeats; // if upper is CharCountFlag, consume as many characters as possible
  379. inline ChompBoundedMixin(const CountDomain& repeats) : repeats(repeats) {}
  380. #if ENABLE_REGEX_CONFIG_OPTIONS
  381. void Print(DebugWriter* w, const char16* litbuf) const;
  382. #endif
  383. };
  384. struct JumpMixin
  385. {
  386. Label targetLabel;
  387. // targetLabel must always be fixed up
  388. inline JumpMixin()
  389. {
  390. #if DBG
  391. targetLabel = (Label)-1;
  392. #endif
  393. }
  394. #if ENABLE_REGEX_CONFIG_OPTIONS
  395. void Print(DebugWriter* w, const char16* litbuf) const;
  396. #endif
  397. };
  398. struct BodyGroupsMixin
  399. {
  400. int minBodyGroupId;
  401. int maxBodyGroupId;
  402. inline BodyGroupsMixin(int minBodyGroupId, int maxBodyGroupId) : minBodyGroupId(minBodyGroupId), maxBodyGroupId(maxBodyGroupId) {}
  403. #if ENABLE_REGEX_CONFIG_OPTIONS
  404. void Print(DebugWriter* w, const char16* litbuf) const;
  405. #endif
  406. };
  407. struct BeginLoopMixin
  408. {
  409. int loopId;
  410. const CountDomain repeats;
  411. bool hasOuterLoops;
  412. bool hasInnerNondet;
  413. Label exitLabel;
  414. // exitLabel must always be fixed up
  415. inline BeginLoopMixin(int loopId, const CountDomain& repeats, bool hasOuterLoops, bool hasInnerNondet)
  416. : loopId(loopId), repeats(repeats), hasOuterLoops(hasOuterLoops), hasInnerNondet(hasInnerNondet)
  417. {
  418. #if DBG
  419. exitLabel = (Label)-1;
  420. #endif
  421. }
  422. #if ENABLE_REGEX_CONFIG_OPTIONS
  423. void Print(DebugWriter* w, const char16* litbuf) const;
  424. #endif
  425. };
  426. struct RepeatLoopMixin
  427. {
  428. Label beginLabel; // label of the BeginLoopX instruction
  429. inline RepeatLoopMixin(Label beginLabel) : beginLabel(beginLabel) {}
  430. #if ENABLE_REGEX_CONFIG_OPTIONS
  431. void Print(DebugWriter* w, const char16* litbuf) const;
  432. #endif
  433. };
  434. struct TryMixin
  435. {
  436. Label failLabel;
  437. // failLabel must always be fixed up
  438. inline TryMixin()
  439. {
  440. #if DBG
  441. failLabel = (Label)-1;
  442. #endif
  443. }
  444. #if ENABLE_REGEX_CONFIG_OPTIONS
  445. void Print(DebugWriter* w, const char16* litbuf) const;
  446. #endif
  447. };
  448. struct FixedLengthMixin
  449. {
  450. CharCount length;
  451. inline FixedLengthMixin(CharCount length) : length(length) {}
  452. #if ENABLE_REGEX_CONFIG_OPTIONS
  453. void Print(DebugWriter* w, const char16* litbuf) const;
  454. #endif
  455. };
  456. struct NoNeedToSaveMixin
  457. {
  458. bool noNeedToSave;
  459. inline NoNeedToSaveMixin(bool noNeedToSave) : noNeedToSave(noNeedToSave) {}
  460. #if ENABLE_REGEX_CONFIG_OPTIONS
  461. void Print(DebugWriter* w, const char16* litbuf) const;
  462. #endif
  463. };
  464. struct SwitchCase
  465. {
  466. char16 c;
  467. Label targetLabel;
  468. #if ENABLE_REGEX_CONFIG_OPTIONS
  469. void Print(DebugWriter* w) const;
  470. #endif
  471. };
  472. template <int n>
  473. struct SwitchMixin
  474. {
  475. static const int MaxCases = n;
  476. int numCases;
  477. // numCases cases, in increasing character order
  478. SwitchCase cases[MaxCases];
  479. // Cases must always be added
  480. inline SwitchMixin() : numCases(0)
  481. {
  482. #if DBG
  483. for (int i = 0; i < MaxCases; i++)
  484. {
  485. cases[i].c = (char16)-1;
  486. cases[i].targetLabel = (Label)-1;
  487. }
  488. #endif
  489. }
  490. // Only used at compile time
  491. void AddCase(char16 c, Label targetLabel);
  492. #if ENABLE_REGEX_CONFIG_OPTIONS
  493. void Print(DebugWriter* w, const char16* litbuf) const;
  494. #endif
  495. };
  496. // ----------------------------------------------------------------------
  497. // Instructions
  498. // ----------------------------------------------------------------------
  499. struct Inst : protected Chars<char16>
  500. {
  501. enum InstTag : uint32
  502. {
  503. #define M(TagName) TagName,
  504. #define MTemplate(TagName, ...) M(TagName)
  505. #include "RegexOpCodes.h"
  506. #undef M
  507. #undef MTemplate
  508. };
  509. Field(InstTag) tag;
  510. inline Inst(InstTag tag) : tag(tag) {}
  511. void FreeBody(ArenaAllocator* rtAllocator) {}
  512. #if ENABLE_REGEX_CONFIG_OPTIONS
  513. static bool IsBaselineMode();
  514. static Label GetPrintLabel(Label label);
  515. virtual int Print(DebugWriter*w, Label label, const Char* litbuf) const = 0;
  516. #endif
  517. };
  518. #define INST_BODY_FREE(T) \
  519. void FreeBody(ArenaAllocator* rtAllocator) \
  520. { \
  521. T::FreeBody(rtAllocator); \
  522. Inst::FreeBody(rtAllocator); \
  523. }
  524. #if ENABLE_REGEX_CONFIG_OPTIONS
  525. #define INST_BODY_PRINT virtual int Print(DebugWriter*w, Label label, const Char* litbuf) const override;
  526. #else
  527. #define INST_BODY_PRINT
  528. #endif
  529. #define REGEX_INST_EXEC_PARAMETERS Matcher& matcher, const Char* const input, const CharCount inputLength, CharCount &matchStart, CharCount& inputOffset, CharCount &nextSyncInputOffset, const uint8*& instPointer, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks, bool firstIteration
  530. #define INST_BODY bool Exec(REGEX_INST_EXEC_PARAMETERS) const; \
  531. INST_BODY_PRINT
  532. //
  533. // Control flow
  534. //
  535. struct FailInst : Inst
  536. {
  537. inline FailInst() : Inst(Fail) {}
  538. INST_BODY
  539. };
  540. struct SuccInst : Inst
  541. {
  542. inline SuccInst() : Inst(Succ) {}
  543. INST_BODY
  544. };
  545. struct JumpInst : Inst, JumpMixin
  546. {
  547. // targetLabel must always be fixed up
  548. inline JumpInst() : Inst(Jump), JumpMixin() {}
  549. INST_BODY
  550. };
  551. struct JumpIfNotCharInst : Inst, CharMixin, JumpMixin
  552. {
  553. // targetLabel must always be fixed up
  554. inline JumpIfNotCharInst(Char c) : Inst(JumpIfNotChar), CharMixin(c), JumpMixin() {}
  555. INST_BODY
  556. };
  557. struct MatchCharOrJumpInst : Inst, CharMixin, JumpMixin
  558. {
  559. // targetLabel must always be fixed up
  560. inline MatchCharOrJumpInst(Char c) : Inst(MatchCharOrJump), CharMixin(c), JumpMixin() {}
  561. INST_BODY
  562. };
  563. struct JumpIfNotSetInst : Inst, SetMixin<false>, JumpMixin
  564. {
  565. // set must always be cloned from source
  566. // targetLabel must always be fixed up
  567. inline JumpIfNotSetInst() : Inst(JumpIfNotSet), JumpMixin() {}
  568. INST_BODY
  569. INST_BODY_FREE(SetMixin<false>)
  570. };
  571. struct MatchSetOrJumpInst : Inst, SetMixin<false>, JumpMixin
  572. {
  573. // set must always be cloned from source
  574. // targetLabel must always be fixed up
  575. inline MatchSetOrJumpInst() : Inst(MatchSetOrJump), JumpMixin() {}
  576. INST_BODY
  577. INST_BODY_FREE(SetMixin<false>)
  578. };
  579. struct Switch10Inst : Inst, SwitchMixin<10>
  580. {
  581. // Cases must always be added
  582. inline Switch10Inst() : Inst(Switch10), SwitchMixin() {}
  583. INST_BODY
  584. };
  585. struct Switch20Inst : Inst, SwitchMixin<20>
  586. {
  587. // Cases must always be added
  588. inline Switch20Inst() : Inst(Switch20), SwitchMixin() {}
  589. INST_BODY
  590. };
  591. struct SwitchAndConsume10Inst : Inst, SwitchMixin<10>
  592. {
  593. // Cases must always be added
  594. inline SwitchAndConsume10Inst() : Inst(SwitchAndConsume10), SwitchMixin() {}
  595. INST_BODY
  596. };
  597. struct SwitchAndConsume20Inst : Inst, SwitchMixin<20>
  598. {
  599. // Cases must always be added
  600. inline SwitchAndConsume20Inst() : Inst(SwitchAndConsume20), SwitchMixin() {}
  601. INST_BODY
  602. };
  603. //
  604. // Built-in assertions
  605. //
  606. struct BOITestInst : Inst, HardFailMixin
  607. {
  608. inline BOITestInst(bool canHardFail) : Inst(BOITest), HardFailMixin(canHardFail) {}
  609. INST_BODY
  610. };
  611. struct EOITestInst : Inst, HardFailMixin
  612. {
  613. inline EOITestInst(bool canHardFail) : Inst(EOITest), HardFailMixin(canHardFail) {}
  614. INST_BODY
  615. };
  616. struct BOLTestInst : Inst
  617. {
  618. inline BOLTestInst() : Inst(BOLTest) {}
  619. INST_BODY
  620. };
  621. struct EOLTestInst : Inst
  622. {
  623. inline EOLTestInst() : Inst(EOLTest) {}
  624. INST_BODY
  625. };
  626. struct WordBoundaryTestInst : Inst
  627. {
  628. bool isNegation;
  629. inline WordBoundaryTestInst(bool isNegation) : Inst(WordBoundaryTest), isNegation(isNegation) {}
  630. INST_BODY
  631. };
  632. //
  633. // Matching
  634. //
  635. struct MatchCharInst : Inst, CharMixin
  636. {
  637. inline MatchCharInst(Char c) : Inst(MatchChar), CharMixin(c) {}
  638. INST_BODY
  639. };
  640. struct MatchChar2Inst : Inst, Char2Mixin
  641. {
  642. inline MatchChar2Inst(Char c0, Char c1) : Inst(MatchChar2), Char2Mixin(c0, c1) {}
  643. INST_BODY
  644. };
  645. struct MatchChar3Inst : Inst, Char3Mixin
  646. {
  647. inline MatchChar3Inst(Char c0, Char c1, Char c2) : Inst(MatchChar3), Char3Mixin(c0, c1, c2) {}
  648. INST_BODY
  649. };
  650. struct MatchChar4Inst : Inst, Char4Mixin
  651. {
  652. inline MatchChar4Inst(Char c0, Char c1, Char c2, Char c3) : Inst(MatchChar4), Char4Mixin(c0, c1, c2, c3) {}
  653. INST_BODY
  654. };
  655. template<bool IsNegation>
  656. struct MatchSetInst : Inst, SetMixin<IsNegation>
  657. {
  658. // set must always be cloned from source
  659. inline MatchSetInst() : Inst(IsNegation ? MatchNegatedSet : MatchSet) {}
  660. INST_BODY
  661. INST_BODY_FREE(SetMixin<IsNegation>)
  662. };
  663. struct MatchLiteralInst : Inst, LiteralMixin
  664. {
  665. inline MatchLiteralInst(CharCount offset, CharCount length) : Inst(MatchLiteral), LiteralMixin(offset, length) {}
  666. INST_BODY
  667. };
  668. struct MatchLiteralEquivInst : Inst, LiteralMixin
  669. {
  670. inline MatchLiteralEquivInst(CharCount offset, CharCount length) : Inst(MatchLiteralEquiv), LiteralMixin(offset, length) {}
  671. INST_BODY
  672. };
  673. struct MatchTrieInst : Inst
  674. {
  675. RuntimeCharTrie trie;
  676. // Trie must always be cloned
  677. inline MatchTrieInst() : Inst(MatchTrie) {}
  678. void FreeBody(ArenaAllocator* rtAllocator);
  679. INST_BODY
  680. };
  681. struct OptMatchCharInst : Inst, CharMixin
  682. {
  683. inline OptMatchCharInst(Char c) : Inst(OptMatchChar), CharMixin(c) {}
  684. INST_BODY
  685. };
  686. struct OptMatchSetInst : Inst, SetMixin<false>
  687. {
  688. // set must always be cloned from source
  689. inline OptMatchSetInst() : Inst(OptMatchSet) {}
  690. INST_BODY
  691. INST_BODY_FREE(SetMixin<false>)
  692. };
  693. //
  694. // Synchronization:
  695. // SyncTo(Char|Char2Set|Set|Char2Literal|Literal|LiteralEquiv|Literals)And(Consume|Continue|Backup)
  696. //
  697. struct SyncToCharAndContinueInst : Inst, CharMixin
  698. {
  699. inline SyncToCharAndContinueInst(Char c) : Inst(SyncToCharAndContinue), CharMixin(c) {}
  700. INST_BODY
  701. };
  702. struct SyncToChar2SetAndContinueInst : Inst, Char2Mixin
  703. {
  704. inline SyncToChar2SetAndContinueInst(Char c0, Char c1) : Inst(SyncToChar2SetAndContinue), Char2Mixin(c0, c1) {}
  705. INST_BODY
  706. };
  707. template<bool IsNegation>
  708. struct SyncToSetAndContinueInst : Inst, SetMixin<IsNegation>
  709. {
  710. // set must always be cloned from source
  711. inline SyncToSetAndContinueInst() : Inst(IsNegation ? SyncToNegatedSetAndContinue : SyncToSetAndContinue) {}
  712. INST_BODY
  713. INST_BODY_FREE(SetMixin<IsNegation>)
  714. };
  715. template <typename ScannerT>
  716. struct SyncToLiteralAndContinueInstT : Inst, ScannerT
  717. {
  718. SyncToLiteralAndContinueInstT(InstTag tag, CharCount offset, CharCount length) : Inst(tag), ScannerT(offset, length) {}
  719. INST_BODY
  720. };
  721. // Specialized version of the SyncToLiteralAndContinueInst for a length 2 literal
  722. struct SyncToChar2LiteralAndContinueInst : SyncToLiteralAndContinueInstT<Char2LiteralScannerMixin>
  723. {
  724. SyncToChar2LiteralAndContinueInst(Char c0, Char c1) :
  725. SyncToLiteralAndContinueInstT(SyncToChar2LiteralAndContinue, 0, 2) { Char2LiteralScannerMixin::Setup(c0, c1); }
  726. };
  727. struct SyncToLiteralAndContinueInst : SyncToLiteralAndContinueInstT<ScannerMixin>
  728. {
  729. // scanner must be setup
  730. SyncToLiteralAndContinueInst(CharCount offset, CharCount length) :
  731. SyncToLiteralAndContinueInstT(SyncToLiteralAndContinue, offset, length) {}
  732. INST_BODY_FREE(ScannerMixin)
  733. };
  734. struct SyncToLinearLiteralAndContinueInst : SyncToLiteralAndContinueInstT<ScannerMixin_WithLinearCharMap>
  735. {
  736. // scanner must be setup
  737. SyncToLinearLiteralAndContinueInst(CharCount offset, CharCount length) :
  738. SyncToLiteralAndContinueInstT(SyncToLinearLiteralAndContinue, offset, length) {}
  739. INST_BODY_FREE(ScannerMixin_WithLinearCharMap)
  740. };
  741. struct SyncToLiteralEquivAndContinueInst : SyncToLiteralAndContinueInstT<EquivScannerMixin>
  742. {
  743. // scanner must be setup
  744. SyncToLiteralEquivAndContinueInst(CharCount offset, CharCount length) :
  745. SyncToLiteralAndContinueInstT(SyncToLiteralEquivAndContinue, offset, length) {}
  746. INST_BODY_FREE(EquivScannerMixin)
  747. };
  748. struct SyncToLiteralEquivTrivialLastPatCharAndContinueInst : SyncToLiteralAndContinueInstT<EquivTrivialLastPatCharScannerMixin>
  749. {
  750. // scanner must be setup
  751. SyncToLiteralEquivTrivialLastPatCharAndContinueInst(CharCount offset, CharCount length) :
  752. SyncToLiteralAndContinueInstT(SyncToLiteralEquivTrivialLastPatCharAndContinue, offset, length) {}
  753. INST_BODY_FREE(EquivTrivialLastPatCharScannerMixin)
  754. };
  755. struct SyncToCharAndConsumeInst : Inst, CharMixin
  756. {
  757. inline SyncToCharAndConsumeInst(Char c) : Inst(SyncToCharAndConsume), CharMixin(c) {}
  758. INST_BODY
  759. };
  760. struct SyncToChar2SetAndConsumeInst : Inst, Char2Mixin
  761. {
  762. inline SyncToChar2SetAndConsumeInst(Char c0, Char c1) : Inst(SyncToChar2SetAndConsume), Char2Mixin(c0, c1) {}
  763. INST_BODY
  764. };
  765. template<bool IsNegation>
  766. struct SyncToSetAndConsumeInst : Inst, SetMixin<IsNegation>
  767. {
  768. // set must always be cloned from source
  769. inline SyncToSetAndConsumeInst() : Inst(IsNegation ? SyncToNegatedSetAndConsume : SyncToSetAndConsume) {}
  770. INST_BODY
  771. INST_BODY_FREE(SetMixin<IsNegation>)
  772. };
  773. template <typename ScannerT>
  774. struct SyncToLiteralAndConsumeInstT : Inst, ScannerT
  775. {
  776. SyncToLiteralAndConsumeInstT(InstTag tag, CharCount offset, CharCount length) : Inst(tag), ScannerT(offset, length) {}
  777. INST_BODY
  778. };
  779. // Specialized version of the SyncToLiteralAndConsumeInst for a length 2 literal
  780. struct SyncToChar2LiteralAndConsumeInst : SyncToLiteralAndConsumeInstT<Char2LiteralScannerMixin>
  781. {
  782. SyncToChar2LiteralAndConsumeInst(Char c0, Char c1) :
  783. SyncToLiteralAndConsumeInstT(SyncToChar2LiteralAndConsume, 0, 2) { Char2LiteralScannerMixin::Setup(c0, c1); }
  784. };
  785. struct SyncToLiteralAndConsumeInst : SyncToLiteralAndConsumeInstT<ScannerMixin>
  786. {
  787. // scanner must be setup
  788. SyncToLiteralAndConsumeInst(CharCount offset, CharCount length) :
  789. SyncToLiteralAndConsumeInstT(SyncToLiteralAndConsume, offset, length) {}
  790. INST_BODY_FREE(ScannerMixin)
  791. };
  792. struct SyncToLinearLiteralAndConsumeInst : SyncToLiteralAndConsumeInstT<ScannerMixin_WithLinearCharMap>
  793. {
  794. // scanner must be setup
  795. SyncToLinearLiteralAndConsumeInst(CharCount offset, CharCount length) :
  796. SyncToLiteralAndConsumeInstT(SyncToLinearLiteralAndConsume, offset, length) {}
  797. INST_BODY_FREE(ScannerMixin_WithLinearCharMap)
  798. };
  799. struct SyncToLiteralEquivAndConsumeInst : SyncToLiteralAndConsumeInstT<EquivScannerMixin>
  800. {
  801. // scanner must be setup
  802. SyncToLiteralEquivAndConsumeInst(CharCount offset, CharCount length) :
  803. SyncToLiteralAndConsumeInstT(SyncToLiteralEquivAndConsume,offset, length) {}
  804. INST_BODY_FREE(EquivScannerMixin)
  805. };
  806. struct SyncToLiteralEquivTrivialLastPatCharAndConsumeInst : SyncToLiteralAndConsumeInstT<EquivTrivialLastPatCharScannerMixin>
  807. {
  808. // scanner must be setup
  809. SyncToLiteralEquivTrivialLastPatCharAndConsumeInst(CharCount offset, CharCount length) :
  810. SyncToLiteralAndConsumeInstT(SyncToLiteralEquivTrivialLastPatCharAndConsume, offset, length) {}
  811. INST_BODY_FREE(EquivTrivialLastPatCharScannerMixin)
  812. };
  813. struct SyncToCharAndBackupInst : Inst, CharMixin, BackupMixin
  814. {
  815. inline SyncToCharAndBackupInst(Char c, const CountDomain& backup) : Inst(SyncToCharAndBackup), CharMixin(c), BackupMixin(backup) {}
  816. INST_BODY
  817. };
  818. template<bool IsNegation>
  819. struct SyncToSetAndBackupInst : Inst, SetMixin<IsNegation>, BackupMixin
  820. {
  821. // set must always be cloned from source
  822. inline SyncToSetAndBackupInst(const CountDomain& backup) : Inst(IsNegation ? SyncToNegatedSetAndBackup : SyncToSetAndBackup), BackupMixin(backup) {}
  823. INST_BODY
  824. INST_BODY_FREE(SetMixin<IsNegation>)
  825. };
  826. template <typename ScannerT>
  827. struct SyncToLiteralAndBackupInstT : Inst, ScannerT, BackupMixin
  828. {
  829. SyncToLiteralAndBackupInstT(InstTag tag, CharCount offset, CharCount length, const CountDomain& backup) : Inst(tag), ScannerT(offset, length), BackupMixin(backup) {}
  830. INST_BODY
  831. };
  832. // Specialized version of the SyncToLiteralAndConsumeInst for a length 2 literal
  833. struct SyncToChar2LiteralAndBackupInst : SyncToLiteralAndBackupInstT<Char2LiteralScannerMixin>
  834. {
  835. SyncToChar2LiteralAndBackupInst(Char c0, Char c1, const CountDomain& backup) :
  836. SyncToLiteralAndBackupInstT(SyncToChar2LiteralAndBackup, 0, 2, backup) { Char2LiteralScannerMixin::Setup(c0, c1); }
  837. };
  838. struct SyncToLiteralAndBackupInst : SyncToLiteralAndBackupInstT<ScannerMixin>
  839. {
  840. // scanner must be setup
  841. SyncToLiteralAndBackupInst(CharCount offset, CharCount length, const CountDomain& backup) :
  842. SyncToLiteralAndBackupInstT(SyncToLiteralAndBackup, offset, length, backup) {}
  843. INST_BODY_FREE(ScannerMixin)
  844. };
  845. struct SyncToLinearLiteralAndBackupInst : SyncToLiteralAndBackupInstT<ScannerMixin_WithLinearCharMap>
  846. {
  847. // scanner must be setup
  848. SyncToLinearLiteralAndBackupInst(CharCount offset, CharCount length, const CountDomain& backup) :
  849. SyncToLiteralAndBackupInstT(SyncToLinearLiteralAndBackup, offset, length, backup) {}
  850. INST_BODY_FREE(ScannerMixin_WithLinearCharMap)
  851. };
  852. struct SyncToLiteralEquivAndBackupInst : SyncToLiteralAndBackupInstT<EquivScannerMixin>
  853. {
  854. // scanner must be setup
  855. SyncToLiteralEquivAndBackupInst(CharCount offset, CharCount length, const CountDomain& backup) :
  856. SyncToLiteralAndBackupInstT(SyncToLiteralEquivAndBackup, offset, length, backup) {}
  857. INST_BODY_FREE(EquivScannerMixin)
  858. };
  859. struct SyncToLiteralEquivTrivialLastPatCharAndBackupInst : SyncToLiteralAndBackupInstT<EquivTrivialLastPatCharScannerMixin>
  860. {
  861. // scanner must be setup
  862. SyncToLiteralEquivTrivialLastPatCharAndBackupInst(CharCount offset, CharCount length, const CountDomain& backup) :
  863. SyncToLiteralAndBackupInstT(SyncToLiteralEquivTrivialLastPatCharAndBackup, offset, length, backup) {}
  864. INST_BODY_FREE(EquivTrivialLastPatCharScannerMixin)
  865. };
  866. struct SyncToLiteralsAndBackupInst : Inst, ScannersMixin, BackupMixin
  867. {
  868. // scanner mixins must be setup
  869. inline SyncToLiteralsAndBackupInst(Recycler *recycler, Program *program, const CountDomain& backup)
  870. : Inst(SyncToLiteralsAndBackup), ScannersMixin(recycler, program), BackupMixin(backup)
  871. {
  872. }
  873. INST_BODY
  874. INST_BODY_FREE(ScannersMixin)
  875. };
  876. //
  877. // Groups
  878. //
  879. struct MatchGroupInst : Inst, GroupMixin
  880. {
  881. inline MatchGroupInst(int groupId) : Inst(MatchGroup), GroupMixin(groupId) {}
  882. INST_BODY
  883. };
  884. struct BeginDefineGroupInst : Inst, GroupMixin
  885. {
  886. inline BeginDefineGroupInst(int groupId) : Inst(BeginDefineGroup), GroupMixin(groupId) {}
  887. INST_BODY
  888. };
  889. struct EndDefineGroupInst : Inst, GroupMixin, NoNeedToSaveMixin
  890. {
  891. inline EndDefineGroupInst(int groupId, bool noNeedToSave)
  892. : Inst(EndDefineGroup), GroupMixin(groupId), NoNeedToSaveMixin(noNeedToSave)
  893. {
  894. }
  895. INST_BODY
  896. };
  897. struct DefineGroupFixedInst : Inst, GroupMixin, FixedLengthMixin, NoNeedToSaveMixin
  898. {
  899. inline DefineGroupFixedInst(int groupId, CharCount length, bool noNeedToSave) : Inst(DefineGroupFixed), GroupMixin(groupId), FixedLengthMixin(length), NoNeedToSaveMixin(noNeedToSave) {}
  900. INST_BODY
  901. };
  902. //
  903. // Loops
  904. //
  905. struct BeginLoopInst : Inst, BeginLoopMixin, BodyGroupsMixin
  906. {
  907. bool isGreedy;
  908. // exitLabel must always be fixed up
  909. inline BeginLoopInst(int loopId, const CountDomain& repeats, bool hasOuterLoops, bool hasInnerNondet, int minBodyGroupId, int maxBodyGroupId, bool isGreedy)
  910. : Inst(BeginLoop), BeginLoopMixin(loopId, repeats, hasOuterLoops, hasInnerNondet), BodyGroupsMixin(minBodyGroupId, maxBodyGroupId), isGreedy(isGreedy)
  911. {}
  912. INST_BODY
  913. };
  914. struct RepeatLoopInst : Inst, RepeatLoopMixin
  915. {
  916. inline RepeatLoopInst(Label beginLabel) : Inst(RepeatLoop), RepeatLoopMixin(beginLabel) {}
  917. INST_BODY
  918. };
  919. struct BeginLoopIfCharInst : Inst, CharMixin, BeginLoopMixin, BodyGroupsMixin
  920. {
  921. // exitLabel must always be fixed up
  922. inline BeginLoopIfCharInst(Char c, int loopId, const CountDomain& repeats, bool hasOuterLoops, bool hasInnerNondet, int minBodyGroupId, int maxBodyGroupId)
  923. : Inst(BeginLoopIfChar), CharMixin(c), BeginLoopMixin(loopId, repeats, hasOuterLoops, hasInnerNondet), BodyGroupsMixin(minBodyGroupId, maxBodyGroupId) {}
  924. INST_BODY
  925. };
  926. struct BeginLoopIfSetInst : Inst, SetMixin<false>, BeginLoopMixin, BodyGroupsMixin
  927. {
  928. // set must always be cloned from source
  929. // exitLabel must always be fixed up
  930. inline BeginLoopIfSetInst(int loopId, const CountDomain& repeats, bool hasOuterLoops, bool hasInnerNondet, int minBodyGroupId, int maxBodyGroupId)
  931. : Inst(BeginLoopIfSet), BeginLoopMixin(loopId, repeats, hasOuterLoops, hasInnerNondet), BodyGroupsMixin(minBodyGroupId, maxBodyGroupId) {}
  932. INST_BODY
  933. INST_BODY_FREE(SetMixin)
  934. };
  935. struct RepeatLoopIfCharInst : Inst, RepeatLoopMixin
  936. {
  937. inline RepeatLoopIfCharInst(Label beginLabel) : Inst(RepeatLoopIfChar), RepeatLoopMixin(beginLabel) {}
  938. INST_BODY
  939. };
  940. struct RepeatLoopIfSetInst : Inst, RepeatLoopMixin
  941. {
  942. inline RepeatLoopIfSetInst(Label beginLabel) : Inst(RepeatLoopIfSet), RepeatLoopMixin(beginLabel) {}
  943. INST_BODY
  944. };
  945. // Loop is greedy, fixed width, deterministic body, no inner groups
  946. struct BeginLoopFixedInst : Inst, BeginLoopMixin, FixedLengthMixin
  947. {
  948. // exitLabel must always be fixed up
  949. inline BeginLoopFixedInst(int loopId, const CountDomain& repeats, bool hasOuterLoops, CharCount length)
  950. : Inst(BeginLoopFixed), BeginLoopMixin(loopId, repeats, hasOuterLoops, false), FixedLengthMixin(length) {}
  951. INST_BODY
  952. };
  953. struct RepeatLoopFixedInst : Inst, RepeatLoopMixin
  954. {
  955. inline RepeatLoopFixedInst(Label beginLabel) : Inst(RepeatLoopFixed), RepeatLoopMixin(beginLabel) {}
  956. INST_BODY
  957. };
  958. // Loop is greedy, contains a MatchSet only
  959. struct LoopSetInst : Inst, SetMixin<false>
  960. {
  961. int loopId;
  962. const CountDomain repeats;
  963. bool hasOuterLoops;
  964. // set must always be cloned from source
  965. inline LoopSetInst(int loopId, const CountDomain& repeats, bool hasOuterLoops)
  966. : Inst(LoopSet), loopId(loopId), repeats(repeats), hasOuterLoops(hasOuterLoops) {}
  967. inline LoopSetInst(InstTag tag, int loopId, const CountDomain& repeats, bool hasOuterLoops)
  968. : Inst(tag), loopId(loopId), repeats(repeats), hasOuterLoops(hasOuterLoops) {}
  969. INST_BODY
  970. INST_BODY_FREE(SetMixin)
  971. };
  972. // Loop is greedy, contains a MatchSet only, first character in its follow set is known
  973. struct LoopSetWithFollowFirstInst : LoopSetInst
  974. {
  975. Char followFirst;
  976. inline LoopSetWithFollowFirstInst(int loopId, const CountDomain& repeats, bool hasOuterLoops, Char followFirst)
  977. : LoopSetInst(InstTag::LoopSetWithFollowFirst, loopId, repeats, hasOuterLoops), followFirst(followFirst) {}
  978. INST_BODY
  979. };
  980. // Loop is greedy, fixed width, deterministic body, one outermost group
  981. struct BeginLoopFixedGroupLastIterationInst : Inst, BeginLoopMixin, FixedLengthMixin, GroupMixin, NoNeedToSaveMixin
  982. {
  983. // exitLabel must always be fixed up
  984. inline BeginLoopFixedGroupLastIterationInst(int loopId, const CountDomain& repeats, bool hasOuterLoops, CharCount length, int groupId, bool noNeedToSave)
  985. : Inst(BeginLoopFixedGroupLastIteration), BeginLoopMixin(loopId, repeats, hasOuterLoops, false), FixedLengthMixin(length), GroupMixin(groupId), NoNeedToSaveMixin(noNeedToSave) {}
  986. INST_BODY
  987. };
  988. struct RepeatLoopFixedGroupLastIterationInst : Inst, RepeatLoopMixin
  989. {
  990. inline RepeatLoopFixedGroupLastIterationInst(Label beginLabel) : Inst(RepeatLoopFixedGroupLastIteration), RepeatLoopMixin(beginLabel) {}
  991. INST_BODY
  992. };
  993. // Loop is greedy, deterministic body, lower == 0, upper == inf, follow is irrefutable, no inner groups
  994. struct BeginGreedyLoopNoBacktrackInst : Inst
  995. {
  996. int loopId;
  997. Label exitLabel;
  998. // exitLabel must always be fixed up
  999. inline BeginGreedyLoopNoBacktrackInst(int loopId) : Inst(BeginGreedyLoopNoBacktrack), loopId(loopId) {}
  1000. INST_BODY
  1001. };
  1002. struct RepeatGreedyLoopNoBacktrackInst : Inst, RepeatLoopMixin
  1003. {
  1004. inline RepeatGreedyLoopNoBacktrackInst(Label beginLabel) : Inst(RepeatGreedyLoopNoBacktrack), RepeatLoopMixin(beginLabel) {}
  1005. INST_BODY
  1006. };
  1007. template<ChompMode Mode>
  1008. struct ChompCharInst : Inst, CharMixin
  1009. {
  1010. ChompCharInst(const Char c) : Inst(Mode == ChompMode::Star ? ChompCharStar : ChompCharPlus), CharMixin(c) {}
  1011. INST_BODY
  1012. };
  1013. template<ChompMode Mode>
  1014. struct ChompSetInst : Inst, SetMixin<false>
  1015. {
  1016. // set must always be cloned from source
  1017. ChompSetInst() : Inst(Mode == ChompMode::Star ? ChompSetStar : ChompSetPlus) {}
  1018. INST_BODY
  1019. INST_BODY_FREE(SetMixin)
  1020. };
  1021. template<ChompMode Mode>
  1022. struct ChompCharGroupInst : Inst, CharMixin, GroupMixin, NoNeedToSaveMixin
  1023. {
  1024. ChompCharGroupInst(const Char c, const int groupId, const bool noNeedToSave)
  1025. : Inst(Mode == ChompMode::Star ? ChompCharGroupStar : ChompCharGroupPlus),
  1026. CharMixin(c),
  1027. GroupMixin(groupId),
  1028. NoNeedToSaveMixin(noNeedToSave)
  1029. {
  1030. }
  1031. INST_BODY
  1032. };
  1033. template<ChompMode Mode>
  1034. struct ChompSetGroupInst : Inst, SetMixin<false>, GroupMixin, NoNeedToSaveMixin
  1035. {
  1036. // set must always be cloned from source
  1037. ChompSetGroupInst(const int groupId, const bool noNeedToSave)
  1038. : Inst(Mode == ChompMode::Star ? ChompSetGroupStar : ChompSetGroupPlus),
  1039. GroupMixin(groupId),
  1040. NoNeedToSaveMixin(noNeedToSave)
  1041. {
  1042. }
  1043. INST_BODY
  1044. INST_BODY_FREE(SetMixin)
  1045. };
  1046. struct ChompCharBoundedInst : Inst, CharMixin, ChompBoundedMixin
  1047. {
  1048. inline ChompCharBoundedInst(Char c, const CountDomain& repeats) : Inst(ChompCharBounded), CharMixin(c), ChompBoundedMixin(repeats) {}
  1049. INST_BODY
  1050. };
  1051. struct ChompSetBoundedInst : Inst, SetMixin<false>, ChompBoundedMixin
  1052. {
  1053. // set must always be cloned from source
  1054. inline ChompSetBoundedInst(const CountDomain& repeats) : Inst(ChompSetBounded), ChompBoundedMixin(repeats) {}
  1055. INST_BODY
  1056. INST_BODY_FREE(SetMixin)
  1057. };
  1058. struct ChompSetBoundedGroupLastCharInst : Inst, SetMixin<false>, ChompBoundedMixin, GroupMixin, NoNeedToSaveMixin
  1059. {
  1060. // set must always be cloned from source
  1061. inline ChompSetBoundedGroupLastCharInst(const CountDomain& repeats, int groupId, bool noNeedToSave) : Inst(ChompSetBoundedGroupLastChar), ChompBoundedMixin(repeats), GroupMixin(groupId), NoNeedToSaveMixin(noNeedToSave) {}
  1062. INST_BODY
  1063. INST_BODY_FREE(SetMixin)
  1064. };
  1065. //
  1066. // Choicepoints
  1067. //
  1068. struct TryInst : Inst, TryMixin
  1069. {
  1070. // failLabel must always be fixed up
  1071. inline TryInst() : Inst(Try), TryMixin() {}
  1072. INST_BODY
  1073. };
  1074. struct TryIfCharInst : Inst, CharMixin, TryMixin
  1075. {
  1076. // failLabel must always be fixed up
  1077. inline TryIfCharInst(Char c) : Inst(TryIfChar), CharMixin(c), TryMixin() {}
  1078. INST_BODY
  1079. };
  1080. struct TryMatchCharInst : Inst, CharMixin, TryMixin
  1081. {
  1082. // failLabel must always be fixed up
  1083. inline TryMatchCharInst(Char c) : Inst(TryMatchChar), CharMixin(c), TryMixin() {}
  1084. INST_BODY
  1085. };
  1086. struct TryIfSetInst : Inst, SetMixin<false>, TryMixin
  1087. {
  1088. // set is always same as matching BeginLoopIfSetInst set
  1089. // failLabel must always be fixed up
  1090. inline TryIfSetInst() : Inst(TryIfSet), TryMixin() {}
  1091. INST_BODY
  1092. INST_BODY_FREE(SetMixin)
  1093. };
  1094. struct TryMatchSetInst : Inst, SetMixin<false>, TryMixin
  1095. {
  1096. // set is always same as matching BeginLoopIfSetInst set
  1097. // failLabel must always be fixed up
  1098. inline TryMatchSetInst() : Inst(TryMatchSet), TryMixin() {}
  1099. INST_BODY
  1100. INST_BODY_FREE(SetMixin)
  1101. };
  1102. //
  1103. // User-defined assertions
  1104. //
  1105. struct BeginAssertionInst : Inst, BodyGroupsMixin
  1106. {
  1107. bool isNegation;
  1108. Label nextLabel;
  1109. // nextLabel must always be fixed up
  1110. inline BeginAssertionInst(bool isNegation, int minBodyGroupId, int maxBodyGroupId) : Inst(BeginAssertion), isNegation(isNegation), BodyGroupsMixin(minBodyGroupId, maxBodyGroupId)
  1111. {
  1112. #if DBG
  1113. nextLabel = (Label)-1;
  1114. #endif
  1115. }
  1116. INST_BODY
  1117. };
  1118. struct EndAssertionInst : Inst
  1119. {
  1120. inline EndAssertionInst() : Inst(EndAssertion) {}
  1121. INST_BODY
  1122. };
  1123. // ----------------------------------------------------------------------
  1124. // Matcher state
  1125. // ----------------------------------------------------------------------
  1126. struct LoopInfo : protected Chars<char16>
  1127. {
  1128. CharCount number; // current iteration number
  1129. CharCount startInputOffset; // input offset where the iteration started
  1130. JsUtil::List<CharCount, ArenaAllocator>* offsetsOfFollowFirst; // list of offsets from startInputOffset where we matched with followFirst
  1131. inline void Reset()
  1132. {
  1133. #if DBG
  1134. // So debug prints will look nice
  1135. number = 0;
  1136. startInputOffset = 0;
  1137. if (offsetsOfFollowFirst)
  1138. {
  1139. offsetsOfFollowFirst->ClearAndZero();
  1140. }
  1141. #endif
  1142. }
  1143. inline void EnsureOffsetsOfFollowFirst(Matcher& matcher);
  1144. #if ENABLE_REGEX_CONFIG_OPTIONS
  1145. void Print(DebugWriter* w) const;
  1146. #endif
  1147. };
  1148. struct GroupInfo : protected Chars<char16>
  1149. {
  1150. Field(CharCount) offset;
  1151. Field(CharCountOrFlag) length; // CharCountFlag => group is undefined
  1152. inline GroupInfo() : offset(0), length(CharCountFlag) {}
  1153. inline GroupInfo(CharCount offset, CharCountOrFlag length) : offset(offset), length(length) {}
  1154. //This constructor will only be called by a cross-site marshalling and thus we shouldn't clear offset and length
  1155. GroupInfo(VirtualTableInfoCtorEnum) { }
  1156. inline bool IsUndefined() const { return length == CharCountFlag; }
  1157. inline CharCount EndOffset() const { Assert(length != CharCountFlag); return offset + (CharCount)length; }
  1158. inline void Reset()
  1159. {
  1160. // The start offset must not be changed when backtracking into the group
  1161. length = CharCountFlag;
  1162. }
  1163. #if ENABLE_REGEX_CONFIG_OPTIONS
  1164. void Print(DebugWriter* w, const Char* const input) const;
  1165. #endif
  1166. };
  1167. struct AssertionInfo : private Chars<char16>
  1168. {
  1169. const Label beginLabel; // label of BeginAssertion instruction
  1170. CharCount startInputOffset; // input offset when begun assertion (so can rewind)
  1171. size_t contStackPosition; // top of continuation stack when begun assertion (so can cut)
  1172. inline AssertionInfo(Label beginLabel, CharCount startInputOffset, size_t contStackPosition)
  1173. : beginLabel(beginLabel), startInputOffset(startInputOffset), contStackPosition(contStackPosition) {}
  1174. #if ENABLE_REGEX_CONFIG_OPTIONS
  1175. void Print(DebugWriter* w) const;
  1176. #endif
  1177. };
  1178. // ----------------------------------------------------------------------
  1179. // Continuations
  1180. // ----------------------------------------------------------------------
  1181. struct Cont : protected Chars<char16>
  1182. {
  1183. enum ContTag : uint32
  1184. {
  1185. #define M(O) O,
  1186. #include "RegexContcodes.h"
  1187. #undef M
  1188. };
  1189. ContTag tag;
  1190. inline Cont(ContTag tag) : tag(tag) {}
  1191. #if ENABLE_REGEX_CONFIG_OPTIONS
  1192. virtual int Print(DebugWriter*w, const Char* const input) const = 0;
  1193. #endif
  1194. };
  1195. #if ENABLE_REGEX_CONFIG_OPTIONS
  1196. #define CONT_PRINT int Print(DebugWriter*w, const Char* const input) const override;
  1197. #else
  1198. #define CONT_PRINT
  1199. #endif
  1200. #define REGEX_CONT_EXEC_PARAMETERS Matcher& matcher, const Char* const input, CharCount& inputOffset, const uint8*& instPointer, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks
  1201. #define CONT_BODY bool Exec(REGEX_CONT_EXEC_PARAMETERS); \
  1202. CONT_PRINT
  1203. struct ResumeCont : Cont
  1204. {
  1205. CharCount origInputOffset;
  1206. Label origInstLabel;
  1207. inline ResumeCont(CharCount origInputOffset, Label origInstLabel) : Cont(Resume), origInputOffset(origInputOffset), origInstLabel(origInstLabel) {}
  1208. CONT_BODY
  1209. };
  1210. struct RestoreLoopCont : Cont
  1211. {
  1212. int loopId;
  1213. LoopInfo origLoopInfo;
  1214. RestoreLoopCont(int loopId, LoopInfo& origLoopInfo, Matcher& matcher);
  1215. CONT_BODY
  1216. };
  1217. struct RestoreGroupCont : Cont
  1218. {
  1219. int groupId;
  1220. GroupInfo origGroupInfo;
  1221. RestoreGroupCont(int groupId, const GroupInfo &origGroupInfo)
  1222. : Cont(RestoreGroup), groupId(groupId), origGroupInfo(origGroupInfo)
  1223. {
  1224. }
  1225. CONT_BODY
  1226. };
  1227. struct ResetGroupCont : Cont
  1228. {
  1229. const int groupId;
  1230. ResetGroupCont(const int groupId) : Cont(ResetGroup), groupId(groupId) {}
  1231. CONT_BODY
  1232. };
  1233. struct ResetGroupRangeCont : Cont
  1234. {
  1235. const int fromGroupId;
  1236. const int toGroupId;
  1237. ResetGroupRangeCont(const int fromGroupId, const int toGroupId)
  1238. : Cont(ResetGroupRange), fromGroupId(fromGroupId), toGroupId(toGroupId)
  1239. {
  1240. Assert(fromGroupId >= 0);
  1241. Assert(toGroupId >= 0);
  1242. Assert(fromGroupId < toGroupId);
  1243. }
  1244. CONT_BODY
  1245. };
  1246. struct RepeatLoopCont : Cont
  1247. {
  1248. Label beginLabel; // label of BeginLoop instruction
  1249. CharCount origInputOffset; // where to go back to
  1250. inline RepeatLoopCont(Label beginLabel, CharCount origInputOffset) : Cont(RepeatLoop), beginLabel(beginLabel), origInputOffset(origInputOffset) {}
  1251. CONT_BODY
  1252. };
  1253. struct PopAssertionCont : Cont
  1254. {
  1255. inline PopAssertionCont() : Cont(PopAssertion) {}
  1256. CONT_BODY
  1257. };
  1258. struct RewindLoopFixedCont : Cont
  1259. {
  1260. Label beginLabel; // label of BeginLoopFixed instruction
  1261. bool tryingBody; // true if attempting an additional iteration of loop body, otherwise attempting loop follow
  1262. inline RewindLoopFixedCont(Label beginLabel, bool tryingBody) : Cont(RewindLoopFixed), beginLabel(beginLabel), tryingBody(tryingBody) {}
  1263. CONT_BODY
  1264. };
  1265. struct RewindLoopSetCont : Cont
  1266. {
  1267. Label beginLabel; // label of LoopSet instruction
  1268. inline RewindLoopSetCont(Label beginLabel) : Cont(RewindLoopSet), beginLabel(beginLabel) {}
  1269. CONT_BODY
  1270. };
  1271. struct RewindLoopSetWithFollowFirstCont : Cont
  1272. {
  1273. Label beginLabel; // label of LoopSet instruction
  1274. inline RewindLoopSetWithFollowFirstCont(Label beginLabel) : Cont(RewindLoopSetWithFollowFirst), beginLabel(beginLabel) {}
  1275. CONT_BODY
  1276. };
  1277. struct RewindLoopFixedGroupLastIterationCont : Cont
  1278. {
  1279. Label beginLabel; // label of BeginLoopFixedGroupLastIteration instruction
  1280. bool tryingBody; // true if attempting an additional iteration of loop body, otherwise attempting loop follow
  1281. inline RewindLoopFixedGroupLastIterationCont(Label beginLabel, bool tryingBody) : Cont(RewindLoopFixedGroupLastIteration), beginLabel(beginLabel), tryingBody(tryingBody) {}
  1282. CONT_BODY
  1283. };
  1284. // ----------------------------------------------------------------------
  1285. // Matcher
  1286. // ----------------------------------------------------------------------
  1287. class ContStack : public ContinuousPageStackOfVariableElements<Cont>, private Chars<char16>
  1288. {
  1289. public:
  1290. inline ContStack(PageAllocator *const pageAllocator, void (*const outOfMemoryFunc)())
  1291. : ContinuousPageStackOfVariableElements(pageAllocator, outOfMemoryFunc)
  1292. {
  1293. }
  1294. #if ENABLE_REGEX_CONFIG_OPTIONS
  1295. void Print(DebugWriter* w, const Char* const input) const;
  1296. #endif
  1297. };
  1298. class AssertionStack : public ContinuousPageStackOfFixedElements<AssertionInfo>
  1299. {
  1300. public:
  1301. inline AssertionStack(PageAllocator *const pageAllocator, void (*const outOfMemoryFunc)())
  1302. : ContinuousPageStackOfFixedElements(pageAllocator, outOfMemoryFunc)
  1303. {
  1304. }
  1305. #if ENABLE_REGEX_CONFIG_OPTIONS
  1306. void Print(DebugWriter* w, const Matcher* const matcher) const;
  1307. #endif
  1308. };
  1309. struct RegexStacks
  1310. {
  1311. RegexStacks(PageAllocator * pageAllocator) :
  1312. contStack(pageAllocator, Js::Throw::OutOfMemory),
  1313. assertionStack(pageAllocator, Js::Throw::OutOfMemory) {};
  1314. ContStack contStack;
  1315. AssertionStack assertionStack;
  1316. };
  1317. enum HardFailMode
  1318. {
  1319. BacktrackAndLater,
  1320. BacktrackOnly,
  1321. LaterOnly,
  1322. ImmediateFail
  1323. };
  1324. class Matcher : private Chars<char16>
  1325. {
  1326. #define M(TagName) friend struct TagName##Inst;
  1327. #define MTemplate(TagName, TemplateDeclaration, GenericClassName, ...) TemplateDeclaration friend struct GenericClassName;
  1328. #include "RegexOpCodes.h"
  1329. #undef M
  1330. #undef MTemplate
  1331. #define M(O) friend O##Cont;
  1332. #include "RegexContcodes.h"
  1333. #undef M
  1334. template <typename ScannerT>
  1335. friend struct SyncToLiteralAndConsumeInstT;
  1336. template <typename ScannerT>
  1337. friend struct SyncToLiteralAndContinueInstT;
  1338. template <typename ScannerT>
  1339. friend struct SyncToLiteralAndBackupInstT;
  1340. template <typename ScannerT>
  1341. friend struct ScannerMixinT;
  1342. friend struct Char2LiteralScannerMixin;
  1343. template <uint lastPatCharEquivClassSize>
  1344. friend struct EquivScannerMixinT;
  1345. friend GroupInfo;
  1346. friend LoopInfo;
  1347. public:
  1348. static const uint TicksPerQc;
  1349. static const uint TicksPerQcTimeCheck;
  1350. static const uint TimePerQc; // milliseconds
  1351. private:
  1352. Field(RegexPattern *) const pattern;
  1353. Field(StandardChars<Char>*) standardChars;
  1354. Field(const Program*) program;
  1355. Field(GroupInfo*) groupInfos;
  1356. Field(LoopInfo*) loopInfos;
  1357. // Furthest offsets in the input string that we tried to match during a scan.
  1358. // This is used to prevent unnecessary retraversal of the input string.
  1359. //
  1360. // Assume we have the RegExp /<(foo|bar)/ and the input string "<0bar<0bar<0bar".
  1361. // When we try to match the string, we first scan it fully for "foo", but can't
  1362. // find it. Then we scan for "bar" and find it at index 2. However, since there
  1363. // is no '<' right before it, we continue with our search. We do the same thing
  1364. // two more times starting at indexes 7 and 12 (since those are where the "bar"s
  1365. // are), each time scanning the rest of the string fully for "foo".
  1366. //
  1367. // However, if we cache the furthest offsets we tried, we can skip the searches
  1368. // for "foo" after the first time.
  1369. Field(CharCount*) literalNextSyncInputOffsets;
  1370. FieldNoBarrier(Recycler*) recycler;
  1371. Field(uint) previousQcTime;
  1372. #if ENABLE_REGEX_CONFIG_OPTIONS
  1373. FieldNoBarrier(RegexStats*) stats;
  1374. FieldNoBarrier(DebugWriter*) w;
  1375. #endif
  1376. public:
  1377. Matcher(Js::ScriptContext* scriptContext, RegexPattern* pattern);
  1378. static Matcher *New(Js::ScriptContext* scriptContext, RegexPattern* pattern);
  1379. bool Match
  1380. ( const Char* const input
  1381. , const CharCount inputLength
  1382. , CharCount offset
  1383. , Js::ScriptContext * scriptContext
  1384. #if ENABLE_REGEX_CONFIG_OPTIONS
  1385. , RegexStats* stats
  1386. , DebugWriter* w
  1387. #endif
  1388. );
  1389. inline bool WasLastMatchSuccessful() const
  1390. {
  1391. return !groupInfos[0].IsUndefined();
  1392. }
  1393. inline int NumGroups() const
  1394. {
  1395. return program->numGroups;
  1396. }
  1397. inline GroupInfo GetGroup(int groupId) const
  1398. {
  1399. return *GroupIdToGroupInfo(groupId);
  1400. }
  1401. #if ENABLE_REGEX_CONFIG_OPTIONS
  1402. void Print(DebugWriter* w, const Char* const input, const CharCount inputLength, CharCount inputOffset, const uint8* instPointer, ContStack &contStack, AssertionStack &assertionStack) const;
  1403. #endif
  1404. private:
  1405. #if ENABLE_REGEX_CONFIG_OPTIONS
  1406. void PushStats(ContStack& contStack, const Char* const input) const;
  1407. void PopStats(ContStack& contStack, const Char* const input) const;
  1408. void UnPopStats(ContStack& contStack, const Char* const input) const;
  1409. void CompStats() const;
  1410. void InstStats() const;
  1411. #endif
  1412. private:
  1413. inline void QueryContinue(uint &qcTicks);
  1414. void DoQueryContinue(const uint qcTicks);
  1415. public:
  1416. static void TraceQueryContinue(const uint now);
  1417. private:
  1418. // Try backtracking, or return true if should stop. There could be a match using a later starting point.
  1419. bool Fail(const Char* const input, CharCount &inputOffset, const uint8 *&instPointer, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks);
  1420. bool RunContStack(const Char* const input, CharCount &inputOffset, const uint8 *&instPointer, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks);
  1421. // As above, but control whether to try backtracking or later matches
  1422. inline bool HardFail(const Char* const input, const CharCount inputLength, CharCount &matchStart, CharCount &inputOffset, const uint8 *&instPointer, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks, HardFailMode mode);
  1423. inline void Run(const Char* const input, const CharCount inputLength, CharCount &matchStart, CharCount &nextSyncInputOffset, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks, bool firstIteration);
  1424. inline bool MatchHere(const Char* const input, const CharCount inputLength, CharCount &matchStart, CharCount &nextSyncInputOffset, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks, bool firstIteration);
  1425. // Return true if assertion succeeded
  1426. inline bool PopAssertion(CharCount &inputOffset, const uint8 *&instPointer, ContStack &contStack, AssertionStack &assertionStack, bool isFailed);
  1427. inline Label InstPointerToLabel(const uint8* inst) const
  1428. {
  1429. Assert(inst >= program->rep.insts.insts && inst < program->rep.insts.insts + program->rep.insts.instsLen);
  1430. return (Label)((uint8*)inst - program->rep.insts.insts);
  1431. }
  1432. inline uint8* LabelToInstPointer(Label label) const
  1433. {
  1434. Assert(label < program->rep.insts.instsLen);
  1435. return program->rep.insts.insts + label;
  1436. }
  1437. template <typename T>
  1438. inline T* LabelToInstPointer(Inst::InstTag tag, Label label) const
  1439. {
  1440. Assert(label + sizeof(T) <= program->rep.insts.instsLen);
  1441. Assert(((Inst*)(program->rep.insts.insts + label))->tag == tag);
  1442. return (T*)(program->rep.insts.insts + label);
  1443. }
  1444. inline LoopInfo* LoopIdToLoopInfo(int loopId)
  1445. {
  1446. Assert(loopId >= 0 && loopId < program->numLoops);
  1447. return loopInfos + loopId;
  1448. }
  1449. public:
  1450. inline GroupInfo* GroupIdToGroupInfo(int groupId) const
  1451. {
  1452. Assert(groupId >= 0 && groupId < program->numGroups);
  1453. return groupInfos + groupId;
  1454. }
  1455. Matcher *CloneToScriptContext(Js::ScriptContext *scriptContext, RegexPattern *pattern);
  1456. private:
  1457. typedef bool (UnifiedRegex::Matcher::*ComparerForSingleChar)(const Char left, const Char right);
  1458. Field(ComparerForSingleChar) comparerForSingleChar;
  1459. // Specialized matcher for regex c - case insensitive
  1460. inline bool MatchSingleCharCaseInsensitive(const Char* const input, const CharCount inputLength, CharCount offset, const Char c);
  1461. inline bool MatchSingleCharCaseInsensitiveHere(CaseInsensitive::MappingSource mappingSource, const Char* const input, CharCount offset, const Char c);
  1462. // Specialized matcher for regex c - case sensitive
  1463. inline bool MatchSingleCharCaseSensitive(const Char* const input, const CharCount inputLength, CharCount offset, const Char c);
  1464. // Specialized matcher for regex \b\w+\b
  1465. inline bool MatchBoundedWord(const Char* const input, const CharCount inputLength, CharCount offset);
  1466. // Specialized matcher for regex ^\s*|\s*$
  1467. inline bool MatchLeadingTrailingSpaces(const Char* const input, const CharCount inputLength, CharCount offset);
  1468. // Specialized matcher for octoquad patterns
  1469. inline bool MatchOctoquad(const Char* const input, const CharCount inputLength, CharCount offset, OctoquadMatcher* matcher);
  1470. // Specialized matcher for regex ^literal
  1471. inline bool MatchBOILiteral2(const Char * const input, const CharCount inputLength, CharCount offset, DWORD literal2);
  1472. void SaveInnerGroups(const int fromGroupId, const int toGroupId, const bool reset, const Char *const input, ContStack &contStack);
  1473. void DoSaveInnerGroups(const int fromGroupId, const int toGroupId, const bool reset, const Char *const input, ContStack &contStack);
  1474. void SaveInnerGroups_AllUndefined(const int fromGroupId, const int toGroupId, const Char *const input, ContStack &contStack);
  1475. void DoSaveInnerGroups_AllUndefined(const int fromGroupId, const int toGroupId, const Char *const input, ContStack &contStack);
  1476. void ResetGroup(int groupId);
  1477. void ResetInnerGroups(int minGroupId, int maxGroupId);
  1478. #if DBG
  1479. void ResetLoopInfos();
  1480. #endif
  1481. };
  1482. }
  1483. #undef INST_BODY_FREE
  1484. #undef INST_BODY_PRINT
  1485. #undef INST_BODY
  1486. #undef CONT_PRINT
  1487. #undef CONT_BODY