| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- //
- // Regex parsing and AST-to-AST transformations/analysis
- //
- #pragma once
- #include "RuntimeCommon.h"
- namespace UnifiedRegex
- {
- // FORWARD
- class Compiler;
- // ----------------------------------------------------------------------
- // Node
- // ----------------------------------------------------------------------
- struct Node : protected Chars<char16>
- {
- // Optimization heuristics
- static const int maxSyncToSetSize = 256;
- static const int preferredMinSyncToLiteralLength = 3;
- static const int maxNumSyncLiterals = ScannersMixin::MaxNumSyncLiterals;
- static const int minRemainLengthForTest = 4;
- static const int maxCharsForConditionalTry = 20;
- static const int maxTrieArmExpansion = 16;
- enum NodeTag : uint16
- {
- // SimpleNode
- Empty, // (|...), etc
- BOL, // ^
- EOL, // $
- // WordBoundaryNode
- WordBoundary, // \b, \B
- // MatchLiteralNode
- MatchLiteral, // abc, non-empty
- // MatchCharNode
- MatchChar, // a
- // ConcatNode
- Concat, // e e
- // AltNode
- Alt, // e | e
- // DefineGroupNode
- DefineGroup, // (e)
- // MatchGroupNode
- MatchGroup, // \1
- // LoopNode
- Loop, // e*, e+, e{n,m}, e*?, e+?, e{n,m}?
- // MatchSetNode
- MatchSet, // [...], [^...], \b, \B, \d, \D, \s, \S, \w, \W, .
- // AssertionNode
- Assertion // (?=e), (?!e)
- };
- enum Features : uint16
- {
- HasEmpty = 1 << Empty,
- HasBOL = 1 << BOL,
- HasEOL = 1 << EOL,
- HasWordBoundary = 1 << WordBoundary,
- HasMatchLiteral = 1 << MatchLiteral,
- HasMatchChar = 1 << MatchChar,
- HasConcat = 1 << Concat,
- HasAlt = 1 << Alt,
- HasDefineGroup = 1 << DefineGroup,
- HasMatchGroup = 1 << MatchGroup,
- HasLoop = 1 << Loop,
- HasMatchSet = 1 << MatchSet,
- HasAssertion = 1 << Assertion
- };
- NodeTag tag;
- // Features of this and all child nodes
- uint16 features;
- // See comment for firstSet
- bool isFirstExact : 1;
- // True if pattern can never fail
- bool isThisIrrefutable : 1;
- // True if following patterns can never fail
- bool isFollowIrrefutable : 1;
- // True if pattern matches one or more word characters
- bool isWord : 1;
- // True if pattern will not consume more characters on backtracking
- bool isThisWillNotProgress : 1;
- // True if pattern will not consume fewer characters on backtracking
- bool isThisWillNotRegress : 1;
- // True if previous patterns will not consume more characters on backtracking
- bool isPrevWillNotProgress : 1;
- // True if previous patterns will not consume fewer characters on backtracking
- bool isPrevWillNotRegress : 1;
- // True if $ always follows pattern (and we are not in multi-line mode)
- bool isFollowEOL : 1;
- // True if pattern is deterministic (ie will never push a choicepoint during execution)
- // Determined in pass 4.
- bool isDeterministic : 1;
- // True if pattern is not in a loop context
- bool isNotInLoop : 1;
- // True if pattern will be matched against at least one segment of the input (ie will be executed at least once)
- bool isAtLeastOnce : 1;
- // True if pattern does not appear in an assertion
- bool isNotSpeculative : 1;
- // True if known to not be in a negative assertion context
- // (We do not play any games with double-negation)
- bool isNotNegated : 1;
- // True if this contains a hard fail BOI
- bool hasInitialHardFailBOI : 1;
- uint dummy : 17;
- // NOTE: The bodies of the following sets are allocated in the compile-time allocator and must be cloned
- // into the run-time allocator if they end up being used by an instruction.
- // NOTE: Sets may be aliased between nodes, and may be one of the standard sets.
- // Upper bound of FIRST characters of this pattern.
- // - Pattern will *never* match first characters not in this set
- // - If isFirstExact, pattern will *always* match first characters in this set (but may fail on later characters)
- // - If !isFirstExact, pattern *may* match first characters in this set, or may fail.
- CharSet<Char> *firstSet;
- // Upper bound of FOLLOW characters of this pattern.
- CharSet<Char> *followSet;
- // Range of number of characters already consumed before this pattern
- CountDomain prevConsumes;
- // Range of number of characters consumed by this pattern
- CountDomain thisConsumes;
- // Range of number of character consumed after this pattern
- CountDomain followConsumes;
- inline Node(NodeTag tag)
- : tag(tag)
- , features(0)
- , firstSet(0)
- , isFirstExact(false)
- , followSet(0)
- , isThisIrrefutable(false)
- , isFollowIrrefutable(false)
- , isWord(false)
- , isThisWillNotProgress(false)
- , isThisWillNotRegress(false)
- , isPrevWillNotProgress(false)
- , isPrevWillNotRegress(false)
- , isFollowEOL(false)
- , isDeterministic(false)
- , isNotInLoop(false)
- , isAtLeastOnce(false)
- , isNotSpeculative(false)
- , isNotNegated(false)
- , hasInitialHardFailBOI(false)
- {
- }
- //
- // Parse-time helpers
- //
- virtual CharCount LiteralLength() const = 0;
- virtual void AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const;
- virtual bool IsCharOrPositiveSet() const = 0;
- // Transfer pass 0:
- // - synthesize the total number of characters required to store all literals, including case-invariant
- // expansions where required
- // - adjust match char nodes to account for case invariance if necessary
- virtual CharCount TransferPass0(Compiler& compiler, const Char* litbuf) = 0;
- // Transfer pass 1:
- // - transfer literals from given litbuf into newLitbuf, advancing nextLit as we go
- // - adjust set nodes to account for case invariance if necessary
- virtual void TransferPass1(Compiler& compiler, const Char* litbuf) = 0;
- //
- // Compile-time helpers
- //
- // True if firstSet of this node can be used as the followSet of a previous node, even though this node may
- // accept empty. True only for simple assertions.
- virtual bool IsRefiningAssertion(Compiler& compiler) = 0;
- // Annotation pass 0:
- // - bottom-up: isWord
- // - refine WordBoundary nodes where possible
- virtual void AnnotatePass0(Compiler& compiler) = 0;
- // Annotation pass 1:
- // - top-down: isNotInLoop, isAtLeastOnce, isNotSpeculative, isNotNegated
- // - bottom-up: features, thisConsumes, firstSet, isFirstExact, isThisIrrefutable, isThisWillNotProgress, isThisWillNotRegress
- virtual void AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated) = 0;
- // Annotation pass 2
- // - left-to-right: prevConsumes, isPrevWillNotProgress, isPrevWillNotRegress.
- virtual void AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress) = 0;
- // Annotation pass 3
- // - right-to-left: followConsumes, followSet, isFollowIrrefutable, isFollowEOL
- virtual void AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL) = 0;
- // Annotation pass 4
- // - possibly simplify the node in-place
- // - decide on the compilation scheme for each node, possibly recording it within node-specific fields
- // - bottom-up: isDeterministic
- virtual void AnnotatePass4(Compiler& compiler) = 0;
- // Return true if pattern can be complied assuming some fixed-length prefix of a matching input string has already been consumed
- virtual bool SupportsPrefixSkipping(Compiler& compiler) const = 0;
- // Return the Match(Char|Literal|Set) at the start of pattern, or 0 if no such unique node
- virtual Node* HeadSyncronizingNode(Compiler& compiler) = 0;
- // Count how many literals are in pattern and return their minimum length. Returns 0
- // if pattern not in a form which can be used by a SyncToLiterals instruction.
- virtual CharCount MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const = 0;
- // Collect the literals counted by above and build scanners for them.
- virtual void CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const = 0;
- // Find a MatchLiteral or Alt of MatchLiterals which must appear at least once in input string for pattern
- // to match, and which has the shortest prevConsumes.
- virtual void BestSyncronizingNode(Compiler& compiler, Node*& bestNode) = 0;
- // Accumulate the range of groups definitions in pattern.
- // NOTE: minGroup must be > largest group, and maxGroup must be < 0 on topmost call
- virtual void AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup) = 0;
- // Emit code to consume this pattern. The first skipped characters of pattern have been consumed by context.
- virtual void Emit(Compiler& compiler, CharCount& skipped) = 0;
- // Emit code to scan forward for the first occurence of pattern, or hard fail if no such occurence.
- // - if isHeadSyncronizingNode, also consume the occurence and leave input pointer at first char after it
- // - otherwise, leave input pointer at the latest point of input which could match the overall pattern
- // (ie rewind from start of occurence accerding to the prevConsumes range)
- // - may actually do nothing if nothing worthwhile to scan to
- // Return number of characters consumed.
- virtual CharCount EmitScan(Compiler& compiler, bool isHeadSyncronizingNode) = 0;
- CharCount EmitScanFirstSet(Compiler& compiler);
- inline bool IsObviouslyDeterministic() { return (features & (HasAlt | HasLoop)) == 0; }
- inline bool ContainsAssertion() { return (features & (HasBOL | HasEOL | HasWordBoundary | HasAssertion)) != 0; }
- inline bool ContainsDefineGroup() { return (features & HasDefineGroup) != 0; }
- inline bool ContainsMatchGroup() { return (features & HasMatchGroup) != 0; }
- inline bool IsSimple() { return !ContainsAssertion() && !ContainsDefineGroup(); }
- inline bool IsSimpleOneChar() { return IsSimple() && !isThisIrrefutable && isFirstExact && thisConsumes.IsExact(1); }
- inline bool IsEmptyOnly() { return IsSimple() && isThisIrrefutable && thisConsumes.IsExact(0); }
- static bool IsBetterSyncronizingNode(Compiler& compiler, Node* curr, Node* proposed);
- //
- // Recognizers
- //
- // Is regex c
- bool IsSingleChar(Compiler& compiler, Char& outChar) const;
- // Is regex \b\w+\b?
- bool IsBoundedWord(Compiler& compiler) const;
- // Is regex ^\s*|\s*$
- bool IsLeadingTrailingSpaces(Compiler& compiler, CharCount& leftMinMatch, CharCount& rightMinMatch) const;
- // Is regex ^literal
- bool IsBOILiteral2(Compiler& compiler) const;
- // Can this regex be recognized by an Octoquad/Megamatch matcher? Ie is in grammar:
- // octoquad ::= atom{8} '|' atom{8}
- // atom ::= A | '['...charset drawn from A's...']'
- // and A is a set of exactly four ASCII characters
- virtual bool IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi) = 0;
- // Can this regex be recognized by a CharTrie structure? Ie is in grammar:
- // triearm ::= atom*
- // atom ::= c | '[' ... ']'
- // and factoring out sets does not exceed arm limit
- virtual bool IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const = 0;
- // Assuming above returned true, expand 'trie' node to include all literals recognized in this regex, and
- // continue expanding from each leaf using given 'cont' regex. Return false if any trie node has too many
- // children.
- // - If isAcceptFirst is true, ignore any literals which are proper extensions of a literal already in
- // the trie, but return false if any later literal is a prefix of an earlier literal.
- // (If the follow of the alt we are turning into a trie is irrefutable, we can simply stop at the
- // first shortest match).
- // - Otherwise, return false if any literal is a proper prefix of any other literal, irrespective of order.
- virtual bool BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const = 0;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- virtual void Print(DebugWriter* w, const Char* litbuf) const = 0;
- void PrintAnnotations(DebugWriter* w) const;
- #endif
- };
- #if ENABLE_REGEX_CONFIG_OPTIONS
- #define NODE_PRINT void Print(DebugWriter* w, const Char* litbuf) const override;
- #else
- #define NODE_PRINT
- #endif
- #define NODE_DECL CharCount LiteralLength() const override; \
- bool IsCharOrPositiveSet() const override; \
- CharCount TransferPass0(Compiler& compiler, const Char* litbuf) override; \
- void TransferPass1(Compiler& compiler, const Char* litbuf) override; \
- bool IsRefiningAssertion(Compiler& compiler) override; \
- void AnnotatePass0(Compiler& compiler) override; \
- void AnnotatePass1(Compiler& compiler, bool parentNotInLoop, bool parentAtLeastOnce, bool parentNotSpeculative, bool parentNotNegated) override; \
- void AnnotatePass2(Compiler& compiler, CountDomain accumConsumes, bool accumPrevWillNotProgress, bool accumPrevWillNotRegress) override; \
- void AnnotatePass3(Compiler& compiler, CountDomain accumConsumes, CharSet<Char>* accumFollow, bool accumFollowIrrefutable, bool accumFollowEOL) override; \
- void AnnotatePass4(Compiler& compiler) override; \
- bool SupportsPrefixSkipping(Compiler& compiler) const override; \
- Node* HeadSyncronizingNode(Compiler& compiler) override; \
- CharCount MinSyncronizingLiteralLength(Compiler& compiler, int& numLiterals) const override; \
- void CollectSyncronizingLiterals(Compiler& compiler, ScannersMixin& scanners) const override; \
- void BestSyncronizingNode(Compiler& compiler, Node*& bestNode) override; \
- void Emit(Compiler& compiler, CharCount& skipped) override; \
- CharCount EmitScan(Compiler& compiler, bool isHeadSyncronizingNode) override; \
- void AccumDefineGroups(Js::ScriptContext* scriptContext, int& minGroup, int& maxGroup) override; \
- bool IsOctoquad(Compiler& compiler, OctoquadIdentifier* oi) override; \
- bool IsCharTrieArm(Compiler& compiler, uint& accNumAlts) const override; \
- bool BuildCharTrie(Compiler& compiler, CharTrie* trie, Node* cont, bool isAcceptFirst) const override; \
- NODE_PRINT
- struct SimpleNode : Node
- {
- inline SimpleNode(NodeTag tag)
- : Node(tag)
- {
- }
- NODE_DECL
- };
- struct WordBoundaryNode : Node
- {
- bool isNegation;
- bool mustIncludeEntering; // isNegation => false
- bool mustIncludeLeaving; // isNegation => false
- inline WordBoundaryNode(bool isNegation)
- : Node(WordBoundary)
- , isNegation(isNegation)
- , mustIncludeEntering(false)
- , mustIncludeLeaving(false)
- {
- }
- NODE_DECL
- };
- struct MatchLiteralNode : Node
- {
- CharCount offset; // into literal buffer (initially parser's, then program's)
- CharCount length; // always > 1
- bool isEquivClass; // True if each consecutive triplet of characters of literal represents equivalence
- // class of characters to match against. Actual literal length will be 3 times the length
- // given above
- inline MatchLiteralNode(CharCount offset, CharCount length)
- : Node(MatchLiteral)
- , offset(offset)
- , length(length)
- , isEquivClass(false)
- {
- }
- NODE_DECL
- void AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const override;
- };
- struct MatchCharNode : Node
- {
- Char cs[CaseInsensitive::EquivClassSize];
- bool isEquivClass; // true if above characters represent equivalence class of characters, otherwise only
- // first character is significant
- inline MatchCharNode(Char c)
- : Node(MatchChar)
- , isEquivClass(false)
- {
- cs[0] = c;
- for (int i = 1; i < CaseInsensitive::EquivClassSize; i++)
- cs[i] = c;
- }
- NODE_DECL
- void AppendLiteral(CharCount& litbufNext, CharCount litbufLen, __inout_ecount(litbufLen) Char* litbuf) const override;
- CompileAssert(CaseInsensitive::EquivClassSize == 4);
- static void Emit(Compiler& compiler, __in_ecount(4) Char * cs, bool isEquivClass);
- private:
- CompileAssert(CaseInsensitive::EquivClassSize == 4);
- static CharCount FindUniqueEquivs(
- const Char equivs[CaseInsensitive::EquivClassSize],
- __out_ecount(4) Char uniqueEquivs[CaseInsensitive::EquivClassSize]);
- };
- struct MatchSetNode : Node
- {
- bool isNegation;
- bool needsEquivClass;
- CharSet<Char> set; // contents always owned by compile-time allocator
- // Set must be filled in
- inline MatchSetNode(bool isNegation, bool needsEquivClass = true)
- : Node(MatchSet)
- , isNegation(isNegation)
- , needsEquivClass(true)
- {
- }
- NODE_DECL
- };
- struct ConcatNode : Node
- {
- Node* head; // never a concat node
- ConcatNode* tail; // null for end, overall always length > 1, never consecutive literals/chars
- inline ConcatNode(Node* head, ConcatNode* tail)
- : Node(Concat)
- , head(head)
- , tail(tail)
- {
- }
- NODE_DECL
- };
- struct AltNode sealed : Node
- {
- Node* head; // never an alt node
- AltNode* tail; // null for end, overall always length > 1, never consecutive chars/sets
- enum CompilationScheme
- {
- Try, // Push choicepoint, try item, backtrack to next item on failure
- None, // No items (deterministic)
- Trie, // Match using trie of literals (deterministic)
- Switch, // Switch using 1 char lookahead (deterministic)
- Chain, // Chain of JumpIfNot(Char|Set) using 1 char lookahead (deterministic)
- Set // (Opt?)Match(Char|Set) (deterministic)
- };
- // Following determined in pass 4
- RuntimeCharTrie* runtimeTrie; // significant only if scheme == Trie
- CompilationScheme scheme;
- bool isOptional; // significant only if scheme == None|Switch|Chain|Set
- int switchSize; // significant only if scheme == Switch
- inline AltNode(Node* head, AltNode* tail)
- : Node(Alt)
- , head(head)
- , tail(tail)
- , scheme(Try)
- , runtimeTrie(0)
- , isOptional(false)
- , switchSize(0)
- {
- }
- NODE_DECL
- };
- struct DefineGroupNode : Node
- {
- Node* body;
- int groupId;
- enum CompilationScheme
- {
- Chomp, // Chomp matching characters and capture all into a group at the end
- Fixed, // Capture fixed-length group at end
- BeginEnd // Wrap by begin/end instructions
- };
- // Following determined in pass 4
- CompilationScheme scheme;
- bool noNeedToSave;
- inline DefineGroupNode(int groupId, Node* body)
- : Node(DefineGroup)
- , groupId(groupId)
- , body(body)
- , scheme(BeginEnd)
- , noNeedToSave(false)
- {
- }
- NODE_DECL
- };
- struct MatchGroupNode : Node
- {
- int groupId;
- inline MatchGroupNode(int groupId)
- : Node(MatchGroup)
- , groupId(groupId)
- {
- }
- NODE_DECL
- };
- struct LoopNode : Node
- {
- Node* body;
- CountDomain repeats;
- // If set and not equal to MaxChar, followFirst is the deterministic first character of the follow set of this node.
- // Could be expanded to encompass the entire firstSet of the next node.
- Char followFirst;
- bool isGreedy;
- enum CompilationScheme
- {
- BeginEnd, // Push choicepoints for each unravelling (greedy) or deferred unravelling (non-greedy) of body
- None, // Loop matches empty only (deterministic)
- Chomp, // Chomp matching characters (deterministic)
- ChompGroupLastChar, // Chomp matching characters and bind the last char to group (deterministic)
- Guarded, // Use 1 char lookahead to control loop repeats (deterministic)
- Fixed, // Loop body is non-zero fixed width, deterministic, group-free
- FixedSet, // Loop body is MatchSet
- FixedGroupLastIteration, // Loop body is non-zero fixed width, deterministic, loop body has one outer group
- GreedyNoBacktrack, // Can keep track of backtracking info in constant space (deterministic)
- Set, // Treat r? as r|<empty>, emit as for AltNode::Set
- Chain, // Treat r? as r|<empty>, emit as for AltNode::Chain
- Try // Treat r? as r|<empty>, emit as for AltNode::Try
- };
- // Following determined in pass 4
- bool noNeedToSave; // defined for ChompGroupLastChar/FixedGroupLastIteration only
- CompilationScheme scheme;
- inline LoopNode(CharCount lower, CharCountOrFlag upper, bool isGreedy, Node* body)
- : Node(Loop)
- , repeats(lower, upper)
- , isGreedy(isGreedy)
- , followFirst(MaxChar)
- , body(body)
- , scheme(BeginEnd)
- {
- }
- NODE_DECL
- };
- struct AssertionNode : Node
- {
- Node* body;
- bool isNegation;
- enum CompilationScheme
- {
- BeginEnd, // Protect assertion with begin/end instructions
- Succ, // Assertion will always succeed, without binding groups
- Fail // Assertion will always fail
- };
- // Following determined in pass 4
- CompilationScheme scheme;
- inline AssertionNode(bool isNegation, Node* body)
- : Node(Assertion)
- , isNegation(isNegation)
- , body(body)
- , scheme(BeginEnd)
- {
- }
- NODE_DECL
- };
- // ----------------------------------------------------------------------
- // Compiler
- // ----------------------------------------------------------------------
- class Compiler : private Chars<char16>
- {
- friend Node;
- friend SimpleNode;
- friend WordBoundaryNode;
- friend MatchLiteralNode;
- friend MatchCharNode;
- friend ConcatNode;
- friend AltNode;
- friend DefineGroupNode;
- friend MatchGroupNode;
- friend LoopNode;
- friend MatchSetNode;
- friend AssertionNode;
- private:
- static const CharCount initInstBufSize = 128;
- Js::ScriptContext* scriptContext;
- // Arena for nodes and items needed only during compilation
- ArenaAllocator* ctAllocator;
- // Arena for literals, sets and items needed during runtime
- ArenaAllocator* rtAllocator;
- StandardChars<Char>* standardChars;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- DebugWriter* w;
- RegexStats* stats;
- #endif
- Program* program;
- uint8* instBuf; // in compile-time allocator, owned by compiler
- CharCount instLen; // size of instBuf in bytes
- CharCount instNext; // offset to emit next instruction into
- int nextLoopId;
- private:
- uint8* Emit(size_t size);
- template <typename T>
- T* Emit();
- // The instruction buffer may move, so we need to remember label fixup's relative to the instruction base
- // rather than as machine addresses.
- // NOTE: pLabel is declared unaligned because Inst structs are unaligned in the inst buffer (thanks to #pragma pack(1)).
- inline Label GetFixup(unaligned Label* pLabel)
- {
- Assert((uint8*)pLabel >= instBuf && (uint8*)pLabel < instBuf + instNext);
- return (Label)((uint8*)pLabel - instBuf);
- }
- inline void DoFixup(Label fixup, Label label)
- {
- Assert(fixup < instNext);
- Assert(label <= instNext);
- *(Label*)(instBuf + fixup) = label;
- }
- inline Label CurrentLabel()
- {
- return instNext;
- }
- template <typename T>
- inline T* LabelToInstPointer(Inst::InstTag tag, Label label)
- {
- Assert(label + sizeof(T) <= instNext);
- Assert(((Inst*)(instBuf + label))->tag == tag);
- return (T*)(instBuf + label);
- }
- inline int NextLoopId()
- {
- return nextLoopId++;
- }
- inline Js::ScriptContext *GetScriptContext() const
- {
- return scriptContext;
- }
- inline Program *GetProgram() const
- {
- return program;
- }
- void SetBOIInstructionsProgramTag()
- {
- Assert(this->program->tag == Program::ProgramTag::InstructionsTag
- || this->program->tag == Program::ProgramTag::BOIInstructionsTag);
- Assert(this->CurrentLabel() == 0);
- this->program->tag = Program::ProgramTag::BOIInstructionsTag;
- }
- void SetBOIInstructionsProgramForStickyFlagTag()
- {
- Assert(this->program->tag == Program::ProgramTag::InstructionsTag
- || this->program->tag == Program::ProgramTag::BOIInstructionsForStickyFlagTag);
- Assert(this->CurrentLabel() == 0);
- AssertMsg((this->program->flags & StickyRegexFlag) != 0, "Shouldn't set BOIInstructionsForStickyFlagTag, if sticky is false.");
- this->program->tag = Program::ProgramTag::BOIInstructionsForStickyFlagTag;
- }
- static void CaptureNoLiterals(Program* program);
- void CaptureLiterals(Node* root, const Char *litbuf);
- static void EmitAndCaptureSuccInst(Recycler* recycler, Program* program);
- void CaptureInsts();
- void FreeBody();
- Compiler
- ( Js::ScriptContext* scriptContext
- , ArenaAllocator* ctAllocator
- , ArenaAllocator* rtAllocator
- , StandardChars<Char>* standardChars
- , Program* program
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , DebugWriter* w
- , RegexStats* stats
- #endif
- );
- public:
- static void CompileEmptyRegex
- ( Program* program
- , RegexPattern* pattern
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , DebugWriter* w
- , RegexStats* stats
- #endif
- );
- static void Compile
- ( Js::ScriptContext* scriptContext
- , ArenaAllocator* ctAllocator
- , ArenaAllocator* rtAllocator
- , StandardChars<Char>* standardChars
- , Program* program
- , Node* root
- , const Char* litbuf
- , RegexPattern* pattern
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , DebugWriter* w
- , RegexStats* stats
- #endif
- );
- };
- }
|