Explorar el Código

Remove HardFailMixin to eliminate the boolean field and size cost.

Doug Ilijev hace 8 años
padre
commit
ace1d65388

+ 22 - 2
lib/Parser/RegexCompileTime.cpp

@@ -525,7 +525,15 @@ namespace UnifiedRegex
                         //  - not in a negative assertion
                         //  - backtracking could never rewind the input pointer
                         //
-                        EMIT(compiler, BOITestInst, isAtLeastOnce && isNotNegated && isPrevWillNotRegress);
+                        bool canHardFail = isAtLeastOnce && isNotNegated && isPrevWillNotRegress;
+                        if (canHardFail)
+                        {
+                            EMIT(compiler, BOITestInst<true>);
+                        }
+                        else
+                        {
+                            EMIT(compiler, BOITestInst<false>);
+                        }
                     }
                 }
                 break;
@@ -533,13 +541,16 @@ namespace UnifiedRegex
         case EOL:
             {
                 if ((compiler.program->flags & MultilineRegexFlag) != 0)
+                {
                     //
                     // Compilation scheme:
                     //
                     //   EOLTest
                     //
                     EMIT(compiler, EOLTestInst);
+                }
                 else
+                {
                     //
                     // Compilation scheme:
                     //
@@ -550,7 +561,16 @@ namespace UnifiedRegex
                     //  - not in a negative assertion
                     //  - backtracking could never advance the input pointer
                     //
-                    EMIT(compiler, EOITestInst, isAtLeastOnce && isNotNegated && isPrevWillNotProgress);
+                    bool canHardFail = isAtLeastOnce && isNotNegated && isPrevWillNotProgress; // EOI depends on the input pointer not advancing (not on it not regressing, which is the BOI condition)
+                    if (canHardFail)
+                    {
+                        EMIT(compiler, EOITestInst<true>);
+                    }
+                    else
+                    {
+                        EMIT(compiler, EOITestInst<false>);
+                    }
+                }
                 break;
             }
         default:

+ 5 - 2
lib/Parser/RegexOpCodes.h

@@ -20,10 +20,13 @@ M(Switch10)
 M(Switch20)
 M(SwitchAndConsume10)
 M(SwitchAndConsume20)
-M(BOITest)
-M(EOITest)
+MTemplate(BOIHardFailTest, template<bool canHardFail>, BOITestInst, BOITestInst<true>)
+MTemplate(BOITest, template<bool canHardFail>, BOITestInst, BOITestInst<false>)
+MTemplate(EOIHardFailTest, template<bool canHardFail>, EOITestInst, EOITestInst<true>)
+MTemplate(EOITest, template<bool canHardFail>, EOITestInst, EOITestInst<false>)
 M(BOLTest)
 M(EOLTest)
+// TODO (doilij) update Tag numbers
 // 0x10
 M(WordBoundaryTest)
 M(MatchChar)

+ 41 - 18
lib/Parser/RegexRuntime.cpp

@@ -384,7 +384,7 @@ namespace UnifiedRegex
         size_t size = sizeof(*((T *)that));
         byte *endByte = startByte + size;
         byte *currentByte = startByte;
-        w->Print(_u("0x%p[+0x%03x](0x%03x) [%s]:"), startByte, offset, size, annotation);
+        w->Print(_u("0x%p[+0x%03x](0x%03x)(sizeof:0x%03x)(alignof:0x%03x) [%s]:"), startByte, offset, size, sizeof(T), alignof(T), annotation);
 
         for (; currentByte < endByte; ++currentByte)
         {
@@ -782,13 +782,6 @@ namespace UnifiedRegex
     }
 #endif
 
-#if ENABLE_REGEX_CONFIG_OPTIONS
-    void HardFailMixin::Print(DebugWriter* w, const char16* litbuf) const
-    {
-        w->Print(_u("hardFail: %s"), canHardFail ? _u("true") : _u("false"));
-    }
-#endif
-
 #if ENABLE_REGEX_CONFIG_OPTIONS
     void GroupMixin::Print(DebugWriter* w, const char16* litbuf) const
     {
@@ -1449,7 +1442,13 @@ namespace UnifiedRegex
     // BOITestInst
     // ----------------------------------------------------------------------
 
-    inline bool BOITestInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
+    template <>
+    inline BOITestInst<true>::BOITestInst() : Inst(BOIHardFailTest) {}
+    template <>
+    inline BOITestInst<false>::BOITestInst() : Inst(BOITest) {}
+
+    template <bool canHardFail>
+    inline bool BOITestInst<canHardFail>::Exec(REGEX_INST_EXEC_PARAMETERS) const
     {
         if (inputOffset > 0)
         {
@@ -1468,12 +1467,21 @@ namespace UnifiedRegex
     }
 
 #if ENABLE_REGEX_CONFIG_OPTIONS
-    int BOITestInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
+    template <bool canHardFail>
+    int BOITestInst<canHardFail>::Print(DebugWriter* w, Label label, const Char* litbuf) const
     {
-        PRINT_RE_BYTECODE_BEGIN("BOITest");
-        PRINT_MIXIN(HardFailMixin);
+        if (canHardFail)
+        {
+            PRINT_RE_BYTECODE_BEGIN("BOIHardFailTest");
+        }
+        else
+        {
+            PRINT_RE_BYTECODE_BEGIN("BOITest");
+        }
+
+        w->Print(_u("<hardFail>: %s"), canHardFail ? _u("true") : _u("false"));
+
         PRINT_RE_BYTECODE_MID();
-        PRINT_BYTES(HardFailMixin);
         PRINT_RE_BYTECODE_END();
     }
 #endif
@@ -1482,7 +1490,13 @@ namespace UnifiedRegex
     // EOITestInst
     // ----------------------------------------------------------------------
 
-    inline bool EOITestInst::Exec(REGEX_INST_EXEC_PARAMETERS) const
+    template <>
+    inline EOITestInst<true>::EOITestInst() : Inst(EOIHardFailTest) {}
+    template <>
+    inline EOITestInst<false>::EOITestInst() : Inst(EOITest) {}
+
+    template <bool canHardFail>
+    inline bool EOITestInst<canHardFail>::Exec(REGEX_INST_EXEC_PARAMETERS) const
     {
         if (inputOffset < inputLength)
         {
@@ -1501,12 +1515,21 @@ namespace UnifiedRegex
     }
 
 #if ENABLE_REGEX_CONFIG_OPTIONS
-    int EOITestInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
+    template <bool canHardFail>
+    int EOITestInst<canHardFail>::Print(DebugWriter* w, Label label, const Char* litbuf) const
     {
-        PRINT_RE_BYTECODE_BEGIN("EOITest");
-        PRINT_MIXIN(HardFailMixin);
+        if (canHardFail)
+        {
+            PRINT_RE_BYTECODE_BEGIN("EOIHardFailTest");
+        }
+        else
+        {
+            PRINT_RE_BYTECODE_BEGIN("EOITest");
+        }
+
+        w->Print(_u("<hardFail>: %s"), canHardFail ? _u("true") : _u("false"));
+
         PRINT_RE_BYTECODE_MID();
-        PRINT_BYTES(HardFailMixin);
         PRINT_RE_BYTECODE_END();
     }
 #endif

+ 10 - 15
lib/Parser/RegexRuntime.h

@@ -453,17 +453,6 @@ namespace UnifiedRegex
 #endif
     };
 
-    struct HardFailMixin
-    {
-        bool canHardFail;
-
-        inline HardFailMixin(bool canHardFail) : canHardFail(canHardFail) {}
-
-#if ENABLE_REGEX_CONFIG_OPTIONS
-        void Print(DebugWriter* w, const char16* litbuf) const;
-#endif
-    };
-
     struct GroupMixin
     {
         const int groupId;
@@ -851,20 +840,25 @@ namespace UnifiedRegex
     // Built-in assertions
     //
 
-    struct BOITestInst : Inst, HardFailMixin
+    // BOI = Beginning of Input
+    template <bool canHardFail>
+    struct BOITestInst : Inst
     {
-        inline BOITestInst(bool canHardFail) : Inst(BOITest), HardFailMixin(canHardFail) {}
+        inline BOITestInst();
 
         INST_BODY
     };
 
-    struct EOITestInst : Inst, HardFailMixin
+    // EOI = End of Input
+    template <bool canHardFail>
+    struct EOITestInst : Inst
     {
-        inline EOITestInst(bool canHardFail) : Inst(EOITest), HardFailMixin(canHardFail) {}
+        inline EOITestInst();
 
         INST_BODY
     };
 
+    // BOL = Beginning of Line (/^.../)
     struct BOLTestInst : Inst
     {
         inline BOLTestInst() : Inst(BOLTest) {}
@@ -872,6 +866,7 @@ namespace UnifiedRegex
         INST_BODY
     };
 
+    // EOL = End of Line (/...$/)
     struct EOLTestInst : Inst
     {
         inline EOLTestInst() : Inst(EOLTest) {}

+ 12 - 12
test/Regex/BoiHardFail.baseline

@@ -242,7 +242,7 @@ Program {
         L0000: SyncToLiteralAndBackupInstT<ScannerMixin> aka SyncToLiteralAndBackup(literal: "token" (with full map Boyer-Moore scanner), backup: [0-1])
         L0001: TryMatchSet(set: [0-9A-Z_a-z], failLabel: Lffff)
         L0002: DefineGroupFixed(groupId: 1, length: 1, noNeedToSave: false)
-        L0003: BOITest(hardFail: false)
+        L0003: BOITest(<hardFail>: false)
         L0004: MatchLiteral(literal: "token")
         L0005: Succ()
     }
@@ -474,7 +474,7 @@ Program {
     numLoops:     0
     instructions: {
         L0000: SyncToLiteralAndConsumeInstT<ScannerMixin> aka SyncToLiteralAndConsume(literal: "token" (with full map Boyer-Moore scanner))
-        L0001: BOITest(hardFail: true)
+        L0001: BOIHardFailTest(<hardFail>: true)
         L0002: Succ()
     }
 }
@@ -582,7 +582,7 @@ Program {
     numLoops:     0
     instructions: {
         L0000: SyncToLiteralAndConsumeInstT<ScannerMixin> aka SyncToLiteralAndConsume(literal: "token" (with full map Boyer-Moore scanner))
-        L0001: BOITest(hardFail: true)
+        L0001: BOIHardFailTest(<hardFail>: true)
         L0002: MatchLiteral(literal: "token")
         L0003: Succ()
     }
@@ -765,10 +765,10 @@ Program {
     numLoops:     0
     instructions: {
         L0000: Try(failLabel: Lffff)
-        L0001: BOITest(hardFail: false)
+        L0001: BOITest(<hardFail>: false)
         L0002: MatchLiteral(literal: "token")
         L0003: Jump(targetLabel: Lffff)
-        L0004: BOITest(hardFail: false)
+        L0004: BOITest(<hardFail>: false)
         L0005: MatchLiteral(literal: "abc")
         L0006: Succ()
     }
@@ -905,7 +905,7 @@ Program {
         L0000: BeginAssertion(minBodyGroupId: 1, maxBodyGroupId: -1, isNegation: true, nextLabel: Lffff)
         L0001: MatchLiteral(literal: "token")
         L0002: EndAssertion()
-        L0003: BOITest(hardFail: true)
+        L0003: BOIHardFailTest(<hardFail>: true)
         L0004: MatchLiteral(literal: "abc")
         L0005: Succ()
     }
@@ -1018,7 +1018,7 @@ Program {
     numLoops:     0
     instructions: {
         L0000: BeginAssertion(minBodyGroupId: 1, maxBodyGroupId: -1, isNegation: false, nextLabel: Lffff)
-        L0001: BOITest(hardFail: true)
+        L0001: BOIHardFailTest(<hardFail>: true)
         L0002: MatchLiteral(literal: "abc")
         L0003: EndAssertion()
         L0004: Succ()
@@ -1272,7 +1272,7 @@ Program {
     instructions: {
         L0000: SyncToCharAndBackup(c: 'a', backup: [0-inf])
         L0001: BeginLoopFixedGroupLastIteration(loopId: 0, repeats: [1-inf], hasOuterLoops: false, hasInnerNondet: false, exitLabel: Lffff, , length: 1, groupId: 1, noNeedToSave: true)
-        L0002: BOITest(hardFail: false)
+        L0002: BOITest(<hardFail>: false)
         L0003: MatchChar(c: 'a')
         L0004: RepeatLoopFixedGroupLastIteration(beginLabel: Lffff)
         L0005: Succ()
@@ -1338,7 +1338,7 @@ Program {
     numLoops:     0
     instructions: {
         L0000: BeginAssertion(minBodyGroupId: 1, maxBodyGroupId: -1, isNegation: false, nextLabel: Lffff)
-        L0001: BOITest(hardFail: true)
+        L0001: BOIHardFailTest(<hardFail>: true)
         L0002: EndAssertion()
         L0003: Succ()
     }
@@ -1493,7 +1493,7 @@ Program {
     numLoops:     1
     instructions: {
         L0000: BeginLoop(loopId: 0, repeats: [1-inf], hasOuterLoops: false, hasInnerNondet: false, exitLabel: Lffff, , minBodyGroupId: 1, maxBodyGroupId: 1, greedy: true)
-        L0001: BOITest(hardFail: false)
+        L0001: BOITest(<hardFail>: false)
         L0002: DefineGroupFixed(groupId: 1, length: 0, noNeedToSave: false)
         L0003: RepeatLoop(beginLabel: Lffff)
         L0004: Succ()
@@ -1559,7 +1559,7 @@ Program {
     numLoops:     0
     instructions: {
         L0000: BeginAssertion(minBodyGroupId: 1, maxBodyGroupId: -1, isNegation: true, nextLabel: Lffff)
-        L0001: BOITest(hardFail: false)
+        L0001: BOITest(<hardFail>: false)
         L0002: EndAssertion()
         L0003: Succ()
     }
@@ -1673,7 +1673,7 @@ Program {
     instructions: {
         L0000: SyncToLiteralAndBackupInstT<ScannerMixin_WithLinearCharMap> aka SyncToLinearLiteralAndBackup(literal: "abc" (with linear map Boyer-Moore scanner), backup: [0-inf])
         L0001: BeginLoop(loopId: 0, repeats: [1-inf], hasOuterLoops: false, hasInnerNondet: false, exitLabel: Lffff, , minBodyGroupId: 1, maxBodyGroupId: -1, greedy: false)
-        L0002: BOITest(hardFail: true)
+        L0002: BOIHardFailTest(<hardFail>: true)
         L0003: MatchLiteral(literal: "abc")
         L0004: RepeatLoop(beginLabel: Lffff)
         L0005: Succ()