Explorar o código

[MERGE #5669 @dilijev] Make some tweaks to RegExp debug switches.

Merge pull request #5669 from dilijev:re-debug-switches

This change only affects debug output displayed under `-RegexDebug`.

Prior to this change, `-RegexDebug` had 3 "chapters" of output, all enabled with no option to disable: AST, Annotated AST, and (Bytecode) Program.

This change adds flags `-RegexDebugAST[-]` and `-RegexDebugAnnotatedAST[-]` to disable the AST output chapters and bring focus to the resulting bytecode program, which is the most relevant for seeing how effective the regex bytecode generation was for a given case. The AST output can be noisy and distract from this goal.

This change leaves the AST output chapters on-by-default to match existing behavior.

The AST may be interesting information when working on the Regex engine even if not doing work on optimizations. In that case the on-by-default behavior for `-RegexDebug` should probably be: `-RegexDebugAST -RegexDebugAnnotatedAST-`
Doug Ilijev hai 7 anos
pai
achega
6efd6f66df

+ 4 - 0
lib/Common/ConfigFlagsList.h

@@ -559,6 +559,8 @@ PHASE(All)
 #define DEFAULT_CONFIG_RegexTracing         (false)
 #define DEFAULT_CONFIG_RegexProfile         (false)
 #define DEFAULT_CONFIG_RegexDebug           (false)
+#define DEFAULT_CONFIG_RegexDebugAST        (true)
+#define DEFAULT_CONFIG_RegexDebugAnnotatedAST (true)
 #define DEFAULT_CONFIG_RegexBytecodeDebug   (false)
 #define DEFAULT_CONFIG_RegexOptimize        (true)
 #define DEFAULT_CONFIG_DynamicRegexMruListSize (16)
@@ -1547,6 +1549,8 @@ FLAGNR(Boolean, ValidateHeapEnum      , "Validate that heap enumeration is repor
 FLAGR (Boolean, RegexTracing          , "Trace all Regex invocations to the output.", DEFAULT_CONFIG_RegexTracing)
 FLAGR (Boolean, RegexProfile          , "Collect usage statistics on all Regex invocations.", DEFAULT_CONFIG_RegexProfile)
 FLAGR (Boolean, RegexDebug            , "Trace compilation of UnifiedRegex expressions.", DEFAULT_CONFIG_RegexDebug)
+FLAGR (Boolean, RegexDebugAST         , "Display Regex AST (requires -RegexDebug to view). [default on]", DEFAULT_CONFIG_RegexDebugAST)
+FLAGR (Boolean, RegexDebugAnnotatedAST, "Display Regex Annotated AST (requires -RegexDebug and -RegexDebugAST to view). [default on]", DEFAULT_CONFIG_RegexDebugAnnotatedAST)
 FLAGR (Boolean, RegexBytecodeDebug    , "Display layout of UnifiedRegex bytecode (requires -RegexDebug to view).", DEFAULT_CONFIG_RegexBytecodeDebug)
 FLAGR (Boolean, RegexOptimize         , "Optimize regular expressions in the unified Regex system (default: true)", DEFAULT_CONFIG_RegexOptimize)
 FLAGR (Number,  DynamicRegexMruListSize, "Size of the MRU list for dynamic regexes", DEFAULT_CONFIG_DynamicRegexMruListSize)

+ 2 - 2
lib/Parser/RegexCompileTime.cpp

@@ -4611,7 +4611,7 @@ namespace UnifiedRegex
     {
 
 #if ENABLE_REGEX_CONFIG_OPTIONS
-        if (w != 0)
+        if (w != 0 && REGEX_CONFIG_FLAG(RegexDebugAST))
         {
             w->PrintEOL(_u("REGEX AST /%s/ {"), PointerValue(program->source));
             w->Indent();
@@ -4723,7 +4723,7 @@ namespace UnifiedRegex
                     root->AnnotatePass4(compiler);
 
 #if ENABLE_REGEX_CONFIG_OPTIONS
-                    if (w != 0)
+                    if (w != 0 && REGEX_CONFIG_FLAG(RegexDebugAST) && REGEX_CONFIG_FLAG(RegexDebugAnnotatedAST))
                     {
                         w->PrintEOL(_u("REGEX ANNOTATED AST /%s/ {"), PointerValue(program->source));
                         w->Indent();

+ 3 - 0
lib/Parser/RegexPattern.cpp

@@ -183,6 +183,9 @@ namespace UnifiedRegex
                 case _u('\x2029'):
                     w->PrintEscapedChar(c);
                     break;
+                case _u('-'):
+                    w->Print(_u("-"));
+                    break;
                 case _u('\\'):
                     Assert(i + 1 < str.GetLength()); // cannot end in a '\'
                     w->Print(_u("\\%lc"), str.GetBuffer()[++i]);

+ 21 - 0
lib/Parser/RegexRuntime.cpp

@@ -778,6 +778,7 @@ namespace UnifiedRegex
 #if ENABLE_REGEX_CONFIG_OPTIONS
     void TrieMixin::Print(DebugWriter* w, const char16* litbuf) const
     {
+        w->PrintEOL(_u(""));
         trie.Print(w);
     }
 #endif
@@ -5752,6 +5753,26 @@ namespace UnifiedRegex
         w->PrintEOL(_u("Program {"));
         w->Indent();
         w->PrintEOL(_u("source:       %s"), PointerValue(source));
+
+        w->Print(_u("litbuf:       "));
+        const char16 *litbuf = this->rep.insts.litbuf;
+        size_t litbufLen = 0;
+        if (litbuf == nullptr)
+        {
+            w->PrintEOL(_u("<NONE>"));
+        }
+        else
+        {
+            litbufLen = this->rep.insts.litbufLen;
+            for (size_t i = 0; i < litbufLen; ++i)
+            {
+                const char16 c = (char16)litbuf[i];
+                w->PrintEscapedChar(c);
+            }
+            w->PrintEOL(_u(""));
+        }
+        w->PrintEOL(_u("litbufLen:    %u"), litbufLen);
+
         w->Print(_u("flags:        "));
         if ((flags & GlobalRegexFlag) != 0) w->Print(_u("global "));
         if ((flags & MultilineRegexFlag) != 0) w->Print(_u("multiline "));

+ 32 - 0
test/Regex/BoiHardFail.baseline

@@ -75,6 +75,8 @@ REGEX ANNOTATED AST /^token/ {
 REGEX PROGRAM /^token/
 Program {
     source:       ^token
+    litbuf:       token
+    litbufLen:    5
     flags:        
     numGroups:    1
     numLoops:     0
@@ -235,6 +237,8 @@ REGEX ANNOTATED AST /(\w)?^token/ {
 REGEX PROGRAM /(\w)?^token/
 Program {
     source:       (\w)?^token
+    litbuf:       token
+    litbufLen:    5
     flags:        
     numGroups:    2
     numLoops:     0
@@ -276,6 +280,8 @@ REGEX ANNOTATED AST /token/ {
 REGEX PROGRAM /token/
 Program {
     source:       token
+    litbuf:       token
+    litbufLen:    5
     flags:        
     numGroups:    1
     numLoops:     0
@@ -383,6 +389,8 @@ REGEX ANNOTATED AST /^^token/ {
 REGEX PROGRAM /^^token/
 Program {
     source:       ^^token
+    litbuf:       token
+    litbufLen:    5
     flags:        
     numGroups:    1
     numLoops:     0
@@ -469,6 +477,8 @@ REGEX ANNOTATED AST /token^/ {
 REGEX PROGRAM /token^/
 Program {
     source:       token^
+    litbuf:       token
+    litbufLen:    5
     flags:        
     numGroups:    1
     numLoops:     0
@@ -577,6 +587,8 @@ REGEX ANNOTATED AST /token^token/ {
 REGEX PROGRAM /token^token/
 Program {
     source:       token^token
+    litbuf:       tokentoken
+    litbufLen:    10
     flags:        
     numGroups:    1
     numLoops:     0
@@ -760,6 +772,8 @@ REGEX ANNOTATED AST /^token|^abc/ {
 REGEX PROGRAM /^token|^abc/
 Program {
     source:       ^token|^abc
+    litbuf:       tokenabc
+    litbufLen:    8
     flags:        
     numGroups:    1
     numLoops:     0
@@ -898,6 +912,8 @@ REGEX ANNOTATED AST /(?!token)^abc/ {
 REGEX PROGRAM /(?!token)^abc/
 Program {
     source:       (?!token)^abc
+    litbuf:       tokenabc
+    litbufLen:    8
     flags:        
     numGroups:    1
     numLoops:     0
@@ -1013,6 +1029,8 @@ REGEX ANNOTATED AST /(?=^abc)/ {
 REGEX PROGRAM /(?=^abc)/
 Program {
     source:       (?=^abc)
+    litbuf:       abc
+    litbufLen:    3
     flags:        
     numGroups:    1
     numLoops:     0
@@ -1127,6 +1145,8 @@ REGEX ANNOTATED AST /(^token)/ {
 REGEX PROGRAM /(^token)/
 Program {
     source:       (^token)
+    litbuf:       token
+    litbufLen:    5
     flags:        
     numGroups:    2
     numLoops:     0
@@ -1266,6 +1286,8 @@ REGEX ANNOTATED AST /(^a)+/ {
 REGEX PROGRAM /(^a)+/
 Program {
     source:       (^a)+
+    litbuf:       <NONE>
+    litbufLen:    0
     flags:        
     numGroups:    2
     numLoops:     1
@@ -1333,6 +1355,8 @@ REGEX ANNOTATED AST /(?=^)/ {
 REGEX PROGRAM /(?=^)/
 Program {
     source:       (?=^)
+    litbuf:       <NONE>
+    litbufLen:    0
     flags:        
     numGroups:    1
     numLoops:     0
@@ -1398,6 +1422,8 @@ REGEX ANNOTATED AST /(^)/ {
 REGEX PROGRAM /(^)/
 Program {
     source:       (^)
+    litbuf:       <NONE>
+    litbufLen:    0
     flags:        
     numGroups:    2
     numLoops:     0
@@ -1488,6 +1514,8 @@ REGEX ANNOTATED AST /(^)+/ {
 REGEX PROGRAM /(^)+/
 Program {
     source:       (^)+
+    litbuf:       <NONE>
+    litbufLen:    0
     flags:        
     numGroups:    2
     numLoops:     1
@@ -1554,6 +1582,8 @@ REGEX ANNOTATED AST /(?!^)/ {
 REGEX PROGRAM /(?!^)/
 Program {
     source:       (?!^)
+    litbuf:       <NONE>
+    litbufLen:    0
     flags:        
     numGroups:    1
     numLoops:     0
@@ -1667,6 +1697,8 @@ REGEX ANNOTATED AST /(?:^abc)+?/ {
 REGEX PROGRAM /(?:^abc)+?/
 Program {
     source:       (?:^abc)+?
+    litbuf:       abc
+    litbufLen:    3
     flags:        
     numGroups:    1
     numLoops:     1