7 jaren geleden · b15fa2c1de
--- a/lib/Parser/RegexParser.cpp
+++ b/lib/Parser/RegexParser.cpp
@@ -1529,7 +1529,13 @@ namespace UnifiedRegex
 
				         else if (ECLookahead() == 'c')
			
 
				         {
			
 
				             if (standardEncodedChars->IsLetter(ECLookahead(1))) // terminating 0 is not a letter
			
 
				+            {
			
 
				                 ECConsume(2);
			
 
				+            }
			
 
				+            else
			
 
				+            {
			
 
				+                DeferredFailIfUnicode(JSERR_RegExpInvalidEscape);
			
 
				+            }
			
 
				             return false;
			
 
				         }
			
 
				         else
			
@@ -2494,6 +2500,8 @@ namespace UnifiedRegex
 
				                 }
			
 
				                 else
			
 
				                 {
			
 
				+                    DeferredFailIfUnicode(JSERR_RegExpInvalidEscape); // Fail in unicode mode for non-letter escaped control characters according to 262 Annex-B RegExp grammar SPEC #prod-annexB-Term 
			
 
				+
			
 
				                     if (!IsEOF())
			
 
				                     {
			
 
				                         EncodedChar ecLookahead = ECLookahead();
			
@@ -2625,7 +2633,7 @@ namespace UnifiedRegex
 
				                 standardChars->SetNonWordChars(ctAllocator, deferredSetNode->set);
			
 
				                 return deferredSetNode;
			
 
				             case 'c':
			
 
				-                if (standardEncodedChars->IsLetter(ECLookahead())) // terminating 0 is not a letter
			
 
				+                if (standardEncodedChars->IsWord(ECLookahead())) // terminating 0 is not a word
			
 
				                 {
			
 
				                     c = UTC(Chars<EncodedChar>::CTU(ECLookahead()) % 32);
			
 
				                     ECConsume();
			
@@ -2633,25 +2641,11 @@ namespace UnifiedRegex
 
				                 }
			
 
				                 else
			
 
				                 {
			
 
				-                    // SPEC DEVIATION: For non-letters, still take lower 5 bits, e.g. [\c1] == [\x11].
			
 
				-                    //                 However, '-', ']', and EOF make the \c just a 'c'.
			
 
				-                    if (!IsEOF())
			
 
				-                    {
			
 
				-                        EncodedChar ec = ECLookahead();
			
 
				-                        switch (ec)
			
 
				-                        {
			
 
				-                        case '-':
			
 
				-                        case ']':
			
 
				-                            // fall-through for identity escape with 'c'
			
 
				-                            break;
			
 
				-                        default:
			
 
				-                            c = UTC(Chars<EncodedChar>::CTU(ec) % 32);
			
 
				-                            ECConsume();
			
 
				-                            // fall-through for identity escape
			
 
				-                            break;
			
 
				-                        }
			
 
				-                    }
			
 
				-                    // else: fall-through for identity escape with 'c'
			
 
				+                    // If the lookahead is not a word character ([A-Za-z0-9_]), treat '\' and 'c' as two separate characters.
			
 
				+                    //#sec-regular-expression-patterns-semantics 
			
 
				+                    ECRevert(1); //Put cursor back at 'c' and treat it as a non-escaped character.
			
 
				+                    deferredCharNode->cs[0] = '\\';
			
 
				+                    return deferredCharNode;
			
 
				                 }
			
 
				                 break;
			
 
				             case 'x':
			
--- a/test/Regex/control_character_escapes.js
+++ b/test/Regex/control_character_escapes.js
@@ -0,0 +1,201 @@
 
				+//-------------------------------------------------------------------------------------------------------

			
 
				+// Copyright (C) Microsoft. All rights reserved.

			
 
				+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.

			
 
				+//-------------------------------------------------------------------------------------------------------

			
 
				+

			
 
				+WScript.LoadScriptFile("..\\UnitTestFramework\\UnitTestFramework.js");

			
 
				+

			
 
				+function matchRegExp(str, regexpLiteral, expectedResult)

			
 
				+{

			
 
				+    matchResultLiteral = str.match(regexpLiteral);

			
 
				+    errorMsgBase = "Expected result of match between string: '" + str + "' and regular expression: " + regexpLiteral + " to be " + 

			
 
				+                    expectedResult + " but was "

			
 
				+

			
 
				+    actualResultLiteral = matchResultLiteral == null ? null : matchResultLiteral[0];

			
 
				+    assert.areEqual(expectedResult, actualResultLiteral, errorMsgBase + actualResultLiteral); 

			
 
				+    

			
 
				+    regexpConstructor = new RegExp(regexpLiteral);

			
 
				+    matchResultConstructor = str.match(regexpConstructor);

			
 
				+

			
 
				+    actualResultConstructor = matchResultConstructor == null ? null : matchResultConstructor[0];

			
 
				+    assert.areEqual(expectedResult, actualResultConstructor, errorMsgBase + actualResultConstructor); 

			
 
				+}

			
 
				+

			
 
				+var tests = [

			
 
				+    {

			
 
				+        name : "Control characters followed by a word character ([A-Za-z0-9_])",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            re = /[\c6]+/; //'6' = ascii x36

			
 
				+            matchRegExp("6", re, null);

			
 
				+            matchRegExp("\\", re, null);

			
 
				+            matchRegExp("\\c6", re, null);

			
 
				+            matchRegExp("c", re, null);

			
 
				+            matchRegExp("\x16", re, "\x16");

			
 
				+            

			
 
				+            re = /\c6/; //'6' = ascii x36

			
 
				+            matchRegExp("\\c6", re, "\\c6");

			
 
				+            matchRegExp("\\", re, null);

			
 
				+            matchRegExp("6", re, null);

			
 
				+            matchRegExp("c", re, null);

			
 
				+            matchRegExp("\x16", re, null);

			
 
				+            

			
 
				+            re = /\c6[\c6]+/; //'6' = ascii x36

			
 
				+            matchRegExp("\\c6\x16", re, "\\c6\x16");

			
 
				+            matchRegExp("\\", re, null);

			
 
				+            matchRegExp("c", re, null);

			
 
				+            matchRegExp("\x16", re, null);

			
 
				+            

			
 
				+            re = /[\ca]+/; //'a' = ascii x61

			
 
				+            matchRegExp("a", re, null);

			
 
				+            matchRegExp("\\", re, null);

			
 
				+            matchRegExp("c", re, null);

			
 
				+            matchRegExp("00xyzabc123\x01qrst", re, "\x01");

			
 
				+	    

			
 
				+            re = /[\c_]+/; //'_' = ascii 0x5F

			
 
				+            matchRegExp("\x1F\x1F\x05", re, "\x1F\x1F");

			
 
				+            matchRegExp("\\\\\\", re, null);

			
 
				+            matchRegExp("////", re, null);

			
 
				+            matchRegExp("ccc_", re, null);

			
 
				+            

			
 
				+            re = /[\cG]*/; //'G' = ascii x47

			
 
				+            matchRegExp("\x07\x06\x05", re, "\x07");

			
 
				+            matchRegExp("\\\\", re, "");

			
 
				+            matchRegExp("////", re, "");

			
 
				+            matchRegExp("cccG", re, "");

			
 
				+            

			
 
				+            re = /\cG\cf/; //'G' = ascii x47, 'f' = ascii x66

			
 
				+            matchRegExp("\x00\x03\x07\x06\x07\x08", re, "\x07\x06");

			
 
				+            matchRegExp("\\", re, null);

			
 
				+            matchRegExp("/", re, null);

			
 
				+            matchRegExp("\\cG\\c6\\cf", re, null);

			
 
				+            

			
 
				+            re = /[\cG\c6\cf]+/; //'G' = ascii x47, '6' = ascii x36, 'f' = ascii x66

			
 
				+            matchRegExp("\x00\x03\x07\x06\x16\x07\x08", re, "\x07\x06\x16\x07");

			
 
				+            matchRegExp("\\\\", re, null);

			
 
				+            matchRegExp("////", re, null);

			
 
				+            matchRegExp("cfG6", re, null);

			
 
				+            

			
 
				+            re = /\cG\cf/; //'G' = ascii x47, 'f' = ascii x66

			
 
				+            matchRegExp("\x00\x03\x07\x06\x16\x07\x08", re, "\x07\x06");

			
 
				+            matchRegExp("\\", re, null);

			
 
				+            matchRegExp("/", re, null);

			
 
				+            matchRegExp("\\cG\\c6\\cf", re, null);

			
 
				+            

			
 
				+            re = /[\cz\cZ]+/; //'z' = ascii x7A, 'Z' = ascii x5A, have the same lowest 5 bits

			
 
				+            matchRegExp("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + 

			
 
				+                        "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", re, "\x1a");

			
 
				+            matchRegExp("\\\\", re, null);

			
 
				+            matchRegExp("////", re, null);

			
 
				+            matchRegExp("ccczZ", re, null);

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Control characters followed by a non-word character ([^A-Za-z0-9_])",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            re = /[\c*]+/; //'*' = ascii 42

			
 
				+            matchRegExp("\x0a\x09\x08", re, null);

			
 
				+            matchRegExp("a*c*b*d*", re, "*c*");

			
 
				+            matchRegExp("\\\\", re, "\\\\");

			
 
				+            matchRegExp("////", re, null);

			
 
				+            matchRegExp("ccc", re, "ccc");

			
 
				+            

			
 
				+            re = /[\c}]*/; //'}' = ascii 125

			
 
				+            matchRegExp("\x1d\x7d\x3d", re, "");

			
 
				+            matchRegExp("}c}}cd*c*b*d*", re, "}c}}c");

			
 
				+            matchRegExp("\\\\", re, "\\\\");

			
 
				+            matchRegExp("////", re, "");

			
 
				+            matchRegExp("ccc", re, "ccc");

			
 
				+            

			
 
				+            re = /[\c;]+/; //';' = ascii 59

			
 
				+            matchRegExp("\x1b\x1c", re, null);

			
 
				+            matchRegExp("d;c;d;*", re, ";c;");

			
 
				+            matchRegExp("\\\\", re, "\\\\");

			
 
				+            matchRegExp("////", re, null);

			
 
				+            matchRegExp("ccc", re, "ccc");

			
 
				+            

			
 
				+            re = /\c%/; //'%' = ascii x25

			
 
				+            matchRegExp("\\", re, null);

			
 
				+            matchRegExp("\\", re, null);

			
 
				+            matchRegExp("\\c%", re, "\\c%");

			
 
				+            matchRegExp("\x05", re, null);

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Control Character tests with unicode flag present",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            re = /[\cAg]+/u; //'A' = ascii x41

			
 
				+            matchRegExp("abcdefghi", re, "g");

			
 
				+            matchRegExp("\\\\", re, null);

			
 
				+            matchRegExp("////", re, null);

			
 
				+            matchRegExp("\x01\x01gg\x02\x04ggg", re, "\x01\x01gg");            

			
 
				+            

			
 
				+            re = /[\czA]+/u;  //'z' = ascii x7A

			
 
				+            matchRegExp("abcdefghi", re, null);

			
 
				+            matchRegExp("\\\\", re, null);

			
 
				+            matchRegExp("////", re, null);

			
 
				+            matchRegExp("YZA\x1aABC", re, "A\x1aA");    

			
 
				+            

			
 
				+            assert.throws(() => eval("\"\".match(/[\\c]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+                        "Invalid regular expression: invalid escape in unicode pattern");

			
 
				+            assert.throws(() => eval("\"\".match(/[\\c-d]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+                        "Invalid regular expression: invalid escape in unicode pattern");

			
 
				+            assert.throws(() => eval("\"\".match(/[ab\\c_$]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+                        "Invalid regular expression: invalid escape in unicode pattern");

			
 
				+            assert.throws(() => eval("\"\".match(/[ab\\c\\d]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+                        "Invalid regular expression: invalid escape in unicode pattern");

			
 
				+            assert.throws(() => eval("\"\".match(/[ab\\c3]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+                        "Invalid regular expression: invalid escape in unicode pattern");

			
 
				+                        

			
 
				+            re = /\cAg/u;  //'A' = ascii x41

			
 
				+            matchRegExp("abcdefghi", re, null);

			
 
				+            matchRegExp("\\\\", re, null);

			
 
				+            matchRegExp("////", re, null);

			
 
				+            matchRegExp("\x01\x01gg\x02\x04ggg", re, "\x01g");            

			
 
				+            

			
 
				+            re = /\czA/u;  //'z' = ascii x7A

			
 
				+            matchRegExp("abcdefghi", re, null);

			
 
				+            matchRegExp("\\\\", re, null);

			
 
				+            matchRegExp("////", re, null);

			
 
				+            matchRegExp("YZA\x1aABC", re, "\x1aA");   

			
 
				+            

			
 
				+            assert.throws(() => eval("\"\".match(/\\c/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+                        "Invalid regular expression: invalid escape in unicode pattern");

			
 
				+            assert.throws(() => eval("\"\".match(/\\c-d/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+                        "Invalid regular expression: invalid escape in unicode pattern");

			
 
				+            assert.throws(() => eval("\"\".match(/ab\\c_$/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+                        "Invalid regular expression: invalid escape in unicode pattern");

			
 
				+            assert.throws(() => eval("\"\".match(/ab\\c\\d/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+                        "Invalid regular expression: invalid escape in unicode pattern");

			
 
				+            assert.throws(() => eval("\"\".match(/ab\\c3/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+                        "Invalid regular expression: invalid escape in unicode pattern");

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Control character edge cases",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            re = /[\c-g]+/; //'-' = ascii x2D

			
 
				+            matchRegExp("abcdefghi", re, "cdefg");

			
 
				+            matchRegExp("\\\\", re, "\\\\");

			
 
				+            matchRegExp("////", re, null);

			
 
				+            matchRegExp("\x0d", re, null);

			
 
				+            matchRegExp("aobd\\f\\d", re, "d\\f\\d");            

			
 
				+            

			
 
				+            re = /[\c-]+/; //'-' = ascii x2D

			
 
				+            matchRegExp("abcdefghi", re, "c");

			
 
				+            matchRegExp("\x0d", re, null);

			
 
				+            matchRegExp("\\\\", re, "\\\\");

			
 
				+            matchRegExp("////", re, null);

			
 
				+            matchRegExp("aobd\\f\\d", re, "\\");  

			
 
				+            

			
 
				+            assert.throws(() => eval("\"\".match(/[\\c-a]/)"), SyntaxError, "Expected an error due to 'c-a' being an invalid range.", "Invalid range in character set");

			
 
				+        }

			
 
				+    }

			
 
				+];

			
 
				+

			
 
				+// Run all test cases; output is verbose unless the first script argument is "summary".
				+testRunner.runTests(tests, { verbose : WScript.Arguments[0] != "summary" });
			
--- a/test/Regex/rlexe.xml
+++ b/test/Regex/rlexe.xml
@@ -229,10 +229,16 @@
 
				       <compile-flags>-args summary -endargs</compile-flags>
			
 
				     </default>
			
 
				   </test>
			
 
				-    <test>
			
 
				+  <test>
			
 
				     <default>
			
 
				       <files>characterclass_with_range.js</files>
			
 
				       <compile-flags>-args summary -endargs</compile-flags>
			
 
				     </default>
			
 
				   </test>
			
 
				+  <test>
			
 
				+    <default>
			
 
				+      <files>control_character_escapes.js</files>
			
 
				+      <compile-flags>-args summary -endargs</compile-flags>
			
 
				+    </default>
			
 
				+  </test>
			
 
				 </regress-exe>