преди 7 години · 4c26c3cfbf
--- a/lib/Parser/RegexParser.cpp
+++ b/lib/Parser/RegexParser.cpp
@@ -2167,7 +2167,7 @@ namespace UnifiedRegex
 
				                     {
			
 
				                         if (unicodeFlagPresent)
			
 
				                         {
			
 
				-                            //We a range containing a character class and the unicode flag is present, thus we end up having to throw a "Syntax" error here
			
 
				+                            //The range contains a character class and the unicode flag is present, so we have to throw a "Syntax" error here
			
 
				                             //This breaks the notion of Pass0 check for valid syntax, because during that time, the unicode flag is unknown.
			
 
				                             Fail(JSERR_UnicodeRegExpRangeContainsCharClass); //From #sec-patterns-static-semantics-early-errors-annexb
			
 
				                         }
			
@@ -2212,7 +2212,7 @@ namespace UnifiedRegex
 
				             {
			
 
				                 if (prevprevWasACharSetAndPartOfRange)
			
 
				                 {
			
 
				-                    //We a range containing a character class and the unicode flag is present, thus we end up having to throw a "Syntax" error here
			
 
				+                    //If the range contains a character class and the unicode flag is present, we have to throw a "Syntax" error here
			
 
				                     //This breaks the notion of Pass0 check for valid syntax, because during that time, the unicode flag is unknown.
			
 
				                     if (unicodeFlagPresent)
			
 
				                     {
			
@@ -2500,7 +2500,7 @@ namespace UnifiedRegex
 
				                 }
			
 
				                 else
			
 
				                 {
			
 
				-                    DeferredFailIfUnicode(JSERR_RegExpInvalidEscape); // Fail in unicode mode for non-letter escaped control characters according to 262 Annex-B RegExp grammar SPEC #prod-annexB-Term 
			
 
				+                    DeferredFailIfUnicode(JSERR_RegExpInvalidEscape); // Fail in unicode mode for non-letter escaped control characters according to 262 Annex-B RegExp grammar spec #prod-annexB-Term 
			
 
				 
			
 
				                     if (!IsEOF())
			
 
				                     {
			
@@ -2633,7 +2633,7 @@ namespace UnifiedRegex
 
				                 standardChars->SetNonWordChars(ctAllocator, deferredSetNode->set);
			
 
				                 return deferredSetNode;
			
 
				             case 'c':
			
 
				-                if (standardEncodedChars->IsWord(ECLookahead())) // terminating 0 is not a word
			
 
				+                if (standardEncodedChars->IsWord(ECLookahead())) // terminating 0 is not a word character
			
 
				                 {
			
 
				                     c = UTC(Chars<EncodedChar>::CTU(ECLookahead()) % 32);
			
 
				                     ECConsume();
			
@@ -2641,7 +2641,7 @@ namespace UnifiedRegex
 
				                 }
			
 
				                 else
			
 
				                 {
			
 
				-                    // If the lookahead is a non-alphanumeric and not a dash('-'), then treat '\' and 'c' separately.
			
 
				+                    // If the lookahead is a non-alphanumeric and not an underscore ('_'), then treat '\' and 'c' separately.
			
 
				                     //#sec-regular-expression-patterns-semantics 
			
 
				                     ECRevert(1); //Put cursor back at 'c' and treat it as a non-escaped character.
			
 
				                     deferredCharNode->cs[0] = '\\';
			
--- a/test/Regex/control_character_escapes.js
+++ b/test/Regex/control_character_escapes.js
@@ -26,63 +26,57 @@ var tests = [
 
				         name : "Control characters followed by a word character ([A-Za-z0-9_])",

			
 
				         body : function () 

			
 
				         {

			
 
				-            re = /[\c6]+/; //'6' = ascii x36

			
 
				+            re = /[\c6]+/; //'6' = ascii x36, parsed as [\x16]+

			
 
				             matchRegExp("6", re, null);

			
 
				             matchRegExp("\\", re, null);

			
 
				             matchRegExp("\\c6", re, null);

			
 
				             matchRegExp("c", re, null);

			
 
				             matchRegExp("\x16", re, "\x16");

			
 
				             

			
 
				-            re = /\c6/; //'6' = ascii x36

			
 
				+            re = /\c6/; //'6' = ascii x36, parsed as "\\c6"

			
 
				             matchRegExp("\\c6", re, "\\c6");

			
 
				             matchRegExp("\\", re, null);

			
 
				             matchRegExp("6", re, null);

			
 
				             matchRegExp("c", re, null);

			
 
				             matchRegExp("\x16", re, null);

			
 
				             

			
 
				-            re = /\c6[\c6]+/; //'6' = ascii x36

			
 
				+            re = /\c6[\c6]+/; //'6' = ascii x36, parsed as "\\c6"[\x16]+

			
 
				             matchRegExp("\\c6\x16", re, "\\c6\x16");

			
 
				             matchRegExp("\\", re, null);

			
 
				             matchRegExp("c", re, null);

			
 
				             matchRegExp("\x16", re, null);

			
 
				             

			
 
				-            re = /[\ca]+/; //'a' = ascii x61

			
 
				+            re = /[\ca]+/; //'a' = ascii x61, parsed as [\x01]+

			
 
				             matchRegExp("a", re, null);

			
 
				             matchRegExp("\\", re, null);

			
 
				             matchRegExp("c", re, null);

			
 
				             matchRegExp("00xyzabc123\x01qrst", re, "\x01");

			
 
				 	    

			
 
				-            re = /[\c_]+/; //'_' = ascii 0x5F

			
 
				+            re = /[\c_]+/; //'_' = ascii 0x5F, parsed as [\x1F]+

			
 
				             matchRegExp("\x1F\x1F\x05", re, "\x1F\x1F");

			
 
				             matchRegExp("\\\\\\", re, null);

			
 
				             matchRegExp("////", re, null);

			
 
				             matchRegExp("ccc_", re, null);

			
 
				             

			
 
				-            re = /[\cG]*/; //'G' = ascii x47

			
 
				+            re = /[\cG]*/; //'G' = ascii x47, parsed as [\x07]*

			
 
				             matchRegExp("\x07\x06\x05", re, "\x07");

			
 
				             matchRegExp("\\\\", re, "");

			
 
				             matchRegExp("////", re, "");

			
 
				             matchRegExp("cccG", re, "");

			
 
				             

			
 
				-            re = /\cG\cf/; //'G' = ascii x47, 'f' = ascii x66

			
 
				-            matchRegExp("\x00\x03\x07\x06\x07\x08", re, "\x07\x06");

			
 
				-            matchRegExp("\\", re, null);

			
 
				-            matchRegExp("/", re, null);

			
 
				-            matchRegExp("\\cG\\c6\\cf", re, null);

			
 
				-            

			
 
				-            re = /[\cG\c6\cf]+/; //'G' = ascii x47, '6' = ascii x36, 'f' = ascii x66

			
 
				+            re = /[\cG\c6\cf]+/; //'G' = ascii x47, '6' = ascii x36, 'f' = ascii x66, parsed as [\x07\x16\x06]+

			
 
				             matchRegExp("\x00\x03\x07\x06\x16\x07\x08", re, "\x07\x06\x16\x07");

			
 
				             matchRegExp("\\\\", re, null);

			
 
				             matchRegExp("////", re, null);

			
 
				             matchRegExp("cfG6", re, null);

			
 
				             

			
 
				-            re = /\cG\cf/; //'G' = ascii x47, 'f' = ascii x66

			
 
				+            re = /\cG\cf/; //'G' = ascii x47, 'f' = ascii x66, parsed as "\x07\x06"

			
 
				             matchRegExp("\x00\x03\x07\x06\x16\x07\x08", re, "\x07\x06");

			
 
				             matchRegExp("\\", re, null);

			
 
				             matchRegExp("/", re, null);

			
 
				             matchRegExp("\\cG\\c6\\cf", re, null);

			
 
				             

			
 
				-            re = /[\cz\cZ]+/; //'z' = ascii x7A, 'Z' = ascii x5A, have the same lowest 5 bits

			
 
				+            re = /[\cz\cZ]+/; //'z' = ascii x7A, 'Z' = ascii x5A, have the same lowest 5 bits, parsed as [\x1A]+

			
 
				             matchRegExp("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + 

			
 
				                         "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", re, "\x1a");

			
 
				             matchRegExp("\\\\", re, null);

			
@@ -94,28 +88,28 @@ var tests = [
 
				         name : "Control characters followed by a non-word character ([^A-Za-z0-9_])",

			
 
				         body : function () 

			
 
				         {

			
 
				-            re = /[\c*]+/; //'*' = ascii 42

			
 
				+            re = /[\c*]+/; //'*' = ascii 42, parsed as [\\c*]+ 

			
 
				             matchRegExp("\x0a\x09\x08", re, null);

			
 
				             matchRegExp("a*c*b*d*", re, "*c*");

			
 
				             matchRegExp("\\\\", re, "\\\\");

			
 
				             matchRegExp("////", re, null);

			
 
				             matchRegExp("ccc", re, "ccc");

			
 
				             

			
 
				-            re = /[\c}]*/; //'}' = ascii 125

			
 
				+            re = /[\c}]*/; //'}' = ascii 125, parsed as [\\c}]*

			
 
				             matchRegExp("\x1d\x7d\x3d", re, "");

			
 
				             matchRegExp("}c}}cd*c*b*d*", re, "}c}}c");

			
 
				             matchRegExp("\\\\", re, "\\\\");

			
 
				             matchRegExp("////", re, "");

			
 
				             matchRegExp("ccc", re, "ccc");

			
 
				             

			
 
				-            re = /[\c;]+/; //';' = ascii 59

			
 
				+            re = /[\c;]+/; //';' = ascii 59, parsed as [\\c;]+

			
 
				             matchRegExp("\x1b\x1c", re, null);

			
 
				             matchRegExp("d;c;d;*", re, ";c;");

			
 
				             matchRegExp("\\\\", re, "\\\\");

			
 
				             matchRegExp("////", re, null);

			
 
				             matchRegExp("ccc", re, "ccc");

			
 
				             

			
 
				-            re = /\c%/; //'%' = ascii x25

			
 
				+            re = /\c%/; //'%' = ascii x25, parsed as \\c%

			
 
				             matchRegExp("\\", re, null);

			
 
				             matchRegExp("\\", re, null);

			
 
				             matchRegExp("\\c%", re, "\\c%");

			
@@ -126,50 +120,50 @@ var tests = [
 
				         name : "Control Character tests with unicode flag present",

			
 
				         body : function () 

			
 
				         {

			
 
				-            re = /[\cAg]+/u; //'A' = ascii x41

			
 
				+            re = /[\cAg]+/u; //'A' = ascii x41, parsed as [\x01g]+

			
 
				             matchRegExp("abcdefghi", re, "g");

			
 
				             matchRegExp("\\\\", re, null);

			
 
				             matchRegExp("////", re, null);

			
 
				             matchRegExp("\x01\x01gg\x02\x04ggg", re, "\x01\x01gg");            

			
 
				             

			
 
				-            re = /[\czA]+/u;  //'z' = ascii x7A

			
 
				+            re = /[\czA]+/u;  //'z' = ascii x7A, parsed as [\x1AA]+

			
 
				             matchRegExp("abcdefghi", re, null);

			
 
				             matchRegExp("\\\\", re, null);

			
 
				             matchRegExp("////", re, null);

			
 
				             matchRegExp("YZA\x1aABC", re, "A\x1aA");    

			
 
				             

			
 
				-            assert.throws(() => eval("\"\".match(/[\\c]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+            assert.throws(() => eval("\"\".match(/[\\c]/u)"), SyntaxError, "(Character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by no character here.", 

			
 
				                         "Invalid regular expression: invalid escape in unicode pattern");

			
 
				-            assert.throws(() => eval("\"\".match(/[\\c-d]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+            assert.throws(() => eval("\"\".match(/[\\c-d]/u)"), SyntaxError, "(Character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a dash, '-', here.", 

			
 
				                         "Invalid regular expression: invalid escape in unicode pattern");

			
 
				-            assert.throws(() => eval("\"\".match(/[ab\\c_$]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+            assert.throws(() => eval("\"\".match(/[ab\\c_$]/u)"), SyntaxError, "(Character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by an underscore, '_', here.",

			
 
				                         "Invalid regular expression: invalid escape in unicode pattern");

			
 
				-            assert.throws(() => eval("\"\".match(/[ab\\c\\d]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+            assert.throws(() => eval("\"\".match(/[ab\\c\\d]/u)"), SyntaxError, "(Character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a backslash, '\\', here.", 

			
 
				                         "Invalid regular expression: invalid escape in unicode pattern");

			
 
				-            assert.throws(() => eval("\"\".match(/[ab\\c3]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+            assert.throws(() => eval("\"\".match(/[ab\\c3]/u)"), SyntaxError, "(Character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a number, '3', here.", 

			
 
				                         "Invalid regular expression: invalid escape in unicode pattern");

			
 
				                         

			
 
				-            re = /\cAg/u;  //'A' = ascii x41

			
 
				+            re = /\cAg/u;  //'A' = ascii x41, parsed as "\x01g"

			
 
				             matchRegExp("abcdefghi", re, null);

			
 
				             matchRegExp("\\\\", re, null);

			
 
				             matchRegExp("////", re, null);

			
 
				             matchRegExp("\x01\x01gg\x02\x04ggg", re, "\x01g");            

			
 
				             

			
 
				-            re = /\czA/u;  //'z' = ascii x7A

			
 
				+            re = /\czA/u;  //'z' = ascii x7A, parsed as "\x1aA"

			
 
				             matchRegExp("abcdefghi", re, null);

			
 
				             matchRegExp("\\\\", re, null);

			
 
				             matchRegExp("////", re, null);

			
 
				             matchRegExp("YZA\x1aABC", re, "\x1aA");   

			
 
				             

			
 
				-            assert.throws(() => eval("\"\".match(/\\c/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+            assert.throws(() => eval("\"\".match(/\\c/u)"), SyntaxError, "(Non-character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by no character here.", 

			
 
				                         "Invalid regular expression: invalid escape in unicode pattern");

			
 
				-            assert.throws(() => eval("\"\".match(/\\c-d/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+            assert.throws(() => eval("\"\".match(/\\c-d/u)"), SyntaxError, "(Non-character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a dash, '-', here.", 

			
 
				                         "Invalid regular expression: invalid escape in unicode pattern");

			
 
				-            assert.throws(() => eval("\"\".match(/ab\\c_$/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+            assert.throws(() => eval("\"\".match(/ab\\c_$/u)"), SyntaxError, "(Non-character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by an underscore, '_', here.",

			
 
				                         "Invalid regular expression: invalid escape in unicode pattern");

			
 
				-            assert.throws(() => eval("\"\".match(/ab\\c\\d/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+            assert.throws(() => eval("\"\".match(/ab\\c\\d/u)"), SyntaxError, "(Non-character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a backslash, '\\', here.", 

			
 
				                         "Invalid regular expression: invalid escape in unicode pattern");

			
 
				-            assert.throws(() => eval("\"\".match(/ab\\c3/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present", 

			
 
				+            assert.throws(() => eval("\"\".match(/ab\\c3/u)"), SyntaxError, "(Non-character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a number, '3', here.", 

			
 
				                         "Invalid regular expression: invalid escape in unicode pattern");

			
 
				         }

			
 
				     },

			
@@ -177,16 +171,16 @@ var tests = [
 
				         name : "Control character edge cases",

			
 
				         body : function () 

			
 
				         {

			
 
				-            re = /[\c-g]+/; //'-' = ascii x2D

			
 
				+            re = /[\c-g]+/; //'-' = ascii x2D, parsed as [\\c-g]+ 

			
 
				             matchRegExp("abcdefghi", re, "cdefg");

			
 
				             matchRegExp("\\\\", re, "\\\\");

			
 
				             matchRegExp("////", re, null);

			
 
				             matchRegExp("\x0d", re, null);

			
 
				             matchRegExp("aobd\\f\\d", re, "d\\f\\d");            

			
 
				             

			
 
				-            re = /[\c-]+/; //'-' = ascii x2D

			
 
				+            re = /[\c-]+/; //'-' = ascii x2D, parsed as [\\c-]+

			
 
				             matchRegExp("abcdefghi", re, "c");

			
 
				-            matchRegExp("\x0d", re, null);

			
 
				+            matchRegExp("\x0dc--c", re, "c--c");

			
 
				             matchRegExp("\\\\", re, "\\\\");

			
 
				             matchRegExp("////", re, null);

			
 
				             matchRegExp("aobd\\f\\d", re, "\\");  

			
@@ -198,4 +192,4 @@ var tests = [
 
				 

			
 
				 testRunner.runTests(tests, {

			
 
				     verbose : WScript.Arguments[0] != "summary"

			
 
				-});
			
 
				+});