7 anos atrás · eb33f9a451
--- a/lib/Parser/DebugWriter.cpp
+++ b/lib/Parser/DebugWriter.cpp
@@ -72,6 +72,8 @@ namespace UnifiedRegex
 
				         CheckForNewline();
			
 
				         if (c > 0xff)
			
 
				             Output::Print(_u("\\u%lc%lc%lc%lc"), hex[c >> 12], hex[(c >> 8) & 0xf], hex[(c >> 4) & 0xf], hex[c & 0xf]);
			
 
				+        else if (c == '-')
			
 
				+            Output::Print(_u("\\x2d"));
			
 
				         else if (c < ' ' || c > '~')
			
 
				             Output::Print(_u("\\x%lc%lc"), hex[c >> 4], hex[c & 0xf]);
			
 
				         else
			
--- a/lib/Parser/RegexParser.cpp
+++ b/lib/Parser/RegexParser.cpp
@@ -1931,6 +1931,7 @@ namespace UnifiedRegex
 
				         codepoint_t pendingRangeStart = INVALID_CODEPOINT;
			
 
				         codepoint_t pendingRangeEnd = INVALID_CODEPOINT;
			
 
				         bool previousSurrogatePart = false;
			
 
				+
			
 
				         while(nextChar != ']')
			
 
				         {
			
 
				             current = next;
			
@@ -2034,7 +2035,7 @@ namespace UnifiedRegex
 
				 
			
 
				                     lastCodepoint = INVALID_CODEPOINT;
			
 
				                 }
			
 
				-                // If we the next character is the end of range ']', then we can't have a surrogate pair.
			
 
				+                // If the next character is the end of range ']', then we can't have a surrogate pair.
			
 
				                 // The current character is the range end, if we don't already have a candidate.
			
 
				                 else if (ECLookahead() == ']' && pendingRangeEnd == INVALID_CODEPOINT)
			
 
				                 {
			
@@ -2124,6 +2125,10 @@ namespace UnifiedRegex
 
				         codepoint_t pendingRangeStart = INVALID_CODEPOINT;
			
 
				         EncodedChar nextChar = ECLookahead();
			
 
				         bool previousWasASurrogate = false;
			
 
				+        bool currIsACharSet = false;
			
 
				+        bool prevWasACharSet = false;
			
 
				+        bool prevprevWasACharSet = false;
			
 
				+
			
 
				         while(nextChar != ']')
			
 
				         {
			
 
				             codepoint_t codePointToSet = INVALID_CODEPOINT;
			
@@ -2147,30 +2152,30 @@ namespace UnifiedRegex
 
				             else if (nextChar == '\\')
			
 
				             {
			
 
				                 Node* returnedNode = ClassEscapePass1(&deferredCharNode, &deferredSetNode, previousWasASurrogate);
			
 
				+                codePointToSet = pendingCodePoint;
			
 
				 
			
 
				                 if (returnedNode->tag == Node::MatchSet)
			
 
				                 {
			
 
				-                    codePointToSet = pendingCodePoint;
			
 
				-                    pendingCodePoint = INVALID_CODEPOINT;
			
 
				+                    pendingCodePoint = nextChar;
			
 
				                     if (pendingRangeStart != INVALID_CODEPOINT)
			
 
				                     {
			
 
				                         codePointSet.Set(ctAllocator, '-');
			
 
				                     }
			
 
				                     pendingRangeStart = INVALID_CODEPOINT;
			
 
				                     codePointSet.UnionInPlace(ctAllocator, deferredSetNode.set);
			
 
				+                    currIsACharSet = true;
			
 
				                 }
			
 
				                 else
			
 
				                 {
			
 
				                     // Just a character
			
 
				-                    codePointToSet = pendingCodePoint;
			
 
				                     pendingCodePoint = deferredCharNode.cs[0];
			
 
				                 }
			
 
				             }
			
 
				             else if (nextChar == '-')
			
 
				             {
			
 
				-                if (pendingRangeStart != INVALID_CODEPOINT || pendingCodePoint == INVALID_CODEPOINT || ECLookahead(1) == ']')
			
 
				+                if ((!prevWasACharSet && (pendingRangeStart != INVALID_CODEPOINT || pendingCodePoint == INVALID_CODEPOINT)) ||  ECLookahead(1) == ']')
			
 
				                 {
			
 
				-                    // - is just a char, or end of a range.
			
 
				+                    // '-' is just a char, or the end of a range. When the previous class item was a character set (e.g. \s) this branch is only taken if ']' follows; otherwise the '-' is treated as the beginning of a range.
			
 
				                     codePointToSet = pendingCodePoint;
			
 
				                     pendingCodePoint = '-';
			
 
				                     ECConsume();
			
@@ -2192,14 +2197,22 @@ namespace UnifiedRegex
 
				             {
			
 
				                 if (pendingRangeStart != INVALID_CODEPOINT)
			
 
				                 {
			
 
				-                    if (pendingRangeStart > pendingCodePoint)
			
 
				+                    if (pendingRangeStart > pendingCodePoint && !prevprevWasACharSet)
			
 
				                     {
			
 
				                         //We have no unicodeFlag, but current range contains surrogates, thus we may end up having to throw a "Syntax" error here
			
 
				                         //This breaks the notion of Pass0 check for valid syntax, because we don't know if we have a unicode option
			
 
				                         Assert(!unicodeFlagPresent);
			
 
				                         Fail(JSERR_RegExpBadRange);
			
 
				                     }
			
 
				-                    codePointSet.SetRange(ctAllocator, pendingRangeStart, pendingCodePoint);
			
 
				+                    if (prevprevWasACharSet)
			
 
				+                    {
			
 
				+                        codePointSet.Set(ctAllocator, '-');
			
 
				+                        codePointSet.Set(ctAllocator, pendingCodePoint);
			
 
				+                    }
			
 
				+                    else
			
 
				+                    {
			
 
				+                        codePointSet.SetRange(ctAllocator, pendingRangeStart, pendingCodePoint);
			
 
				+                    }
			
 
				                     pendingRangeStart = pendingCodePoint = INVALID_CODEPOINT;
			
 
				                 }
			
 
				                 else
			
@@ -2209,6 +2222,9 @@ namespace UnifiedRegex
 
				             }
			
 
				 
			
 
				             nextChar = ECLookahead();
			
 
				+            prevprevWasACharSet = prevWasACharSet;
			
 
				+            prevWasACharSet = currIsACharSet;
			
 
				+            currIsACharSet = false;
			
 
				         }
			
 
				 
			
 
				         if (pendingCodePoint != INVALID_CODEPOINT)
			
--- a/test/Regex/characterclass_with_range.js
+++ b/test/Regex/characterclass_with_range.js
@@ -0,0 +1,163 @@
 
				+//-------------------------------------------------------------------------------------------------------

			
 
				+// Copyright (C) Microsoft. All rights reserved.

			
 
				+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.

			
 
				+//-------------------------------------------------------------------------------------------------------

			
 
				+

			
 
				+WScript.LoadScriptFile("..\\UnitTestFramework\\UnitTestFramework.js"); // NOTE(review): presumably supplies the assert/testRunner helpers used below -- confirm

			
 
				+// Patterns under test. Each is anchored with ^...$, so it only matches a one-character string taken from the class.

			
 
				+let re = /^[\s-a-z]$/; // '-' directly after \s; the tests below expect whitespace, 'a' and 'z' to match but not 'b' or 'q'

			
 
				+let reIgnoreCase = /^[\s-a-z]$/i; // same class with the i flag; the tests below expect 'A' to match and 'O' not to

			
 
				+let reUnicode = /^[\s-z]$/u; // u flag; tests expect '-' and ' ' to match, 'b' not. NOTE(review): outside Annex B the ECMAScript grammar rejects a class escape as a range endpoint under /u -- confirm this literal is meant to parse

			
 
				+let reNoCharClass = /^[a-c-z]$/; // no class escape; the tests below expect 'b', '-' and 'z' to match but not 'y'

			
 
				+

			
 
				+var tests = [

			
 
				+    /*No Flag RegExp Tests begin*/

			
 
				+    {

			
 
				+        name : "Ensure 'a-z' not counted as range",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isFalse(re.test("b"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Ensure 'a' included in set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(re.test("a"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Ensure ' ' included in set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(re.test(" "));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Ensure 'z' included in set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(re.test("z"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Ensure '\t' included in set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(re.test("\t"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Ensure 'a-z' not counted as range",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isFalse(re.test("q"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Ensure '\' not counted in set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isFalse(re.test("\\"));

			
 
				+        }

			
 
				+    },

			
 
				+    /*No Flag RegExp Tests End*/

			
 
				+    /*IgnoreCase Flag RegExp Tests Begin*/

			
 
				+    {

			
 
				+        name : "Ensure 'O' not included in set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isFalse(reIgnoreCase.test("O"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Ensure 'A' included in set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(reIgnoreCase.test("A"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Ensure ' ' included in set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(reIgnoreCase.test(" "));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Ensure 'z' included in set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(reIgnoreCase.test("z"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "Ensure '\t' included in set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(reIgnoreCase.test("\t"));

			
 
				+        }

			
 
				+    },

			
 
				+    /*IgnoreCase Flag RegExp Tests End*/

			
 
				+    /*Unicode Flag RegExp Tests Begin*/

			
 
				+    {

			
 
				+        name : "'-' included in set since \s-z treated as union of three types, not range",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(reUnicode.test("-"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "' ' in set from \s character set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(reUnicode.test(" "));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "b not included in '\s-z'",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isFalse(reUnicode.test("b"));

			
 
				+        }

			
 
				+    },

			
 
				+    /*Unicode Flag RegExp Tests End*/

			
 
				+    /*Non-character class tests Begin*/

			
 
				+    {

			
 
				+        name : "First range is used",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(reNoCharClass.test("b"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "'-' included in set from 2nd dash",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(reNoCharClass.test("-"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "z included in set",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isTrue(reNoCharClass.test("z"));

			
 
				+        }

			
 
				+    },

			
 
				+    {

			
 
				+        name : "'c-z' not viewed as range",

			
 
				+        body : function () 

			
 
				+        {

			
 
				+            assert.isFalse(reNoCharClass.test("y"));

			
 
				+        }

			
 
				+    }    

			
 
				+    /*Non-character class tests End*/

			
 
				+];

			
 
				+

			
 
				+if (typeof modifyTests !== "undefined") {

			
 
				+    tests = modifyTests(tests);

			
 
				+}

			
 
				+

			
 
				+testRunner.runTests(tests, {

			
 
				+    verbose : WScript.Arguments[0] != "summary"

			
 
				+});

			
--- a/test/Regex/rlexe.xml
+++ b/test/Regex/rlexe.xml
@@ -229,4 +229,10 @@
 
				       <compile-flags>-args summary -endargs</compile-flags>
			
 
				     </default>
			
 
				   </test>
			
 
				+  <test>
				+    <default>
				+      <files>characterclass_with_range.js</files>
				+      <compile-flags>-args summary -endargs</compile-flags>
				+    </default>
				+  </test>
			
 
				 </regress-exe>