9 年之前 · 08992648e5
--- a/lib/Common/Codex/Utf8Codex.cpp
+++ b/lib/Common/Codex/Utf8Codex.cpp
@@ -437,13 +437,15 @@ LSlowPath:
 
				         return DecodeUnitsIntoAndNullTerminate(buffer, pbUtf8, pbEnd, options);
			
 
				     }
			
 
				 
			
 
				-    bool CharsAreEqual(__in_ecount(cch) LPCOLESTR pch, LPCUTF8 bch, LPCUTF8 end, size_t cch, DecodeOptions options)
			
 
				+    bool CharsAreEqual(LPCOLESTR pch, LPCUTF8 bch, LPCUTF8 end, DecodeOptions options)
			
 
				     {
			
 
				         DecodeOptions localOptions = options;
			
 
				-        while (cch-- > 0)
			
 
				+        while (bch < end)
			
 
				         {
			
 
				             if (*pch++ != utf8::Decode(bch, end, localOptions))
			
 
				+            {
			
 
				                 return false;
			
 
				+            }
			
 
				         }
			
 
				         return true;
			
 
				     }
			
--- a/lib/Common/Codex/Utf8Codex.h
+++ b/lib/Common/Codex/Utf8Codex.h
@@ -307,7 +307,7 @@ namespace utf8
 
				     size_t EncodeTrueUtf8IntoAndNullTerminate(__out_ecount(cch * 3 + 1) utf8char_t *buffer, __in_ecount(cch) const char16 *source, charcount_t cch);
			
 
				 
			
 
				     // Returns true if the pch refers to a UTF-16LE encoding of the given UTF-8 encoding bch.
			
 
				-    bool CharsAreEqual(__in_ecount(cch) LPCOLESTR pch, LPCUTF8 bch, LPCUTF8 end, size_t cch, DecodeOptions options = doDefault);
			
 
				+    bool CharsAreEqual(LPCOLESTR pch, LPCUTF8 bch, LPCUTF8 end, DecodeOptions options = doDefault);
			
 
				 
			
 
				     // Convert the character index into a byte index.
			
 
				     size_t CharacterIndexToByteIndex(__in_ecount(cbLength) LPCUTF8 pch, size_t cbLength, const charcount_t cchIndex, size_t cbStartIndex, charcount_t cchStartIndex, DecodeOptions options = doDefault);
			
--- a/lib/Parser/Hash.cpp
+++ b/lib/Parser/Hash.cpp
@@ -222,7 +222,7 @@ IdentPtr HashTbl::PidHashNameLen(CharType const * prgch, CharType const * end, u
 
				     // NOTE: We use case sensitive hash during compilation, but the runtime
			
 
				     // uses case insensitive hashing so it can do case insensitive lookups.
			
 
				 
			
 
				-    uint32 luHash = CaseSensitiveComputeHashCch(prgch, end, cch);
			
 
				+    uint32 luHash = CaseSensitiveComputeHash(prgch, end);
			
 
				     return PidHashNameLenWithHash(prgch, end, cch, luHash);
			
 
				 }
			
 
				 template IdentPtr HashTbl::PidHashNameLen<utf8char_t>(utf8char_t const * prgch, utf8char_t const * end, uint32 cch);
			
@@ -244,7 +244,7 @@ IdentPtr HashTbl::PidHashNameLenWithHash(_In_reads_(cch) CharType const * prgch,
 
				 {
			
 
				     Assert(cch >= 0);
			
 
				     AssertArrMemR(prgch, cch);
			
 
				-    Assert(luHash == CaseSensitiveComputeHashCch(prgch, end, cch));
			
 
				+    Assert(luHash == CaseSensitiveComputeHash(prgch, end));
			
 
				 
			
 
				     IdentPtr * ppid;
			
 
				     IdentPtr pid;
			
@@ -352,7 +352,7 @@ IdentPtr HashTbl::FindExistingPid(
 
				     for (bucketCount = 0; nullptr != (pid = *ppid); ppid = &pid->m_pidNext, bucketCount++)
			
 
				     {
			
 
				         if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
			
 
				-            HashTbl::CharsAreEqual(pid->m_sz, prgch, end, cch))
			
 
				+            HashTbl::CharsAreEqual(pid->m_sz, prgch, end))
			
 
				         {
			
 
				             return pid;
			
 
				         }
			
@@ -394,12 +394,12 @@ template IdentPtr HashTbl::FindExistingPid<char16>(
 
				 
			
 
				 bool HashTbl::Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch)
			
 
				 {
			
 
				-    uint32 luHash = CaseSensitiveComputeHashCch(prgch, prgch + cch, cch);
			
 
				+    uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
			
 
				 
			
 
				     for (auto pid = m_prgpidName[luHash & m_luMask]; pid; pid = pid->m_pidNext)
			
 
				     {
			
 
				         if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
			
 
				-            HashTbl::CharsAreEqual(pid->m_sz, prgch + cch, prgch, cch))
			
 
				+            HashTbl::CharsAreEqual(pid->m_sz, prgch + cch, prgch))
			
 
				         {
			
 
				             return true;
			
 
				         }
			
@@ -419,7 +419,7 @@ bool HashTbl::Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch)
 
				 // This method is used during colorizing when scanner isn't interested in storing the actual id and does not care about conversion of escape sequences
			
 
				 tokens HashTbl::TkFromNameLenColor(_In_reads_(cch) LPCOLESTR prgch, uint32 cch)
			
 
				 {
			
 
				-    uint32 luHash = CaseSensitiveComputeHashCch(prgch, prgch + cch, cch);
			
 
				+    uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
			
 
				 
			
 
				     // look for a keyword
			
 
				 #include "kwds_sw.h"
			
@@ -446,7 +446,7 @@ LDefault:
 
				 // This method is used during colorizing when scanner isn't interested in storing the actual id and does not care about conversion of escape sequences
			
 
				 tokens HashTbl::TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode)
			
 
				 {
			
 
				-    uint32 luHash = CaseSensitiveComputeHashCch(prgch, prgch + cch, cch);
			
 
				+    uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
			
 
				 
			
 
				     // look for a keyword
			
 
				 #include "kwds_sw.h"
			
--- a/lib/Parser/Hash.h
+++ b/lib/Parser/Hash.h
@@ -21,8 +21,8 @@ typedef StaticSymLen<0> StaticSym;
 
				 /***************************************************************************
			
 
				 Hashing functions. Definitions in core\hashfunc.cpp.
			
 
				 ***************************************************************************/
			
 
				-ULONG CaseSensitiveComputeHashCch(LPCOLESTR prgch, LPCOLESTR end, int32 cch);
			
 
				-ULONG CaseSensitiveComputeHashCch(LPCUTF8 prgch, LPCUTF8 end, int32 cch);
			
 
				+ULONG CaseSensitiveComputeHash(LPCOLESTR prgch, LPCOLESTR end);
			
 
				+ULONG CaseSensitiveComputeHash(LPCUTF8 prgch, LPCUTF8 end);
			
 
				 ULONG CaseInsensitiveComputeHash(LPCOLESTR posz);
			
 
				 
			
 
				 enum
			
@@ -407,22 +407,22 @@ private:
 
				     uint CountAndVerifyItems(IdentPtr *buckets, uint bucketCount, uint mask);
			
 
				 #endif
			
 
				 
			
 
				-    static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(cch2) LPCOLESTR psz2, LPCOLESTR psz2end, int32 cch2)
			
 
				+    static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) LPCOLESTR psz2, LPCOLESTR psz2end)
			
 
				     {
			
 
				-        Unused(psz2end);
			
 
				-        return memcmp(psz1, psz2, cch2 * sizeof(OLECHAR)) == 0;
			
 
				+        return memcmp(psz1, psz2, (psz2end - psz2) * sizeof(OLECHAR)) == 0;
			
 
				     }
			
 
				-    static bool CharsAreEqual(__in_z LPCOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end, int32 cch2)
			
 
				+    static bool CharsAreEqual(__in_z LPCOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end)
			
 
				     {
			
 
				-        return utf8::CharsAreEqual(psz1, psz2, psz2end, cch2, utf8::doAllowThreeByteSurrogates);
			
 
				+        return utf8::CharsAreEqual(psz1, psz2, psz2end, utf8::doAllowThreeByteSurrogates);
			
 
				     }
			
 
				-    static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(cch2) char const * psz2, char const * psz2end, int32 cch2)
			
 
				+    static bool CharsAreEqual(__in_z LPCOLESTR psz1, __in_ecount(psz2end - psz2) char const * psz2, char const * psz2end)
			
 
				     {
			
 
				-        Unused(psz2end);
			
 
				-        while (cch2-- > 0)
			
 
				+        while (psz2 < psz2end)
			
 
				         {
			
 
				             if (*psz1++ != *psz2++)
			
 
				+            {
			
 
				                 return false;
			
 
				+            }
			
 
				         }
			
 
				         return true;
			
 
				     }
			
--- a/lib/Parser/HashFunc.cpp
+++ b/lib/Parser/HashFunc.cpp
@@ -16,34 +16,34 @@
 
				  *  of the hash function so things don't go out of sync.
			
 
				  */
			
 
				 
			
 
				-ULONG CaseSensitiveComputeHashCch(LPCOLESTR prgch, LPCOLESTR end, int32 cch)
			
 
				+ULONG CaseSensitiveComputeHash(LPCOLESTR prgch, LPCOLESTR end)
			
 
				 {
			
 
				-    Unused(end);
			
 
				     ULONG luHash = 0;
			
 
				 
			
 
				-    while (cch-- > 0)
			
 
				+    while (prgch < end)
			
 
				+    {
			
 
				         luHash = 17 * luHash + *(char16 *)prgch++;
			
 
				+    }
			
 
				     return luHash;
			
 
				 }
			
 
				 
			
 
				-ULONG CaseSensitiveComputeHashCch(LPCUTF8 prgch, LPCUTF8 end, int32 cch)
			
 
				+ULONG CaseSensitiveComputeHash(LPCUTF8 prgch, LPCUTF8 end)
			
 
				 {
			
 
				     utf8::DecodeOptions options = utf8::doAllowThreeByteSurrogates;
			
 
				     ULONG luHash = 0;
			
 
				 
			
 
				-    while (cch-- > 0)
			
 
				+    while (prgch < end)
			
 
				     {
			
 
				         luHash = 17 * luHash + utf8::Decode(prgch, end, options);
			
 
				     }
			
 
				     return luHash;
			
 
				 }
			
 
				 
			
 
				-ULONG CaseSensitiveComputeHashCch(char const * prgch, char const * end, int32 cch)
			
 
				+ULONG CaseSensitiveComputeHash(char const * prgch, char const * end)
			
 
				 {
			
 
				-    Unused(end);
			
 
				     ULONG luHash = 0;
			
 
				 
			
 
				-    while (cch-- > 0)
			
 
				+    while (prgch < end)
			
 
				     {
			
 
				         Assert(utf8::IsStartByte(*prgch) && !utf8::IsLeadByte(*prgch));
			
 
				         luHash = 17 * luHash + *prgch++;
			
--- a/test/utf8/rlexe.xml
+++ b/test/utf8/rlexe.xml
@@ -10,26 +10,22 @@
 
				   <test>
			
 
				     <default>
			
 
				       <files>unicode_digit_as_identifier_should_work.js</files>
			
 
				-      <baseline />
			
 
				       <tags>exclude_serialized,bugfix</tags>
			
 
				     </default>
			
 
				   </test>
			
 
				   <test>
			
 
				     <default>
			
 
				       <files>surrogatepair.js</files>
			
 
				-      <baseline />
			
 
				     </default>
			
 
				   </test>
			
 
				   <test>
			
 
				     <default>
			
 
				       <files>bugGH2386.js</files>
			
 
				-      <baseline />
			
 
				     </default>
			
 
				   </test>
			
 
				   <test>
			
 
				     <default>
			
 
				       <files>unicode_sequence_serialized.js</files>
			
 
				-      <baseline />
			
 
				       <compile-flags>-forceserialized -oopjit-</compile-flags>
			
 
				     </default>
			
 
				   </test>
			
--- a/test/utf8/surrogatepair.js
+++ b/test/utf8/surrogatepair.js
@@ -9,7 +9,7 @@ var y = "function () { '鄏𡄻�莞�遲���屢���箋成鄏賴旭鄑温收鄏擒�鄏賴忖
 
				 var x = function () { '鄏𡄻�莞�遲���屢���箋成鄏賴旭鄑温收鄏擒�鄏賴忖 鄏兒江鄏眇成鄑温戍鄍�' ;WScript.Echo('hello'); }

			
 
				 

			
 
				 // 2 bytes 

			
 
				-var y2 = "function () { '𥌓 kugu' ;WScript.Echo('hello'); }" 

			
 
				-var x2 = function () { '𥌓 kugu' ;WScript.Echo('hello'); } 

			
 
				+var y2 = "function () { 'üç kuğu' ;WScript.Echo('hello'); }" 

			
 
				+var x2 = function () { 'üç kuğu' ;WScript.Echo('hello'); } 

			
 
				 

			
 
				 WScript.Echo((x.toString() === y && x2.toString() === y2) ? "PASS" : "FAIL");
			
--- a/test/utf8/unicode_digit_as_identifier_should_work.js
+++ b/test/utf8/unicode_digit_as_identifier_should_work.js
@@ -3,4 +3,6 @@
 
				 // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.

			
 
				 //-------------------------------------------------------------------------------------------------------

			
 
				 

			
 
				-var a᠐᠙ᠠᡷᢀᡨᡩᡪᡫ=20;
			
 
				+var a᠐᠙ᠠᡷᢀᡨᡩᡪᡫ=20;

			
 
				+

			
 
				+print('pass');
			
--- a/test/utf8/unicode_sequence_serialized.js
+++ b/test/utf8/unicode_sequence_serialized.js
@@ -6,3 +6,5 @@
 
				 (function () {

			
 
				        /(오)/ ;

			
 
				 })();

			
 
				+

			
 
				+print('pass');