há 9 anos atrás · f85beb7c04
--- a/bin/NativeTests/FileLoadHelpers.cpp
+++ b/bin/NativeTests/FileLoadHelpers.cpp
@@ -9,7 +9,7 @@ HRESULT FileLoadHelpers::LoadScriptFromFile(LPCSTR filename, LPCWSTR& contents,
 
				 {
			
 
				     HRESULT hr = S_OK;
			
 
				     LPCWSTR contentsRaw = nullptr;
			
 
				-    byte * pRawBytes = nullptr;
			
 
				+    LPCUTF8 pRawBytes = nullptr;
			
 
				     UINT lengthBytes = 0;
			
 
				     bool isUtf8 = false;
			
 
				     contents = nullptr;
			
@@ -119,7 +119,7 @@ HRESULT FileLoadHelpers::LoadScriptFromFile(LPCSTR filename, LPCWSTR& contents,
 
				             IfFailGo(E_OUTOFMEMORY);
			
 
				         }
			
 
				 
			
 
				-        utf8::DecodeIntoAndNullTerminate((char16*) contents, pRawBytes, pRawBytes + lengthBytes, cUtf16Chars, decodeOptions);
			
 
				+        utf8::DecodeUnitsIntoAndNullTerminate((char16*)contents, pRawBytes, pRawBytes + lengthBytes, decodeOptions);
			
 
				     }
			
 
				 
			
 
				 Error:
			
--- a/lib/Common/Codex/Utf8Codex.cpp
+++ b/lib/Common/Codex/Utf8Codex.cpp
@@ -376,40 +376,9 @@ LFourByte:
 
				         else
			
 
				             return ptr;
			
 
				     }
			
 
				-
			
 
				-    void DecodeInto(__out_ecount_full(cch) char16 *buffer, LPCUTF8 ptr, LPCUTF8 end, size_t cch, DecodeOptions options)
			
 
				-    {
			
 
				-        DecodeOptions localOptions = options;
			
 
				-
			
 
				-        if (!ShouldFastPath(ptr, buffer)) goto LSlowPath;
			
 
				-
			
 
				-LFastPath:
			
 
				-        while (cch >= 4)
			
 
				-        {
			
 
				-            uint32 bytes = *(uint32 *)ptr;
			
 
				-            if ((bytes & 0x80808080) != 0) goto LSlowPath;
			
 
				-            ((uint32 *)buffer)[0] = (bytes & 0x7F) | ((bytes << 8) & 0x7F0000);
			
 
				-            ((uint32 *)buffer)[1] = ((bytes >> 16) & 0x7F) | ((bytes >> 8) & 0x7F0000);
			
 
				-            ptr += 4;
			
 
				-            buffer += 4;
			
 
				-            cch -= 4;
			
 
				-        }
			
 
				-LSlowPath:
			
 
				-        while (cch-- > 0)
			
 
				-        {
			
 
				-            *buffer++ = Decode(ptr, end, localOptions);
			
 
				-            if (ShouldFastPath(ptr, buffer)) goto LFastPath;
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    void DecodeIntoAndNullTerminate(__out_ecount(cch+1) __nullterminated char16 *buffer, LPCUTF8 ptr, LPCUTF8 end, size_t cch, DecodeOptions options)
			
 
				-    {
			
 
				-        DecodeInto(buffer, ptr, end, cch, options);
			
 
				-        buffer[cch] = 0;
			
 
				-    }
			
 
				-
			
 
				-    _Ret_range_(0, pbEnd - _Old_(pbUtf8))
			
 
				-    size_t DecodeUnitsInto(_Out_writes_(pbEnd - pbUtf8) char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
			
 
				+    
			
 
				+    _Use_decl_annotations_
			
 
				+    size_t DecodeUnitsInto(char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
			
 
				     {
			
 
				         DecodeOptions localOptions = options;
			
 
				 
			
@@ -454,13 +423,20 @@ LSlowPath:
 
				         return dest - buffer;
			
 
				     }
			
 
				 
			
 
				-    size_t DecodeUnitsIntoAndNullTerminate(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
			
 
				+    _Use_decl_annotations_
			
 
				+    size_t DecodeUnitsIntoAndNullTerminate(char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
			
 
				     {
			
 
				         size_t result = DecodeUnitsInto(buffer, pbUtf8, pbEnd, options);
			
 
				         buffer[(int)result] = 0;
			
 
				         return result;
			
 
				     }
			
 
				 
			
 
				+    _Use_decl_annotations_
			
 
				+    size_t DecodeUnitsIntoAndNullTerminateNoAdvance(char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
			
 
				+    {
			
 
				+        return DecodeUnitsIntoAndNullTerminate(buffer, pbUtf8, pbEnd, options);
			
 
				+    }
			
 
				+
			
 
				     bool CharsAreEqual(__in_ecount(cch) LPCOLESTR pch, LPCUTF8 bch, LPCUTF8 end, size_t cch, DecodeOptions options)
			
 
				     {
			
 
				         DecodeOptions localOptions = options;
			
--- a/lib/Common/Codex/Utf8Codex.h
+++ b/lib/Common/Codex/Utf8Codex.h
@@ -273,21 +273,6 @@ namespace utf8
 
				         return PrevCharFull(ptr, start);
			
 
				     }
			
 
				 
			
 
				-    // Decode a UTF-8 sequence of cch UTF-16 characters into buffer. ptr could advance up to 3 times
			
 
				-    // longer than cch so DecodeInto should only be used when it is already known that
			
 
				-    // ptr refers to at least cch number of UTF-8 sequences.
			
 
				-    void DecodeInto(__out_ecount_full(cch) char16 *buffer, LPCUTF8 ptr, LPCUTF8 end, size_t cch, DecodeOptions options = doDefault);
			
 
				-
			
 
				-    // Provided for dual-mode templates
			
 
				-    inline void DecodeInto(__out_ecount_full(cch) char16 *buffer, const char16 *ptr, const char16 *end, size_t cch, DecodeOptions /* options */ = doDefault)
			
 
				-    {
			
 
				-        Unused(end);
			
 
				-        memcpy_s(buffer, cch * sizeof(char16), ptr, cch * sizeof(char16));
			
 
				-    }
			
 
				-
			
 
				-    // Like DecodeInto but ensures buffer ends with a NULL at buffer[cch].
			
 
				-    void DecodeIntoAndNullTerminate(__out_ecount(cch+1) __nullterminated char16 *buffer, LPCUTF8 ptr, LPCUTF8 end, size_t cch, DecodeOptions options = doDefault);
			
 
				-
			
 
				     // Decode cb bytes from ptr into buffer, returning the number of characters converted and written to buffer
			
 
				     _Ret_range_(0, pbEnd - _Old_(pbUtf8))
			
 
				     size_t DecodeUnitsInto(_Out_writes_(pbEnd - pbUtf8) char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault);
			
@@ -295,6 +280,8 @@ namespace utf8
 
				     // Decode cb bytes from ptr into buffer, returning the number of characters converted and written to buffer (excluding the null terminator)
			
 
				     size_t DecodeUnitsIntoAndNullTerminate(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault);
			
 
				 
			
 
				+    size_t DecodeUnitsIntoAndNullTerminateNoAdvance(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault);
			
 
				+
			
 
				     // Encode a UTF-8 sequence into a UTF-8 sequence (which is just a memcpy). This is included for convenience in templates
			
 
				     // when the character encoding is a template parameter.
			
 
				     __range(cch, cch)
			
--- a/lib/Common/Codex/Utf8Helper.h
+++ b/lib/Common/Codex/Utf8Helper.h
@@ -73,7 +73,7 @@ namespace utf8
 
				         // Some node tests depend on the utf8 decoder not swallowing invalid unicode characters
			
 
				         // instead of replacing them with the "replacement" character. Pass a flag to our 
			
 
				         // decoder to require such behavior
			
 
				-        utf8::DecodeIntoAndNullTerminate(destString, (LPCUTF8) sourceString, (LPCUTF8) sourceString + cbSourceString, cchDestString, DecodeOptions::doAllowInvalidWCHARs);
			
 
				+        utf8::DecodeUnitsIntoAndNullTerminateNoAdvance(destString, (LPCUTF8) sourceString, (LPCUTF8) sourceString + cbSourceString, DecodeOptions::doAllowInvalidWCHARs);
			
 
				         Assert(destString[cchDestString] == 0);
			
 
				         static_assert(sizeof(utf8char_t) == sizeof(char), "Needs to be valid for cast");
			
 
				         *destStringPtr = destString;
			
--- a/lib/Jsrt/JsrtDebugUtils.cpp
+++ b/lib/Jsrt/JsrtDebugUtils.cpp
@@ -61,17 +61,19 @@ void JsrtDebugUtils::AddSourceLengthAndTextToObject(Js::DynamicObject* object, J
 
				     LPCUTF8 source = functionBody->GetStartOfDocument(_u("Source for debugging"));
			
 
				     size_t cbLength = functionBody->GetUtf8SourceInfo()->GetCbLength();
			
 
				     size_t startByte = utf8::CharacterIndexToByteIndex(source, cbLength, (const charcount_t)statementMap->sourceSpan.begin);
			
 
				+    size_t endByte = utf8::CharacterIndexToByteIndex(source, cbLength, (const charcount_t)statementMap->sourceSpan.end);
			
 
				+    int cch = statementMap->sourceSpan.end - statementMap->sourceSpan.begin;
			
 
				 
			
 
				-    int byteLength = statementMap->sourceSpan.end - statementMap->sourceSpan.begin;
			
 
				+    JsrtDebugUtils::AddPropertyToObject(object, JsrtDebugPropertyId::sourceLength, (double)cch, functionBody->GetScriptContext());
			
 
				 
			
 
				-    JsrtDebugUtils::AddPropertyToObject(object, JsrtDebugPropertyId::sourceLength, (double)byteLength, functionBody->GetScriptContext());
			
 
				-
			
 
				-    AutoArrayPtr<char16> sourceContent(HeapNewNoThrowArray(char16, byteLength + 1), byteLength + 1);
			
 
				+    AutoArrayPtr<char16> sourceContent(HeapNewNoThrowArray(char16, cch + 1), cch + 1);
			
 
				     if (sourceContent != nullptr)
			
 
				     {
			
 
				+        LPCUTF8 pbStart = source + startByte;
			
 
				+        LPCUTF8 pbEnd = pbStart + (endByte - startByte);
			
 
				         utf8::DecodeOptions options = functionBody->GetUtf8SourceInfo()->IsCesu8() ? utf8::doAllowThreeByteSurrogates : utf8::doDefault;
			
 
				-        utf8::DecodeIntoAndNullTerminate(sourceContent, source + startByte, source + startByte + cbLength, byteLength, options);
			
 
				-        JsrtDebugUtils::AddPropertyToObject(object, JsrtDebugPropertyId::sourceText, sourceContent, byteLength, functionBody->GetScriptContext());
			
 
				+        utf8::DecodeUnitsIntoAndNullTerminate(sourceContent, pbStart, pbEnd, options);
			
 
				+        JsrtDebugUtils::AddPropertyToObject(object, JsrtDebugPropertyId::sourceText, sourceContent, cch, functionBody->GetScriptContext());
			
 
				     }
			
 
				     else
			
 
				     {
			
@@ -96,7 +98,7 @@ void JsrtDebugUtils::AddSouceToObject(Js::DynamicObject * object, Js::Utf8Source
 
				         LPCUTF8 source = utf8SourceInfo->GetSource();
			
 
				         size_t cbLength = utf8SourceInfo->GetCbLength();
			
 
				         utf8::DecodeOptions options = utf8SourceInfo->IsCesu8() ? utf8::doAllowThreeByteSurrogates : utf8::doDefault;
			
 
				-        utf8::DecodeIntoAndNullTerminate(sourceContent, source, source + cbLength, cchLength, options);
			
 
				+        utf8::DecodeUnitsIntoAndNullTerminate(sourceContent, source, source + cbLength, options);
			
 
				         JsrtDebugUtils::AddPropertyToObject(object, JsrtDebugPropertyId::source, sourceContent, cchLength, utf8SourceInfo->GetScriptContext());
			
 
				     }
			
 
				     else
			
--- a/lib/Parser/Hash.h
+++ b/lib/Parser/Hash.h
@@ -434,7 +434,8 @@ private:
 
				     }
			
 
				     static void CopyString(__in_ecount(cch + 1) LPOLESTR psz1, LPCUTF8 psz2, LPCUTF8 psz2end, int32 cch)
			
 
				     {
			
 
				-        utf8::DecodeIntoAndNullTerminate(psz1, psz2, psz2end, cch);
			
 
				+        Unused(cch);
			
 
				+        utf8::DecodeUnitsIntoAndNullTerminate(psz1, psz2, psz2end);
			
 
				     }
			
 
				     static void CopyString(__in_ecount(cch + 1) LPOLESTR psz1, __in_ecount(cch) char const * psz2, char const * psz2end, int32 cch)
			
 
				     {
			
--- a/lib/Parser/Scan.cpp
+++ b/lib/Parser/Scan.cpp
@@ -2516,12 +2516,13 @@ HRESULT Scanner<EncodingPolicy>::SysAllocErrorLine(int32 ichMinLine, __out BSTR*
 
				     }
			
 
				 
			
 
				     typename EncodingPolicy::EncodedCharPtr pStart = static_cast<size_t>(ichMinLine) == IchMinLine() ? m_pchMinLine : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, ichMinLine);
			
 
				-    typename EncodingPolicy::EncodedCharPtr pEnd = AdjustedLast();
			
 
				 
			
 
				     // Determine the length by scanning for the next newline
			
 
				-    charcount_t cch = LineLength(pStart, pEnd);
			
 
				+    charcount_t cch = LineLength(pStart, m_pchLast);
			
 
				     Assert(cch <= LONG_MAX);
			
 
				 
			
 
				+    typename EncodingPolicy::EncodedCharPtr pEnd = static_cast<size_t>(ichMinLine) == IchMinLine() ? m_pchMinLine + cch : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, cch);
			
 
				+
			
 
				     *pbstrLine = SysAllocStringLen(NULL, cch);
			
 
				     if (!*pbstrLine)
			
 
				     {
			
--- a/lib/Parser/Scan.h
+++ b/lib/Parser/Scan.h
@@ -294,7 +294,7 @@ protected:
 
				     void ConvertToUnicode(__out_ecount_full(cch) LPOLESTR pch, charcount_t cch, EncodedCharPtr start, EncodedCharPtr end)
			
 
				     {
			
 
				         m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doSecondSurrogatePair);
			
 
				-        utf8::DecodeInto(pch, start, end, cch, m_decodeOptions);
			
 
				+        utf8::DecodeUnitsInto(pch, start, end, m_decodeOptions);
			
 
				     }
			
 
				 
			
 
				 
			
--- a/lib/Runtime/Base/Utf8SourceInfo.h
+++ b/lib/Runtime/Base/Utf8SourceInfo.h
@@ -78,10 +78,22 @@ namespace Js
 
				         void RetrieveSourceText(__out_ecount_full(cchLim - cchMin) LPOLESTR cpText, charcount_t cchMin, charcount_t cchLim) const
			
 
				         {
			
 
				             size_t cbLength = GetCbLength(_u("Utf8SourceInfo::RetrieveSourceText"));
			
 
				-            LPCUTF8 pSource = GetSource(_u("Utf8SourceInfo::RetrieveSourceText"));
			
 
				-            size_t cbMin = cbLength == GetCchLength() ? cchMin : utf8::CharacterIndexToByteIndex(pSource, cbLength, cchMin, utf8::doAllowThreeByteSurrogates);
			
 
				+            LPCUTF8 source = GetSource(_u("Utf8SourceInfo::RetrieveSourceText"));
			
 
				+            LPCUTF8 pbStart = nullptr;
			
 
				+            LPCUTF8 pbEnd = nullptr;
			
 
				             
			
 
				-            utf8::DecodeInto(cpText, pSource + cbMin, pSource + cbMin + cbLength, cchLim - cchMin, utf8::doAllowThreeByteSurrogates);
			
 
				+            if (cbLength == GetCchLength())
			
 
				+            {
			
 
				+                pbStart = source + cchMin;
			
 
				+                pbEnd = source + cchLim;
			
 
				+            }
			
 
				+            else
			
 
				+            {
			
 
				+                pbStart = source + utf8::CharacterIndexToByteIndex(source, cbLength, cchMin, utf8::doAllowThreeByteSurrogates);
			
 
				+                pbEnd = source + utf8::CharacterIndexToByteIndex(source, cbLength, cchLim, utf8::doAllowThreeByteSurrogates);
			
 
				+            }
			
 
				+            
			
 
				+            utf8::DecodeUnitsInto(cpText, pbStart, pbEnd, utf8::doAllowThreeByteSurrogates);
			
 
				         }
			
 
				 
			
 
				         size_t CharacterIndexToByteIndex(charcount_t cchIndex) const
			
--- a/lib/Runtime/Language/DynamicProfileStorage.cpp
+++ b/lib/Runtime/Language/DynamicProfileStorage.cpp
@@ -110,7 +110,7 @@ _Success_(return) bool DynamicProfileStorageReaderWriter::ReadUtf8String(__deref
 
				         return false;
			
 
				     }
			
 
				 
			
 
				-    utf8char_t * tempBuffer = NoCheckHeapNewArray(utf8char_t, urllen);
			
 
				+    utf8char_t* tempBuffer = NoCheckHeapNewArray(utf8char_t, urllen);
			
 
				     if (tempBuffer == nullptr)
			
 
				     {
			
 
				         Output::Print(_u("ERROR: DynamicProfileStorage: Out of memory reading '%s'\n"), filename);
			
@@ -133,7 +133,7 @@ _Success_(return) bool DynamicProfileStorageReaderWriter::ReadUtf8String(__deref
 
				         HeapDeleteArray(urllen, tempBuffer);
			
 
				         return false;
			
 
				     }
			
 
				-    utf8::DecodeIntoAndNullTerminate(name, tempBuffer, tempBuffer + urllen, length);
			
 
				+    utf8::DecodeUnitsIntoAndNullTerminateNoAdvance(name, tempBuffer, tempBuffer + urllen);
			
 
				     NoCheckHeapDeleteArray(urllen, tempBuffer);
			
 
				     *str = name;
			
 
				     *len = length;
			
--- a/lib/Runtime/Library/JavascriptFunction.cpp
+++ b/lib/Runtime/Library/JavascriptFunction.cpp
@@ -3058,7 +3058,9 @@ LABEL1:
 
				                     charcount_t count = min(DIAG_MAX_FUNCTION_STRING, func->LengthInChars());
			
 
				                     utf8::DecodeOptions options = sourceInfo->IsCesu8() ? utf8::doAllowThreeByteSurrogates : utf8::doDefault;
			
 
				                     LPCUTF8 source = func->GetSource(_u("JavascriptFunction::GetDiagValueString"));
			
 
				-                    utf8::DecodeInto(stringBuilder->AllocBufferSpace(count), source, source + sourceInfo->GetCbLength(_u("JavascriptFunction::GetDiagValueString")), count, options);
			
 
				+                    size_t cbLength = sourceInfo->GetCbLength(_u("JavascriptFunction::GetDiagValueString"));
			
 
				+                    size_t cbIndex = utf8::CharacterIndexToByteIndex(source, cbLength, count, options);
			
 
				+                    utf8::DecodeUnitsInto(stringBuilder->AllocBufferSpace(count), source, source + cbIndex, options);
			
 
				                     stringBuilder->IncreaseCount(count);
			
 
				                     return TRUE;
			
 
				                 }
			
--- a/lib/Runtime/Library/ScriptFunction.cpp
+++ b/lib/Runtime/Library/ScriptFunction.cpp
@@ -483,12 +483,12 @@ namespace Js
 
				             // Consider: Should we have a JavascriptUtf8Substring class which defers decoding
			
 
				             // until it's needed?
			
 
				 
			
 
				-            BufferStringBuilder builder(pFuncBody->LengthInChars(), scriptContext);
			
 
				-            // TODO: What about surrogate pairs?
			
 
				+            charcount_t cch = pFuncBody->LengthInChars();
			
 
				+            size_t cbLength = pFuncBody->LengthInBytes();
			
 
				+            LPCUTF8 pbStart = pFuncBody->GetSource(_u("ScriptFunction::EnsureSourceString"));
			
 
				+            BufferStringBuilder builder(cch, scriptContext);
			
 
				             utf8::DecodeOptions options = pFuncBody->GetUtf8SourceInfo()->IsCesu8() ? utf8::doAllowThreeByteSurrogates : utf8::doDefault;
			
 
				-            LPCUTF8 ptr = pFuncBody->GetSource(_u("ScriptFunction::EnsureSourceString"));
			
 
				-            size_t cbLength = pFuncBody->GetUtf8SourceInfo()->GetCbLength(_u("ScriptFunction::EnsureSourceString"));
			
 
				-            utf8::DecodeInto(builder.DangerousGetWritableBuffer(), ptr, ptr + cbLength, pFuncBody->LengthInChars(), options);
			
 
				+            utf8::DecodeUnitsInto(builder.DangerousGetWritableBuffer(), pbStart, pbStart + cbLength, options);
			
 
				             if (pFuncBody->IsLambda() || isActiveScript || this->GetFunctionInfo()->IsClassConstructor()
			
 
				 #ifdef ENABLE_PROJECTION
			
 
				                 || scriptContext->GetConfig()->IsWinRTEnabled()
			
--- a/lib/WasmReader/WasmBinaryReader.cpp
+++ b/lib/WasmReader/WasmBinaryReader.cpp
@@ -1019,7 +1019,7 @@ WasmBinaryReader::CvtUtf8Str(LPCUTF8 name, uint32 nameLen, charcount_t* dstLengt
 
				     {
			
 
				         Js::Throw::OutOfMemory();
			
 
				     }
			
 
				-    utf8::DecodeIntoAndNullTerminate(contents, name, name + nameLen, utf16Len, decodeOptions);
			
 
				+    utf8::DecodeUnitsIntoAndNullTerminate(contents, name, name + nameLen, decodeOptions);
			
 
				     if (dstLength)
			
 
				     {
			
 
				         *dstLength = utf16Len;
			
--- a/test/utf8/bugGH2386.js
+++ b/test/utf8/bugGH2386.js
@@ -0,0 +1,74 @@
 
				+//-------------------------------------------------------------------------------------------------------

			
 
				+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.

			
 
				+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.

			
 
				+//-------------------------------------------------------------------------------------------------------

			
 
				+

			
 
				+function toHexCP(c, cp) {

			
 
				+    var hex = "0123456789abcdef";

			
 
				+    return String.fromCharCode(hex.charCodeAt((c >> (cp * 4)) & 0xf));

			
 
				+}

			
 
				+

			
 
				+function toHex(str) {

			
 
				+    var result = "";

			
 
				+    for(var i = 0; i < str.length; i++) {

			
 
				+        var c = str.charCodeAt(i);

			
 
				+        for (var cp = 3; cp >= 0; cp--) {

			
 
				+            result += toHexCP(c, cp);

			
 
				+        }

			
 
				+    }

			
 
				+    return "0x" + result;

			
 
				+}

			
 
				+

			
 
				+var CHECK = function(h)

			
 
				+{

			
 
				+    var hex_str = String.fromCharCode(h);

			
 
				+    var pattern = eval("/" + hex_str + "/");

			
 
				+    if (toHex(hex_str) != toHex(pattern.source)) {

			
 
				+        throw new Error("String encoding has failed? "

			
 
				+          + toHex(hex_str) + " != " + toHex(pattern.source));

			
 
				+    }

			
 
				+}

			
 
				+

			
 
				+CHECK("0x0000");

			
 
				+CHECK("0x0080");

			
 
				+CHECK("0x0800");

			
 
				+CHECK("0xFF80");

			
 
				+CHECK("0xFFFD");

			
 
				+CHECK("0xFFFFFF");

			
 
				+CHECK("0xFFFFFF80");

			
 
				+CHECK("0xFFFFFF80FF");

			
 
				+

			
 
				+function CHECK_EVAL(s)

			
 
				+{

			
 
				+    var eval_s = new RegExp( s ).source;

			
 
				+    if (s !== eval_s) throw new Error(

			
 
				+      "String Encoding is broken ? ->" + s);

			
 
				+}

			
 
				+

			
 
				+var CH1 = String.fromCharCode('0xe4b8ad');

			
 
				+var CH2 = String.fromCharCode('0xe69687');

			
 
				+var CH3 = String.fromCharCode('0xe336b2');

			
 
				+var CH4 = String.fromCharCode('0xe336b2aa');

			
 
				+var CHX = String.fromCharCode("0x80808080");

			
 
				+

			
 
				+var BUFF = '';

			
 
				+for(var i = 0; i < 16; i++)

			
 
				+{

			
 
				+    var str = CH1;

			
 
				+

			
 
				+    CHECK_EVAL(str + CHX + BUFF)

			
 
				+    CHECK_EVAL(str + BUFF + CHX)

			
 
				+    CHECK_EVAL(str + BUFF + CHX + '1')

			
 
				+    str += BUFF + CH2 + CHX;

			
 
				+    BUFF += '1';

			
 
				+

			
 
				+    CHECK_EVAL(str + '1' + CH3);

			
 
				+    CHECK_EVAL(str + '12' + CH3);

			
 
				+    CHECK_EVAL(str + '123' + CH3);

			
 
				+

			
 
				+    CHECK_EVAL(str + '1' + CH4);

			
 
				+    CHECK_EVAL(str + '12' + CH4);

			
 
				+    CHECK_EVAL(str + '123' + CH4)

			
 
				+}

			
 
				+

			
 
				+console.log("PASS");
			
--- a/test/utf8/rlexe.xml
+++ b/test/utf8/rlexe.xml
@@ -20,4 +20,17 @@
 
				       <baseline />
			
 
				     </default>
			
 
				   </test>
			
 
				+  <test>
			
 
				+    <default>
			
 
				+      <files>bugGH2386.js</files>
			
 
				+      <baseline />
			
 
				+    </default>
			
 
				+  </test>
			
 
				+  <test>
			
 
				+    <default>
			
 
				+      <files>unicode_sequence_serialized.js</files>
			
 
				+      <baseline />
			
 
				+      <compile-flags>-forceserialized -oopjit-</compile-flags>
			
 
				+    </default>
			
 
				+  </test>
			
 
				 </regress-exe>
			
--- a/test/utf8/surrogatepair.js
+++ b/test/utf8/surrogatepair.js
@@ -7,4 +7,9 @@
 
				 // For this test case to work, please save this file with UTF-8 encoding

			
 
				 var y = "function () { '鄏𡄻�莞�遲���屢���箋成鄏賴旭鄑温收鄏擒�鄏賴忖 鄏兒江鄏眇成鄑温戍鄍�' ;WScript.Echo('hello'); }"

			
 
				 var x = function () { '鄏𡄻�莞�遲���屢���箋成鄏賴旭鄑温收鄏擒�鄏賴忖 鄏兒江鄏眇成鄑温戍鄍�' ;WScript.Echo('hello'); }

			
 
				-WScript.Echo(x.toString() === y ? "PASS" : "FAIL");
			
 
				+

			
 
				+// 2 bytes 

			
 
				+var y2 = "function () { '𥌓 kugu' ;WScript.Echo('hello'); }" 

			
 
				+var x2 = function () { '𥌓 kugu' ;WScript.Echo('hello'); } 

			
 
				+

			
 
				+WScript.Echo((x.toString() === y && x2.toString() === y2) ? "PASS" : "FAIL");
			
--- a/test/utf8/unicode_sequence_serialized.js
+++ b/test/utf8/unicode_sequence_serialized.js
@@ -0,0 +1,3 @@
 
				+(function () {

			
 
				+       /(오)/ ;

			
 
				+})();