8 년 전 · b29193ee9d
--- a/bin/NativeTests/CodexTests.cpp
+++ b/bin/NativeTests/CodexTests.cpp
@@ -212,4 +212,72 @@ namespace CodexTest
 
				             CHECK(sourceBuffer[i] == (const char16)encodedBuffer[i]);
			
 
				         }
			
 
				     }
			
 
				+
			
 
				+    template <typename TTestCase, typename TDecodeFunc>
			
 
				+    void RunUtf8DecodeTestCase(const TTestCase &testCases, const TDecodeFunc func)
			
 
				+    {
			
 
				+        const int numTestCases = _countof(testCases);
			
 
				+        const charcount_t charCount = _countof(testCases[0].result);
			
 
				+        char16 decodedBuffer[charCount + 1]; // +1 in case a null-terminating func is passed in
			
 
				+
			
 
				+        for (int i = 0; i < numTestCases; i++)
			
 
				+        {
			
 
				+            bool chunkEndsInTruncatedSequence = false;
			
 
				+            size_t decodedCount = func(decodedBuffer, testCases[i].utf8Encoding, testCases[i].utf8Encoding + testCases[i].bytesToDecode, utf8::DecodeOptions::doChunkedEncoding, &chunkEndsInTruncatedSequence);
			
 
				+            CHECK(decodedCount == testCases[i].expectedDecodedChars);
			
 
				+
			
 
				+            for (size_t j = 0; j < decodedCount; j++)
			
 
				+            {
			
 
				+                CHECK(decodedBuffer[j] == testCases[i].result[j]);
			
 
				+            }
			
 
				+
			
 
				+            CHECK(testCases[i].shouldEndInTruncation == chunkEndsInTruncatedSequence);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    TEST_CASE("CodexTest_DecodeUnitsInto_ChunkEndsInTruncatedSequence", "[CodexTest]")
			
 
				+    {
			
 
				+        struct TestCase
			
 
				+        {
			
 
				+            int bytesToDecode;
			
 
				+            size_t expectedDecodedChars;
			
 
				+            bool shouldEndInTruncation;
			
 
				+            char16 result[8];
			
 
				+            utf8char_t utf8Encoding[8];
			
 
				+        };
			
 
				+
			
 
				+        TestCase testCases[] = {
			
 
				+            { 2, 1, false, { 0xc1 }, { 0xc3, 0x81 } }, // Valid 2-byte sequence
			
 
				+            { 1, 0, true, { 0x0 }, { 0xc3, 0x81 } }, // Valid 2-byte sequence truncated at the end of the chunk
			
 
				+            { 2, 2, false, { 0xfffd, 0x79 },{ 0xc3, 0x79 } }, // Invalid 2-byte sequence
			
 
				+            { 1, 0, true, { 0x0 }, { 0xc3, 0x79 } }, // Invalid 2-byte sequence truncated at the end of the chunk
			
 
				+            { 3, 1, false, { 0x3042 },{ 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence
			
 
				+            { 1, 0, true, { 0x0 }, { 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence truncated at the end of the chunk
			
 
				+            { 2, 0, true, { 0x0 }, { 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence truncated at the end of the chunk
			
 
				+            { 3, 3, false, { 0xfffd, 0x79, 0xfffd }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence
			
 
				+            { 1, 0, true, { 0x0 }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence truncated at the end of the chunk
			
 
				+            { 2, 0, true, { 0x0 }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence truncated at the end of the chunk
			
 
				+            { 3, 3, false, { 0xfffd, 0xfffd, 0x79 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence
			
 
				+            { 1, 0, true, { 0x0 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence truncated at the end of the chunk
			
 
				+            { 2, 0, true, { 0x0 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence truncated at the end of the chunk
			
 
				+            { 4, 2, false, { 0xd9c4, 0xdc83 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence
			
 
				+            { 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
			
 
				+            { 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
			
 
				+            { 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
			
 
				+            { 4, 4, false, { 0xfffd, 0x79, 0xfffd, 0xfffd }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence
			
 
				+            { 1, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
			
 
				+            { 2, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
			
 
				+            { 3, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
			
 
				+            { 4, 4, false, { 0xfffd, 0xfffd, 0x79, 0xfffd }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence
			
 
				+            { 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
			
 
				+            { 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
			
 
				+            { 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
			
 
				+            { 4, 4, false, { 0xfffd, 0xfffd, 0xfffd, 0x79 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence
			
 
				+            { 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
			
 
				+            { 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
			
 
				+            { 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
			
 
				+        };
			
 
				+        
			
 
				+        RunUtf8DecodeTestCase(testCases, utf8::DecodeUnitsIntoAndNullTerminateNoAdvance);
			
 
				+    }
			
 
				 };
			
--- a/lib/Common/Codex/Utf8Codex.cpp
+++ b/lib/Common/Codex/Utf8Codex.cpp
@@ -451,17 +451,17 @@ LSlowPath:
 
				     }
			
 
				 
			
 
				     _Use_decl_annotations_
			
 
				-    size_t DecodeUnitsIntoAndNullTerminate(char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
			
 
				+    size_t DecodeUnitsIntoAndNullTerminate(char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options, bool *chunkEndsAtTruncatedSequence)
			
 
				     {
			
 
				-        size_t result = DecodeUnitsInto(buffer, pbUtf8, pbEnd, options);
			
 
				+        size_t result = DecodeUnitsInto(buffer, pbUtf8, pbEnd, options, chunkEndsAtTruncatedSequence);
			
 
				         buffer[result] = 0;
			
 
				         return result;
			
 
				     }
			
 
				 
			
 
				     _Use_decl_annotations_
			
 
				-    size_t DecodeUnitsIntoAndNullTerminateNoAdvance(char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
			
 
				+    size_t DecodeUnitsIntoAndNullTerminateNoAdvance(char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options, bool *chunkEndsAtTruncatedSequence)
			
 
				     {
			
 
				-        return DecodeUnitsIntoAndNullTerminate(buffer, pbUtf8, pbEnd, options);
			
 
				+        return DecodeUnitsIntoAndNullTerminate(buffer, pbUtf8, pbEnd, options, chunkEndsAtTruncatedSequence);
			
 
				     }
			
 
				 
			
 
				     bool CharsAreEqual(LPCOLESTR pch, LPCUTF8 bch, LPCUTF8 end, DecodeOptions options)
			
--- a/lib/Common/Codex/Utf8Codex.h
+++ b/lib/Common/Codex/Utf8Codex.h
@@ -277,9 +277,9 @@ namespace utf8
 
				     size_t DecodeUnitsInto(_Out_writes_(pbEnd - pbUtf8) char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault, bool *chunkEndsAtTruncatedSequence = nullptr);
			
 
				 
			
 
				     // Decode cb bytes from ptr to into buffer returning the number of characters converted and written to buffer (excluding the null terminator)
			
 
				-    size_t DecodeUnitsIntoAndNullTerminate(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault);
			
 
				+    size_t DecodeUnitsIntoAndNullTerminate(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault, bool *chunkEndsAtTruncatedSequence = nullptr);
			
 
				 
			
 
				-    size_t DecodeUnitsIntoAndNullTerminateNoAdvance(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault);
			
 
				+    size_t DecodeUnitsIntoAndNullTerminateNoAdvance(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault, bool *chunkEndsAtTruncatedSequence = nullptr);
			
 
				 
			
 
				     // Encode a UTF-8 sequence into a UTF-8 sequence (which is just a memcpy). This is included for convenience in templates
			
 
				     // when the character encoding is a template parameter.