Просмотр исходного кода

Fix #2670. Set stdout in ch.exe to UTF-8 mode so console.log gets Unicode output support.

Only use _setmode on Windows; it seems stdout is UTF-8 mode by default on Linux.

This change would break printf because it does not understand UTF-8. To that end, this change also incorporates the following changes:

* Replace printf with wprintf and update formatter strings.
* Replace wprintf with Output::Print except in ch where it would be a new dependency.
* Convert L"" strings to _u("") strings in more places where they were apparently overlooked before.
Doug Ilijev 9 лет назад
Родитель
Сommit
103bb1c552
4 измененных файлов с 39 добавлено и 0 удалено
  1. 12 0
      bin/ch/ch.cpp
  2. 6 0
      test/utf8/rlexe.xml
  3. 2 0
      test/utf8/utf8_console_log.baseline
  4. 19 0
      test/utf8/utf8_console_log.js

+ 12 - 0
bin/ch/ch.cpp

@@ -7,6 +7,7 @@
 #ifdef _WIN32
 #include <winver.h>
 #include <process.h>
+#include <fcntl.h>
 #endif
 
 unsigned int MessageBase::s_messageCount = 0;
@@ -909,6 +910,17 @@ int _cdecl wmain(int argc, __in_ecount(argc) LPWSTR argv[])
 #endif
 
 #ifdef _WIN32
+    // Set the output mode of stdout so we can display non-ASCII characters on the console and redirect to file as UTF-8
+    {
+        int result = _setmode(_fileno(stdout), _O_U8TEXT); // set stdout to UTF-8 mode
+        if (result == -1)
+        {
+            // Failed to set mode. Undefined behavior may result, so exit now.
+            wprintf(_u("Failed to set output stream mode. Exiting...\n"));
+            return EXIT_FAILURE;
+        }
+    }
+
     bool runJITServer = HandleJITServerFlag(argc, argv);
 #endif
     int retval = -1;

+ 6 - 0
test/utf8/rlexe.xml

@@ -42,4 +42,10 @@
       <tags>require_winglob</tags> <!-- Microsoft/ChakraCore#3777: Fix this test under ICU -->
     </default>
   </test>
+  <test>
+    <default>
+      <files>utf8_console_log.js</files>
+      <baseline>utf8_console_log.baseline</baseline>
+    </default>
+  </test>
 </regress-exe>

+ 2 - 0
test/utf8/utf8_console_log.baseline

@@ -0,0 +1,2 @@
+A ¡ ā — 𐐁
+русский 中文

+ 19 - 0
test/utf8/utf8_console_log.js

@@ -0,0 +1,19 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+
+// 1b: U+0041 (A) Uppercase Latin A
+// 2b: U+00A1 (¡) inverted exclamation mark
+// 2b: U+0101 (ā) LATIN SMALL LETTER A WITH MACRON
+// 3b: U+2014 (—) em dash
+// 4b: U+10401 Deseret Long E -- surrogate pair \uD801\uDC01
+
+const A         = "\u0041";    // U+0041 (ASCII); UTF-16 0x0041       ; UTF-8 0x41
+const iexcl     = "\u00A1";    // U+00A1        ; UTF-16 0x00A1       ; UTF-8 0xC2 0xA1
+const amacron   = "\u0101";    // U+0101        ; UTF-16 0x0101       ; UTF-8 0xC4 0x81
+const emdash    = "\u2014";    // U+2014        ; UTF-16 0x2014       ; UTF-8 0xE2 0x80 0x94
+const desLongE  = "\u{10401}"; // U+10401       ; UTF-16 0xD801 0xDC01; UTF-8 0xF0 0x90 0x90 0x81
+
+console.log(`${A} ${iexcl} ${amacron} ${emdash} ${desLongE}`);
+console.log("русский 中文");