// GetCanonicalLocales.js 18 KB
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. WScript.LoadScriptFile("..\\UnitTestFramework\\UnitTestFramework.js");
  6. function testRangeError(tag) {
  7. assert.throws(function () { Intl.getCanonicalLocales(tag) }, RangeError,
  8. `Tag '${tag}' should throw RangeError`,
  9. `Locale '${tag}' is not well-formed`);
  10. }
  11. /**
  12. * Allows different values to be asserted depending on the Intl implementation
  13. *
  14. * @param {String|String[]} expectedWinGlob
  15. * @param {String|String[]} expectedICU
  16. * @param {String|String[]} actual
  17. * @param {String} message
  18. */
  19. const equal = (function () {
  20. if (WScript.Platform.INTL_LIBRARY === "icu") {
  21. return function (_, expectedICU, actual, message) {
  22. assert.areEqual(expectedICU, actual, message);
  23. }
  24. } else {
  25. assert.isTrue(WScript.Platform.INTL_LIBRARY === "winglob");
  26. return function (expectedWinGlob, _, actual, message) {
  27. assert.areEqual(expectedWinGlob, actual, message);
  28. }
  29. }
  30. })();
  31. const gcl = Intl.getCanonicalLocales;
  32. var tests = [
  33. {
  34. name: "Intl.getCanonicalLocales Functionality (according to ECMA 402 #sec-canonicalizelocalelist)",
  35. // ensure array (or array-like) or convert to array, canonicalize each entry, remove duplicates
  36. body: function () {
  37. // ensure output is an array even if input was not an array
  38. assert.areEqual(Intl.getCanonicalLocales('en'), ['en'], "Input is a singleton string (not an array) -> output is array");
  39. // canonicalize case
  40. assert.areEqual(Intl.getCanonicalLocales(['en']), ['en'], "Input matches output, no lookup is performed");
  41. assert.areEqual(Intl.getCanonicalLocales(['en-us']), ['en-US'], "Canonicalize country casing (en-US) (all-lowercase)");
  42. assert.areEqual(Intl.getCanonicalLocales(['en-Us']), ['en-US'], "Canonicalize country casing (en-US) (mixed-case)");
  43. assert.areEqual(Intl.getCanonicalLocales(['EN-us']), ['en-US'], "Canonicalize country casing (en-US) (completely incorrect casing)");
  44. assert.areEqual(Intl.getCanonicalLocales(['de-de']), ['de-DE'], "Canonicalize country casing (de-DE)");
  45. // array-like objects are be fine (according to spec, arrays are converted to Object anyway)
  46. // ECMA 402 #sec-canonicalizelocalelist
  47. // 5. Let len be ? ToLength(? Get(O, "length")).
  48. // 6. Let k be 0.
  49. // 7. Repeat, while k < len
  50. // Since ToLength(undefined) === 0, we don't enter the loop (essentially treat the input as a zero-length array).
  51. // ToLength(undefined) -> ToInteger(undefined) -> ToNumber(undefined) -> NaN
  52. // ToInteger converts NaN to +0.
  53. assert.areEqual(Intl.getCanonicalLocales({ '0': 'en-us' }), [], "Objects which might look like arrays are fine, but treated as 0 length.");
  54. assert.areEqual(Intl.getCanonicalLocales({ 'a': 'b' }), [], "Arbitrary Objects are fine, treated as 0-length arrays.");
  55. // Objects contained in the input array are fine if their toString is a valid language tag.
  56. assert.areEqual(Intl.getCanonicalLocales(['en-us', { toString() { return 'en-us' } }]), ['en-US'], "Object.toString returning a valid language tag is fine.");
  57. assert.throws(function () { Intl.getCanonicalLocales([{ toString() { return undefined } }]) }, RangeError,
  58. "Object.toString returning a non-string or invalid language tag is RangeError.");
  59. // canonicalization of script code subkey
  60. assert.areEqual(Intl.getCanonicalLocales(['zh-hans-cn']), ['zh-Hans-CN'], "Chinese (zh) Han Simplified (Hans) as used in China (CN)");
  61. assert.areEqual(Intl.getCanonicalLocales(['zh-hant-hk']), ['zh-Hant-HK'], "Chinese (zh) Han Traditional (Hant) as used in Hong Kong (HK)");
  62. // language-extlang form and other non-preferred forms normalize to preferred ISO 639-3
  63. // This should be handled implicitly by canonicalization routine (no knowledge of language tags required),
  64. // but we make sure it works for some actual languages in any case.
  65. // RFC 5646 2.1:
  66. // language = 2-3ALPHA ["-" extlang]
  67. // extlang = 2-3ALPHA *2("-" 3ALPHA)
  68. // https://en.wikipedia.org/wiki/IETF_language_tag#ISO_639-3_and_ISO_639-1
  69. let mandarinChinese = ['cmn', 'zh-cmn']; // Mandarin Chinese (language-extlang: zh-cmn; prefer ISO 639-3: cmn)
  70. let minNanChinese = ['nan', 'zh-nan', 'zh-min-nan']; // Min-Nan Chinese (ISO 639-3: nan)
  71. let hakkaChinese = ['hak', 'zh-hak', 'zh-hakka', 'i-hak']; // Hakka Chinese (ISO 639-3: hak)
  72. let chineseIn = [].concat(mandarinChinese, minNanChinese, hakkaChinese);
  73. let chineseOut = [].concat(mandarinChinese[0], minNanChinese[0], hakkaChinese[0]); // after de-dup should be only these three preferred codes
  74. assert.areEqual(Intl.getCanonicalLocales(chineseIn), chineseOut, "Chinese language-extlang and other forms map to preferred ISO 639-3 codes");
  75. // canonicalization of -u- extension keys
  76. // V8 and CC-ICU convert boolean keys (kn) to boolean string values (including giving them default values),
  77. // which is incorrect. SpiderMonkey and CC-WinGlob correctly avoid this.
  78. // V8 and CC-ICU also give the default value of "yes" to non-boolean keys (co), which also is incorrect.
  79. // Everyone (should) correctly re-order extension keys alphabetically
  80. // Microsoft/ChakraCore#4490 tracks the incorrect defaulting, Microsoft/ChakraCore#2964 tracks the overall investigation
  81. equal("de-DE-u-co-kn", "de-DE-u-co-yes-kn-true", gcl("de-de-u-kn-co")[0]);
  82. equal("de-DE-u-co-phonebk-kn", "de-DE-u-co-phonebk-kn-true", gcl("de-de-u-kn-co-phonebk")[0]);
  83. equal("de-DE-u-co-phonebk-kn-yes", "de-DE-u-co-phonebk-kn-true", gcl("de-DE-u-kn-yes-co-phonebk")[0]);
  84. // De-dupe after locales are canonicalized
  85. assert.areEqual(Intl.getCanonicalLocales(['en-us', 'en-us']), ['en-US'], "No duplicates, same input casing (casing was incorrect)");
  86. assert.areEqual(Intl.getCanonicalLocales(['en-US', 'en-US']), ['en-US'], "No duplicates, same input casing (casing was correct)");
  87. assert.areEqual(Intl.getCanonicalLocales(['en-us', 'en-US']), ['en-US'], "No duplicates, different input casing");
  88. assert.areEqual(
  89. Intl.getCanonicalLocales(["de-de", "de-DE-u-co-phonebk-kn-true", "de-DE-u-kn-true-co-phonebk"]),
  90. ["de-DE", "de-DE-u-co-phonebk-kn-true"],
  91. "No duplicates after re-ordering options"
  92. );
  93. }
  94. },
  95. {
  96. name: "Handling of unsupported tags and subtags (general canonicalization)",
  97. // Intl.getCanonicalLocales does not care whether a locale tag is supported.
  98. // It simply canonicalizes all properly formatted (i.e. "valid") tags.
  99. // Therefore, anything that fits into the general language tag grammar should be canonicalized.
  100. // * ECMA 402 #sec-isstructurallyvalidlanguagetag
  101. // * (Note: The above basically just refers to RFC 5646 section 2.1)
  102. body: function () {
  103. assert.areEqual(Intl.getCanonicalLocales('en-zz'), ['en-ZZ'], "en-ZZ: English as used in [unsupported locale ZZ]");
  104. assert.areEqual(Intl.getCanonicalLocales('ZZ-us'), ['zz-US'], "zz-US: [unsupported language zz] as used in US");
  105. assert.areEqual(Intl.getCanonicalLocales('xx-abcd-zz'), ['xx-Abcd-ZZ'],
  106. "xx-Abcd-ZZ: [unsupported language xx] using [unsupported script Abcd] as used in [unsupported locale ZZ]");
  107. // TODO (doilij): Investigate what is correct/allowable here (Microsoft/ChakraCore#2964)
  108. equal("xx-zzz", "zzz", gcl("xx-zzz")[0]);
  109. // See discussion of defaulting above (V8/CC-ICU and CC-WinGlob/SM distinction remains true here)
  110. equal("xx-ZZ-u-yy-zz", "xx-ZZ-u-yy-yes-zz-yes", gcl("xx-zz-u-zz-yy")[0]);
  111. }
  112. },
  113. {
  114. name: "Rejection of duplicate tags",
  115. body: function () {
  116. const duplicateSingletons = ['cmn-hans-cn-u-u', 'cmn-hans-cn-t-u-ca-u'];
  117. const duplicateUnicodeExtensionKeys = ['de-de-u-kn-true-co-phonebk-co-phonebk'];
  118. if (WScript.Platform.INTL_LIBRARY === "icu") {
  119. const duplicateTags = ['de-gregory-gregory'];
  120. duplicateTags.forEach(testRangeError);
  121. }
  122. // duplicateTags.forEach(testRangeError);
  123. duplicateSingletons.forEach(testRangeError);
  124. duplicateUnicodeExtensionKeys.forEach(testRangeError);
  125. }
  126. },
  127. {
  128. name: "Structurally invalid tags",
  129. // * ECMA 402 #sec-canonicalizelocalelist -- step 7.c.iv. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
  130. // * ECMA 402 #sec-isstructurallyvalidlanguagetag
  131. // * (Note: The above basically just refers to RFC 5646 section 2.1)
  132. body: function () {
  133. const empty = [''];
  134. const invalidSubtags = ['en-A1'];
  135. const invalidVariants = ['en-us-latn', 'en-us-latnlatnlatn'];
  136. const invalidChars = ['en-a@'];
  137. const nonAsciiChars = ['中文', 'de-ßß'];
  138. const boundaryHyphen = ['-en', '-en-us', 'en-', 'en-us-'];
  139. const incompleteSubtags = ['de-de-u'];
  140. const extlangNotAllowedAfterScript = ['xx-abcd-zzz', 'xx-yyy-abcd-zzz', 'xx-yyy-Abcd-zzz-aa'];
  141. empty.forEach(testRangeError);
  142. invalidSubtags.forEach(testRangeError);
  143. invalidVariants.forEach(testRangeError);
  144. invalidChars.forEach(testRangeError);
  145. nonAsciiChars.forEach(testRangeError)
  146. boundaryHyphen.forEach(testRangeError);
  147. incompleteSubtags.forEach(testRangeError);
  148. extlangNotAllowedAfterScript.forEach(testRangeError);
  149. }
  150. },
  151. {
  152. name: "Bad/weird input",
  153. body: function () {
  154. // ECMA 402 #sec-canonicalizelocalelist -- step 1.a. if locales is undefined, return []
  155. assert.areEqual(Intl.getCanonicalLocales(), [], "Implicit undefined");
  156. assert.areEqual(Intl.getCanonicalLocales(undefined), [], "Explicit undefined");
  157. // There is no special case for null type inputs in the definition, so throw TypeError
  158. // ECMA 402 #sec-canonicalizelocalelist -- step 4.a. Let O be ? ToObject(locales).
  159. // ECMA 262 #sec-toobject
  160. assert.throws(function () { Intl.getCanonicalLocales(null) }, TypeError, "Cannot convert null to object.");
  161. // Test Number literals
  162. assert.areEqual(Intl.getCanonicalLocales(1), [], "Number is converted to string internally and no locale is found");
  163. assert.areEqual(Intl.getCanonicalLocales(3.14), [], "Number is converted to string internally and no locale is found");
  164. assert.areEqual(Intl.getCanonicalLocales(Infinity), [], "Number is converted to string internally and no locale is found");
  165. assert.areEqual(Intl.getCanonicalLocales(-Infinity), [], "Number is converted to string internally and no locale is found");
  166. assert.areEqual(Intl.getCanonicalLocales(NaN), [], "Number is converted to string internally and no locale is found");
  167. // Test other types of literals
  168. assert.areEqual(Intl.getCanonicalLocales(true), [], "Boolean is converted to string internally and no locale is found");
  169. assert.areEqual(Intl.getCanonicalLocales(Symbol.toStringTag), [], "Symbol is converted to string internally and no locale is found");
  170. // RegExp and Object literals
  171. assert.areEqual(Intl.getCanonicalLocales(/a/), [], "RegExp is converted to string internally and no locale is found");
  172. assert.areEqual(Intl.getCanonicalLocales(/en-us/), [], "RegExp is converted to string internally and no locale is found");
  173. assert.areEqual(Intl.getCanonicalLocales([]), [], "Object is converted to string internally and no locale is found");
  174. assert.areEqual(Intl.getCanonicalLocales({}), [], "Object is converted to string internally and no locale is found");
  175. assert.areEqual(Intl.getCanonicalLocales({ '0': 'en-us' }), [], "Object is converted to string internally and no locale is found");
  176. assert.areEqual(Intl.getCanonicalLocales(['en-us', { toString: () => 'en-us' }]), ['en-US'], "Element is an Object whose toString produces a valid language tag");
  177. assert.areEqual(Intl.getCanonicalLocales({ toString: () => 'en-us' }), [], "Argument is an Object which doesn't have any numeric indexes");
  178. // Arrays containing anything which is not String or Object type should throw.
  179. // ECMA 402 #sec-canonicalizelocalelist
  180. // * step 7.c.ii. If Type(kValue) is not String or Object, throw a TypeError exception.
  181. // * step 7.c.iii. Let tag be ? ToString(kValue).
  182. assert.throws(function () { Intl.getCanonicalLocales(['en-us', null]) }, TypeError, "null is not String or Object.");
  183. assert.throws(function () { Intl.getCanonicalLocales(['en-us', 1]) }, TypeError, "Number is not String or Object.");
  184. assert.throws(function () { Intl.getCanonicalLocales(['en-us', 3.14]) }, TypeError, "Number is not String or Object.");
  185. assert.throws(function () { Intl.getCanonicalLocales(['en-us', Infinity]) }, TypeError, "Number is not String or Object.");
  186. assert.throws(function () { Intl.getCanonicalLocales(['en-us', -Infinity]) }, TypeError, "Number is not String or Object.");
  187. assert.throws(function () { Intl.getCanonicalLocales(['en-us', NaN]) }, TypeError, "Number is not String or Object.");
  188. assert.throws(function () { Intl.getCanonicalLocales(['en-us', true]) }, TypeError, "Boolean is not String or Object.");
  189. assert.throws(function () { Intl.getCanonicalLocales(['en-us', Symbol.toStringTag]) }, TypeError, "Symbol is not String or Object.");
  190. // RegExp and Object literals
  191. // * step 7.c.iv. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
  192. assert.throws(function () { Intl.getCanonicalLocales(['en-us', /a/]) }, RangeError, "RegExp is an Object, whose toString is not a well-formed language tag.");
  193. assert.throws(function () { Intl.getCanonicalLocales(['en-us', /en-us/]) }, RangeError, "RegExp is an Object, whose toString is not a well-formed language tag.");
  194. assert.throws(function () { Intl.getCanonicalLocales(['en-us', []]) }, RangeError, "Array contained within an array. [].toString()==='' (invalid tag).");
  195. assert.throws(function () { Intl.getCanonicalLocales(['en-us', {}]) }, RangeError, "Object whose toString is not a well-formed language tag.");
  196. assert.throws(function () { Intl.getCanonicalLocales([{ '0': 'en-us' }]) }, RangeError, "Array containing object where toString() produces an invalid tag.");
  197. }
  198. },
  199. {
  200. name: "Array with holes",
  201. body: function () {
  202. let a = [];
  203. a[1] = 'en';
  204. assert.areEqual(Intl.getCanonicalLocales(a), ['en']);
  205. }
  206. },
  207. {
  208. name: "Array-like object (without holes)",
  209. body: function () {
  210. let locales = {
  211. length: 2,
  212. 0: 'zh',
  213. 1: 'en'
  214. };
  215. assert.areEqual(Intl.getCanonicalLocales(locales), ['zh', 'en']);
  216. }
  217. },
  218. {
  219. name: "Array-like object (with holes)",
  220. body: function () {
  221. let locales = {
  222. length: 2,
  223. // 0: 'zh',
  224. 1: 'en'
  225. };
  226. assert.areEqual(Intl.getCanonicalLocales(locales), ['en']);
  227. }
  228. },
  229. {
  230. name: "Array-like class with numeric getters (without holes)",
  231. body: function () {
  232. class x {
  233. get 0() { return 'zh'; }
  234. get 1() { return 'en'; }
  235. get length() { return 2; }
  236. }
  237. let locales = new x();
  238. assert.areEqual(Intl.getCanonicalLocales(locales), ['zh', 'en']);
  239. }
  240. },
  241. {
  242. name: "Array-like class with numeric getters (with holes)",
  243. body: function () {
  244. class x {
  245. // get 0() { return 'zh'; } // culture[0] is a hole
  246. get 1() { return 'en'; }
  247. get length() { return 2; }
  248. }
  249. let locales = new x();
  250. assert.areEqual(Intl.getCanonicalLocales(locales), ['en']);
  251. }
  252. },
  253. {
  254. name: "Array-like class with numeric getters (with base class closing the hole)",
  255. body: function () {
  256. class base {
  257. get 0() { return 'jp'; } // closes the hole in x
  258. }
  259. class x extends base {
  260. // get 0() { return 'zh'; } // culture[0] has a hole
  261. get 1() { return 'en'; }
  262. get length() { return 2; } // try 2 with get 0 defined in base; try 2,3 with get 2 defined in base; try 3 with get 0, get 1 defined
  263. }
  264. let locales = new x();
  265. assert.areEqual(Intl.getCanonicalLocales(locales), ['jp', 'en']);
  266. }
  267. }
  268. ];
  269. testRunner.runTests(tests, { verbose: WScript.Arguments[0] != "summary" });