GetCanonicalLocales.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. WScript.LoadScriptFile("..\\UnitTestFramework\\UnitTestFramework.js");
  /**
   * Asserts that canonicalizing `tag` with Intl.getCanonicalLocales throws a RangeError.
   *
   * @param {string} tag - language tag that is expected to be rejected.
   */
  function testRangeError(tag) {
      const canonicalize = () => { Intl.getCanonicalLocales(tag); };
      const failureMessage = `Tag '${tag}' should throw RangeError`;
      // NOTE(review): the fourth argument is presumably the expected error text checked by
      // the unit test framework's assert.throws -- confirm against UnitTestFramework.js.
      const expectedErrorText = `Locale '${tag}' is not well-formed`;

      assert.throws(canonicalize, RangeError, failureMessage, expectedErrorText);
  }
  /**
   * Asserts that every element of `actualList` is contained in `expectedList`.
   *
   * @param {Iterable} actualList - values to check, visited with for...of.
   * @param {Array} expectedList - accepted values; membership is tested with `includes`.
   * @param {string} [msg] - failure message. When omitted, a message naming the
   *     offending value and the accepted values is generated.
   */
  function assertEachIsOneOf(actualList, expectedList, msg) {
      // Declare the loop variable: the previous undeclared `a` created an implicit
      // global (and is a ReferenceError in strict mode).
      for (const actual of actualList) {
          const failureMessage = msg !== undefined
              ? msg
              : `'${String(actual)}' should be one of: ${expectedList.join(", ")}`;
          assert.isTrue(expectedList.includes(actual), failureMessage);
      }
  }
  // Test cases for Intl.getCanonicalLocales. Each entry supplies a `name` and a `body`
  // function; the list is handed to testRunner.runTests.
  var tests = [
      {
          name: "Intl.getCanonicalLocales Functionality (according to ECMA 402 #sec-canonicalizelocalelist)",
          // ensure array (or array-like) or convert to array, canonicalize each entry, remove duplicates
          body: function () {
              // ensure output is an array even if input was not an array
              assert.areEqual(Intl.getCanonicalLocales('en'), ['en'], "Input is a singleton string (not an array) -> output is array");

              // canonicalize case
              assert.areEqual(Intl.getCanonicalLocales(['en']), ['en'], "Input matches output, no lookup is performed");
              assert.areEqual(Intl.getCanonicalLocales(['en-us']), ['en-US'], "Canonicalize country casing (en-US) (all-lowercase)");
              assert.areEqual(Intl.getCanonicalLocales(['en-Us']), ['en-US'], "Canonicalize country casing (en-US) (mixed-case)");
              assert.areEqual(Intl.getCanonicalLocales(['EN-us']), ['en-US'], "Canonicalize country casing (en-US) (completely incorrect casing)");
              assert.areEqual(Intl.getCanonicalLocales(['de-de']), ['de-DE'], "Canonicalize country casing (de-DE)");

              // array-like objects are fine (according to spec, arrays are converted to Object anyway)
              // ECMA 402 #sec-canonicalizelocalelist
              // 5. Let len be ? ToLength(? Get(O, "length")).
              // 6. Let k be 0.
              // 7. Repeat, while k < len
              // Since ToLength(undefined) === 0, we don't enter the loop (essentially treat the input as a zero-length array).
              // ToLength(undefined) -> ToInteger(undefined) -> ToNumber(undefined) -> NaN
              // ToInteger converts NaN to +0.
              assert.areEqual(Intl.getCanonicalLocales({ '0': 'en-us' }), [], "Objects which might look like arrays are fine, but treated as 0 length.");
              assert.areEqual(Intl.getCanonicalLocales({ 'a': 'b' }), [], "Arbitrary Objects are fine, treated as 0-length arrays.");

              // Objects contained in the input array are fine if their toString is a valid language tag.
              assert.areEqual(Intl.getCanonicalLocales(['en-us', { toString() { return 'en-us'; } }]), ['en-US'], "Object.toString returning a valid language tag is fine.");
              assert.throws(function () { Intl.getCanonicalLocales([{ toString() { return undefined; } }]); }, RangeError,
                  "Object.toString returning a non-string or invalid language tag is RangeError.");

              // canonicalization of script code subkey
              assert.areEqual(Intl.getCanonicalLocales(['zh-hans-cn']), ['zh-Hans-CN'], "Chinese (zh) Han Simplified (Hans) as used in China (CN)");
              assert.areEqual(Intl.getCanonicalLocales(['zh-hant-hk']), ['zh-Hant-HK'], "Chinese (zh) Han Traditional (Hant) as used in Hong Kong (HK)");

              // language-extlang form and other non-preferred forms normalize to preferred ISO 639-3
              // This should be handled implicitly by canonicalization routine (no knowledge of language tags required),
              // but we make sure it works for some actual languages in any case.
              // RFC 5646 2.1:
              // language = 2-3ALPHA ["-" extlang]
              // extlang = 2-3ALPHA *2("-" 3ALPHA)
              // https://en.wikipedia.org/wiki/IETF_language_tag#ISO_639-3_and_ISO_639-1
              const mandarinChinese = ['cmn', 'zh-cmn']; // Mandarin Chinese (language-extlang: zh-cmn; prefer ISO 639-3: cmn)
              const minNanChinese = ['nan', 'zh-nan', 'zh-min-nan']; // Min-Nan Chinese (ISO 639-3: nan)
              const hakkaChinese = ['hak', 'zh-hak', 'zh-hakka', 'i-hak']; // Hakka Chinese (ISO 639-3: hak)
              const chineseIn = [].concat(mandarinChinese, minNanChinese, hakkaChinese);
              const chineseOut = [].concat(mandarinChinese[0], minNanChinese[0], hakkaChinese[0]); // after de-dup should be only these three preferred codes
              assert.areEqual(Intl.getCanonicalLocales(chineseIn), chineseOut, "Chinese language-extlang and other forms map to preferred ISO 639-3 codes");

              // canonicalization of -u- extension keys
              const DE_U_INCORRECT = 'de-de-u-kn-true-co-phonebk';
              const DE_U_CORRECT = 'de-DE-u-co-phonebk-kn-true';
              assert.areEqual(Intl.getCanonicalLocales(DE_U_INCORRECT), [DE_U_CORRECT], "Casing and reordering keys (input string)");
              assert.areEqual(Intl.getCanonicalLocales([DE_U_INCORRECT]), [DE_U_CORRECT], "Casing and reordering keys (input singleton)");
              assert.areEqual(Intl.getCanonicalLocales(['en-us', DE_U_INCORRECT]), ['en-US', DE_U_CORRECT], "Casing and reordering keys (input multiple)");

              // TODO (doilij): Investigate what is correct/allowable here (Microsoft/ChakraCore#2964)
              const DE_U_CORRECT_VARIANT = 'de-DE-u-co-phonebk-kn-yes';
              assertEachIsOneOf(Intl.getCanonicalLocales(DE_U_CORRECT_VARIANT), [
                  DE_U_CORRECT_VARIANT, // ch; Firefox/SM
                  DE_U_CORRECT, // Chrome/v8/node
              ], "Keyword value 'yes' canonicalizes to one of the known engine outputs");

              // canonicalization of -u- extension keys with no explicit values
              // TODO (doilij): Investigate what is correct/allowable here (Microsoft/ChakraCore#2964)
              assertEachIsOneOf(Intl.getCanonicalLocales('de-de-u-kn-co'), [
                  'de-DE-u-co-kn', // ch (WinGlob)
                  'de-DE-u-co-yes-kn-yes', // ch (ICU)
                  'de-DE-u-co-yes-kn-true', // Chrome/v8/node
                  'de-DE-u-kn-co', // Firefox/SM
              ], "Extension keys without explicit values canonicalize to one of the known engine outputs");

              // no duplicates
              assert.areEqual(Intl.getCanonicalLocales(['en-us', 'en-us']), ['en-US'], "No duplicates, same input casing (casing was incorrect)");
              assert.areEqual(Intl.getCanonicalLocales(['en-US', 'en-US']), ['en-US'], "No duplicates, same input casing (casing was correct)");
              assert.areEqual(Intl.getCanonicalLocales(['en-us', 'en-US']), ['en-US'], "No duplicates, different input casing");

              // locale includes all options, don't de-dupe locales with and without options, but do de-dupe same options after canonicalization
              assert.areEqual(Intl.getCanonicalLocales(['de-de', DE_U_CORRECT, DE_U_INCORRECT]), ['de-DE', DE_U_CORRECT],
                  "de-dupe canonicalized locales, but not locales with and without options");
          }
      },
      {
          name: "Handling of unsupported tags and subtags (general canonicalization)",
          // Intl.getCanonicalLocales does not care whether a locale tag is supported.
          // It simply canonicalizes all properly formatted (i.e. "valid") tags.
          // Therefore, anything that fits into the general language tag grammar should be canonicalized.
          // * ECMA 402 #sec-isstructurallyvalidlanguagetag
          // * (Note: The above basically just refers to RFC 5646 section 2.1)
          body: function () {
              assert.areEqual(Intl.getCanonicalLocales('en-zz'), ['en-ZZ'], "en-ZZ: English as used in [unsupported locale ZZ]");
              assert.areEqual(Intl.getCanonicalLocales('ZZ-us'), ['zz-US'], "zz-US: [unsupported language zz] as used in US");
              assert.areEqual(Intl.getCanonicalLocales('xx-abcd-zz'), ['xx-Abcd-ZZ'],
                  "xx-Abcd-ZZ: [unsupported language xx] using [unsupported script Abcd] as used in [unsupported locale ZZ]");

              // TODO (doilij): Investigate what is correct/allowable here (Microsoft/ChakraCore#2964)
              assertEachIsOneOf(Intl.getCanonicalLocales('xx-zzz'), [
                  'xx-zzz', // ch (WinGlob), Firefox/SM
                  'zzz' // ch (ICU), Chrome/v8/node
              ], "Unsupported language-extlang tag canonicalizes to one of the known engine outputs");

              // TODO (doilij): Investigate what is correct/allowable here (Microsoft/ChakraCore#2964)
              assertEachIsOneOf(Intl.getCanonicalLocales('xx-zz-u-zz-yy'), [
                  'xx-ZZ-u-yy-yes-zz-yes', // Chrome/v8/node (reordering; defaulting)
                  'xx-ZZ-u-yy-zz', // ch (ICU) (reordering; no defaulting)
                  'xx-ZZ-u-zz-yy', // Firefox/SM (no reordering; no defaulting)
              ], "Unsupported -u- extension keys canonicalize to one of the known engine outputs");
          }
      },
      {
          name: "Rejection of duplicate tags",
          body: function () {
              // TODO: Enable this test when Microsoft/ChakraCore#2961 is fixed.
              // const duplicateTags = ['de-gregory-gregory'];
              const duplicateSingletons = ['cmn-hans-cn-u-u', 'cmn-hans-cn-t-u-ca-u'];
              const duplicateUnicodeExtensionKeys = ['de-de-u-kn-true-co-phonebk-co-phonebk'];

              // duplicateTags.forEach(testRangeError);
              duplicateSingletons.forEach(testRangeError);
              duplicateUnicodeExtensionKeys.forEach(testRangeError);
          }
      },
      {
          name: "Structurally invalid tags",
          // * ECMA 402 #sec-canonicalizelocalelist -- step 7.c.iv. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
          // * ECMA 402 #sec-isstructurallyvalidlanguagetag
          // * (Note: The above basically just refers to RFC 5646 section 2.1)
          body: function () {
              const empty = [''];
              const invalidSubtags = ['en-A1'];
              const invalidChars = ['en-a@'];
              const nonAsciiChars = ['中文', 'de-ßß'];
              const boundaryHyphen = ['-en', '-en-us', 'en-', 'en-us-'];
              const incompleteSubtags = ['de-de-u'];
              const extlangNotAllowedAfterScript = ['xx-abcd-zzz', 'xx-yyy-abcd-zzz', 'xx-yyy-Abcd-zzz-aa'];

              empty.forEach(testRangeError);
              invalidSubtags.forEach(testRangeError);
              invalidChars.forEach(testRangeError);
              nonAsciiChars.forEach(testRangeError);
              boundaryHyphen.forEach(testRangeError);
              incompleteSubtags.forEach(testRangeError);
              extlangNotAllowedAfterScript.forEach(testRangeError);
          }
      },
      {
          name: "Bad/weird input",
          body: function () {
              // ECMA 402 #sec-canonicalizelocalelist -- step 1.a. if locales is undefined, return []
              assert.areEqual(Intl.getCanonicalLocales(), [], "Implicit undefined");
              assert.areEqual(Intl.getCanonicalLocales(undefined), [], "Explicit undefined");

              // There is no special case for null type inputs in the definition, so throw TypeError
              // ECMA 402 #sec-canonicalizelocalelist -- step 4.a. Let O be ? ToObject(locales).
              // ECMA 262 #sec-toobject
              assert.throws(function () { Intl.getCanonicalLocales(null); }, TypeError, "Cannot convert null to object.");

              // Test Number literals
              assert.areEqual(Intl.getCanonicalLocales(1), [], "Number is converted to string internally and no locale is found");
              assert.areEqual(Intl.getCanonicalLocales(3.14), [], "Number is converted to string internally and no locale is found");
              assert.areEqual(Intl.getCanonicalLocales(Infinity), [], "Number is converted to string internally and no locale is found");
              assert.areEqual(Intl.getCanonicalLocales(-Infinity), [], "Number is converted to string internally and no locale is found");
              assert.areEqual(Intl.getCanonicalLocales(NaN), [], "Number is converted to string internally and no locale is found");

              // Test other types of literals
              assert.areEqual(Intl.getCanonicalLocales(true), [], "Boolean is converted to string internally and no locale is found");
              assert.areEqual(Intl.getCanonicalLocales(Symbol.toStringTag), [], "Symbol is converted to string internally and no locale is found");

              // RegExp and Object literals
              assert.areEqual(Intl.getCanonicalLocales(/a/), [], "RegExp is converted to string internally and no locale is found");
              assert.areEqual(Intl.getCanonicalLocales(/en-us/), [], "RegExp is converted to string internally and no locale is found");
              assert.areEqual(Intl.getCanonicalLocales([]), [], "Object is converted to string internally and no locale is found");
              assert.areEqual(Intl.getCanonicalLocales({}), [], "Object is converted to string internally and no locale is found");
              assert.areEqual(Intl.getCanonicalLocales({ '0': 'en-us' }), [], "Object is converted to string internally and no locale is found");
              assert.areEqual(Intl.getCanonicalLocales(['en-us', { toString: () => 'en-us' }]), ['en-US'], "Element is an Object whose toString produces a valid language tag");
              assert.areEqual(Intl.getCanonicalLocales({ toString: () => 'en-us' }), [], "Argument is an Object which doesn't have any numeric indexes");

              // Arrays containing anything which is not String or Object type should throw.
              // ECMA 402 #sec-canonicalizelocalelist
              // * step 7.c.ii. If Type(kValue) is not String or Object, throw a TypeError exception.
              // * step 7.c.iii. Let tag be ? ToString(kValue).
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', null]); }, TypeError, "null is not String or Object.");
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', 1]); }, TypeError, "Number is not String or Object.");
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', 3.14]); }, TypeError, "Number is not String or Object.");
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', Infinity]); }, TypeError, "Number is not String or Object.");
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', -Infinity]); }, TypeError, "Number is not String or Object.");
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', NaN]); }, TypeError, "Number is not String or Object.");
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', true]); }, TypeError, "Boolean is not String or Object.");
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', Symbol.toStringTag]); }, TypeError, "Symbol is not String or Object.");

              // RegExp and Object literals
              // * step 7.c.iv. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', /a/]); }, RangeError, "RegExp is an Object, whose toString is not a well-formed language tag.");
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', /en-us/]); }, RangeError, "RegExp is an Object, whose toString is not a well-formed language tag.");
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', []]); }, RangeError, "Array contained within an array. [].toString()==='' (invalid tag).");
              assert.throws(function () { Intl.getCanonicalLocales(['en-us', {}]); }, RangeError, "Object whose toString is not a well-formed language tag.");
              assert.throws(function () { Intl.getCanonicalLocales([{ '0': 'en-us' }]); }, RangeError, "Array containing object where toString() produces an invalid tag.");
          }
      },
      {
          name: "Array with holes",
          body: function () {
              const sparse = [];
              sparse[1] = 'en'; // index 0 is left as a hole
              assert.areEqual(Intl.getCanonicalLocales(sparse), ['en'], "Holes in an array are skipped");
          }
      },
      {
          name: "Array-like object (without holes)",
          body: function () {
              const locales = {
                  length: 2,
                  0: 'zh',
                  1: 'en'
              };
              assert.areEqual(Intl.getCanonicalLocales(locales), ['zh', 'en'], "Array-like object with length and indexed properties is read like an array");
          }
      },
      {
          name: "Array-like object (with holes)",
          body: function () {
              const locales = {
                  length: 2,
                  // 0: 'zh',
                  1: 'en'
              };
              assert.areEqual(Intl.getCanonicalLocales(locales), ['en'], "Missing indexes in an array-like object are skipped");
          }
      },
      {
          name: "Array-like class with numeric getters (without holes)",
          body: function () {
              class ArrayLike {
                  get 0() { return 'zh'; }
                  get 1() { return 'en'; }
                  get length() { return 2; }
              }
              const locales = new ArrayLike();
              assert.areEqual(Intl.getCanonicalLocales(locales), ['zh', 'en'], "Indexed getters are read like array elements");
          }
      },
      {
          name: "Array-like class with numeric getters (with holes)",
          body: function () {
              class ArrayLike {
                  // get 0() { return 'zh'; } // culture[0] is a hole
                  get 1() { return 'en'; }
                  get length() { return 2; }
              }
              const locales = new ArrayLike();
              assert.areEqual(Intl.getCanonicalLocales(locales), ['en'], "Missing indexed getters are skipped");
          }
      },
      {
          name: "Array-like class with numeric getters (with base class closing the hole)",
          body: function () {
              class Base {
                  get 0() { return 'jp'; } // closes the hole in ArrayLike
              }
              class ArrayLike extends Base {
                  // get 0() { return 'zh'; } // culture[0] has a hole
                  get 1() { return 'en'; }
                  get length() { return 2; } // try 2 with get 0 defined in base; try 2,3 with get 2 defined in base; try 3 with get 0, get 1 defined
              }
              const locales = new ArrayLike();
              assert.areEqual(Intl.getCanonicalLocales(locales), ['jp', 'en'], "An inherited indexed getter fills the hole");
          }
      }
  ];
  // Run every test case; verbose output is enabled unless the first script argument is "summary".
  testRunner.runTests(tests, { verbose: WScript.Arguments[0] !== "summary" });