GetCanonicalLocales.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Copyright (c) ChakraCore Project Contributors. All rights reserved.
  4. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  5. //-------------------------------------------------------------------------------------------------------
  6. WScript.LoadScriptFile("..\\UnitTestFramework\\UnitTestFramework.js");
  7. function testRangeError(tag) {
  8. assert.throws(function () { Intl.getCanonicalLocales(tag) }, RangeError,
  9. `Tag '${tag}' should throw RangeError`,
  10. `Locale '${tag}' is not well-formed`);
  11. }
  12. /**
  13. * Allows different values to be asserted depending on the Intl implementation
  14. *
  15. * @param {String|String[]} expectedWinGlob
  16. * @param {String|String[]} expectedICU
  17. * @param {String|String[]} actual
  18. * @param {String} message
  19. */
  20. const equal = (function () {
  21. if (WScript.Platform.INTL_LIBRARY === "icu") {
  22. return function (_, expectedICU, actual, message) {
  23. assert.areEqual(expectedICU, actual, message);
  24. }
  25. } else {
  26. assert.isTrue(WScript.Platform.INTL_LIBRARY === "winglob");
  27. return function (expectedWinGlob, _, actual, message) {
  28. assert.areEqual(expectedWinGlob, actual, message);
  29. }
  30. }
  31. })();
  32. const gcl = Intl.getCanonicalLocales;
  33. var tests = [
  34. {
  35. name: "Intl.getCanonicalLocales Functionality (according to ECMA 402 #sec-canonicalizelocalelist)",
  36. // ensure array (or array-like) or convert to array, canonicalize each entry, remove duplicates
  37. body: function () {
  38. // ensure output is an array even if input was not an array
  39. assert.areEqual(Intl.getCanonicalLocales('en'), ['en'], "Input is a singleton string (not an array) -> output is array");
  40. // canonicalize case
  41. assert.areEqual(Intl.getCanonicalLocales(['en']), ['en'], "Input matches output, no lookup is performed");
  42. assert.areEqual(Intl.getCanonicalLocales(['en-us']), ['en-US'], "Canonicalize country casing (en-US) (all-lowercase)");
  43. assert.areEqual(Intl.getCanonicalLocales(['en-Us']), ['en-US'], "Canonicalize country casing (en-US) (mixed-case)");
  44. assert.areEqual(Intl.getCanonicalLocales(['EN-us']), ['en-US'], "Canonicalize country casing (en-US) (completely incorrect casing)");
  45. assert.areEqual(Intl.getCanonicalLocales(['de-de']), ['de-DE'], "Canonicalize country casing (de-DE)");
  46. // array-like objects are be fine (according to spec, arrays are converted to Object anyway)
  47. // ECMA 402 #sec-canonicalizelocalelist
  48. // 5. Let len be ? ToLength(? Get(O, "length")).
  49. // 6. Let k be 0.
  50. // 7. Repeat, while k < len
  51. // Since ToLength(undefined) === 0, we don't enter the loop (essentially treat the input as a zero-length array).
  52. // ToLength(undefined) -> ToInteger(undefined) -> ToNumber(undefined) -> NaN
  53. // ToInteger converts NaN to +0.
  54. assert.areEqual(Intl.getCanonicalLocales({ '0': 'en-us' }), [], "Objects which might look like arrays are fine, but treated as 0 length.");
  55. assert.areEqual(Intl.getCanonicalLocales({ 'a': 'b' }), [], "Arbitrary Objects are fine, treated as 0-length arrays.");
  56. // Objects contained in the input array are fine if their toString is a valid language tag.
  57. assert.areEqual(Intl.getCanonicalLocales(['en-us', { toString() { return 'en-us' } }]), ['en-US'], "Object.toString returning a valid language tag is fine.");
  58. assert.throws(function () { Intl.getCanonicalLocales([{ toString() { return undefined } }]) }, RangeError,
  59. "Object.toString returning a non-string or invalid language tag is RangeError.");
  60. // canonicalization of script code subkey
  61. assert.areEqual(Intl.getCanonicalLocales(['zh-hans-cn']), ['zh-Hans-CN'], "Chinese (zh) Han Simplified (Hans) as used in China (CN)");
  62. assert.areEqual(Intl.getCanonicalLocales(['zh-hant-hk']), ['zh-Hant-HK'], "Chinese (zh) Han Traditional (Hant) as used in Hong Kong (HK)");
  63. // language-extlang form and other non-preferred forms normalize to preferred ISO 639-3
  64. // This should be handled implicitly by canonicalization routine (no knowledge of language tags required),
  65. // but we make sure it works for some actual languages in any case.
  66. // RFC 5646 2.1:
  67. // language = 2-3ALPHA ["-" extlang]
  68. // extlang = 2-3ALPHA *2("-" 3ALPHA)
  69. // https://en.wikipedia.org/wiki/IETF_language_tag#ISO_639-3_and_ISO_639-1
  70. let mandarinChinese = ['cmn', 'zh-cmn']; // Mandarin Chinese (language-extlang: zh-cmn; prefer ISO 639-3: cmn)
  71. let minNanChinese = ['nan', 'zh-nan', 'zh-min-nan']; // Min-Nan Chinese (ISO 639-3: nan)
  72. let hakkaChinese = ['hak', 'zh-hak', 'zh-hakka', 'i-hak']; // Hakka Chinese (ISO 639-3: hak)
  73. let chineseIn = [].concat(mandarinChinese, minNanChinese, hakkaChinese);
  74. let chineseOut = [].concat(mandarinChinese[0], minNanChinese[0], hakkaChinese[0]); // after de-dup should be only these three preferred codes
  75. assert.areEqual(Intl.getCanonicalLocales(chineseIn), chineseOut, "Chinese language-extlang and other forms map to preferred ISO 639-3 codes");
  76. // canonicalization of -u- extension keys
  77. // V8 and CC-ICU convert boolean keys (kn) to boolean string values (including giving them default values),
  78. // which is incorrect. SpiderMonkey and CC-WinGlob correctly avoid this.
  79. // V8 and CC-ICU also give the default value of "yes" to non-boolean keys (co), which also is incorrect.
  80. // Everyone (should) correctly re-order extension keys alphabetically
  81. // Microsoft/ChakraCore#4490 tracks the incorrect defaulting, Microsoft/ChakraCore#2964 tracks the overall investigation
  82. if (WScript.Platform.ICU_VERSION < 62) {
  83. assert.areEqual(["de-DE-u-co-yes-kn-true"], Intl.getCanonicalLocales("de-de-u-kn-co"))
  84. assert.areEqual(["de-DE-u-co-phonebk-kn-true"], Intl.getCanonicalLocales("de-de-u-kn-co-phonebk"))
  85. assert.areEqual(["de-DE-u-co-phonebk-kn-true"], Intl.getCanonicalLocales("de-DE-u-kn-yes-co-phonebk"))
  86. } else {
  87. assert.areEqual(["de-DE-u-co-kn"], Intl.getCanonicalLocales("de-de-u-kn-co"))
  88. assert.areEqual(["de-DE-u-co-phonebk-kn"], Intl.getCanonicalLocales("de-de-u-kn-co-phonebk"))
  89. assert.areEqual(["de-DE-u-co-phonebk-kn"], Intl.getCanonicalLocales("de-DE-u-kn-yes-co-phonebk"))
  90. }
  91. // De-dupe after locales are canonicalized
  92. assert.areEqual(Intl.getCanonicalLocales(['en-us', 'en-us']), ['en-US'], "No duplicates, same input casing (casing was incorrect)");
  93. assert.areEqual(Intl.getCanonicalLocales(['en-US', 'en-US']), ['en-US'], "No duplicates, same input casing (casing was correct)");
  94. assert.areEqual(Intl.getCanonicalLocales(['en-us', 'en-US']), ['en-US'], "No duplicates, different input casing");
  95. assert.areEqual(
  96. ["de-DE", "de-DE-u-co-phonebk-kn"],
  97. Intl.getCanonicalLocales(["de-de", "de-DE-u-co-phonebk-kn-true", "de-DE-u-kn-true-co-phonebk"]),
  98. "No duplicates after re-ordering options"
  99. );
  100. }
  101. },
  102. {
  103. name: "Handling of unsupported tags and subtags (general canonicalization)",
  104. // Intl.getCanonicalLocales does not care whether a locale tag is supported.
  105. // It simply canonicalizes all properly formatted (i.e. "valid") tags.
  106. // Therefore, anything that fits into the general language tag grammar should be canonicalized.
  107. // * ECMA 402 #sec-isstructurallyvalidlanguagetag
  108. // * (Note: The above basically just refers to RFC 5646 section 2.1)
  109. body: function () {
  110. assert.areEqual(Intl.getCanonicalLocales('en-zz'), ['en-ZZ'], "en-ZZ: English as used in [unsupported locale ZZ]");
  111. assert.areEqual(Intl.getCanonicalLocales('ZZ-us'), ['zz-US'], "zz-US: [unsupported language zz] as used in US");
  112. assert.areEqual(Intl.getCanonicalLocales('xx-abcd-zz'), ['xx-Abcd-ZZ'],
  113. "xx-Abcd-ZZ: [unsupported language xx] using [unsupported script Abcd] as used in [unsupported locale ZZ]");
  114. // TODO (doilij): Investigate what is correct/allowable here (Microsoft/ChakraCore#2964)
  115. equal("xx-zzz", "zzz", gcl("xx-zzz")[0]);
  116. // See discussion of defaulting above
  117. if (WScript.Platform.ICU_VERSION < 62) {
  118. assert.areEqual(["xx-ZZ-u-yy-yes-zz-yes"], Intl.getCanonicalLocales("xx-zz-u-zz-yy"));
  119. } else {
  120. assert.areEqual(["xx-ZZ-u-yy-zz"], Intl.getCanonicalLocales("xx-zz-u-zz-yy"));
  121. }
  122. }
  123. },
  124. {
  125. name: "Rejection of duplicate tags",
  126. body: function () {
  127. const duplicateSingletons = ['cmn-hans-cn-u-u', 'cmn-hans-cn-t-u-ca-u'];
  128. const duplicateUnicodeExtensionKeys = ['de-de-u-kn-true-co-phonebk-co-phonebk'];
  129. if (WScript.Platform.INTL_LIBRARY === "icu") {
  130. const duplicateTags = ['de-gregory-gregory'];
  131. duplicateTags.forEach(testRangeError);
  132. }
  133. // duplicateTags.forEach(testRangeError);
  134. duplicateSingletons.forEach(testRangeError);
  135. if (WScript.Platform.INTL_LIBRARY === "winglob") {
  136. duplicateUnicodeExtensionKeys.forEach(testRangeError);
  137. }
  138. }
  139. },
  140. {
  141. name: "Structurally invalid tags",
  142. // * ECMA 402 #sec-canonicalizelocalelist -- step 7.c.iv. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
  143. // * ECMA 402 #sec-isstructurallyvalidlanguagetag
  144. // * (Note: The above basically just refers to RFC 5646 section 2.1)
  145. body: function () {
  146. const empty = [''];
  147. const invalidSubtags = ['en-A1'];
  148. const invalidVariants = ['en-us-latn', 'en-us-latnlatnlatn'];
  149. const invalidChars = ['en-a@'];
  150. const nonAsciiChars = ['中文', 'de-ßß'];
  151. const boundaryHyphen = ['-en', '-en-us', 'en-', 'en-us-'];
  152. const incompleteSubtags = ['de-de-u'];
  153. const extlangNotAllowedAfterScript = ['xx-abcd-zzz', 'xx-yyy-abcd-zzz', 'xx-yyy-Abcd-zzz-aa'];
  154. empty.forEach(testRangeError);
  155. invalidSubtags.forEach(testRangeError);
  156. invalidVariants.forEach(testRangeError);
  157. invalidChars.forEach(testRangeError);
  158. nonAsciiChars.forEach(testRangeError)
  159. boundaryHyphen.forEach(testRangeError);
  160. incompleteSubtags.forEach(testRangeError);
  161. extlangNotAllowedAfterScript.forEach(testRangeError);
  162. }
  163. },
  164. {
  165. name: "Bad/weird input",
  166. body: function () {
  167. // ECMA 402 #sec-canonicalizelocalelist -- step 1.a. if locales is undefined, return []
  168. assert.areEqual(Intl.getCanonicalLocales(), [], "Implicit undefined");
  169. assert.areEqual(Intl.getCanonicalLocales(undefined), [], "Explicit undefined");
  170. // There is no special case for null type inputs in the definition, so throw TypeError
  171. // ECMA 402 #sec-canonicalizelocalelist -- step 4.a. Let O be ? ToObject(locales).
  172. // ECMA 262 #sec-toobject
  173. assert.throws(function () { Intl.getCanonicalLocales(null) }, TypeError, "Cannot convert null to object.");
  174. // Test Number literals
  175. assert.areEqual(Intl.getCanonicalLocales(1), [], "Number is converted to string internally and no locale is found");
  176. assert.areEqual(Intl.getCanonicalLocales(3.14), [], "Number is converted to string internally and no locale is found");
  177. assert.areEqual(Intl.getCanonicalLocales(Infinity), [], "Number is converted to string internally and no locale is found");
  178. assert.areEqual(Intl.getCanonicalLocales(-Infinity), [], "Number is converted to string internally and no locale is found");
  179. assert.areEqual(Intl.getCanonicalLocales(NaN), [], "Number is converted to string internally and no locale is found");
  180. // Test other types of literals
  181. assert.areEqual(Intl.getCanonicalLocales(true), [], "Boolean is converted to string internally and no locale is found");
  182. assert.areEqual(Intl.getCanonicalLocales(Symbol.toStringTag), [], "Symbol is converted to string internally and no locale is found");
  183. // RegExp and Object literals
  184. assert.areEqual(Intl.getCanonicalLocales(/a/), [], "RegExp is converted to string internally and no locale is found");
  185. assert.areEqual(Intl.getCanonicalLocales(/en-us/), [], "RegExp is converted to string internally and no locale is found");
  186. assert.areEqual(Intl.getCanonicalLocales([]), [], "Object is converted to string internally and no locale is found");
  187. assert.areEqual(Intl.getCanonicalLocales({}), [], "Object is converted to string internally and no locale is found");
  188. assert.areEqual(Intl.getCanonicalLocales({ '0': 'en-us' }), [], "Object is converted to string internally and no locale is found");
  189. assert.areEqual(Intl.getCanonicalLocales(['en-us', { toString: () => 'en-us' }]), ['en-US'], "Element is an Object whose toString produces a valid language tag");
  190. assert.areEqual(Intl.getCanonicalLocales({ toString: () => 'en-us' }), [], "Argument is an Object which doesn't have any numeric indexes");
  191. // Arrays containing anything which is not String or Object type should throw.
  192. // ECMA 402 #sec-canonicalizelocalelist
  193. // * step 7.c.ii. If Type(kValue) is not String or Object, throw a TypeError exception.
  194. // * step 7.c.iii. Let tag be ? ToString(kValue).
  195. assert.throws(function () { Intl.getCanonicalLocales(['en-us', null]) }, TypeError, "null is not String or Object.");
  196. assert.throws(function () { Intl.getCanonicalLocales(['en-us', 1]) }, TypeError, "Number is not String or Object.");
  197. assert.throws(function () { Intl.getCanonicalLocales(['en-us', 3.14]) }, TypeError, "Number is not String or Object.");
  198. assert.throws(function () { Intl.getCanonicalLocales(['en-us', Infinity]) }, TypeError, "Number is not String or Object.");
  199. assert.throws(function () { Intl.getCanonicalLocales(['en-us', -Infinity]) }, TypeError, "Number is not String or Object.");
  200. assert.throws(function () { Intl.getCanonicalLocales(['en-us', NaN]) }, TypeError, "Number is not String or Object.");
  201. assert.throws(function () { Intl.getCanonicalLocales(['en-us', true]) }, TypeError, "Boolean is not String or Object.");
  202. assert.throws(function () { Intl.getCanonicalLocales(['en-us', Symbol.toStringTag]) }, TypeError, "Symbol is not String or Object.");
  203. // RegExp and Object literals
  204. // * step 7.c.iv. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
  205. assert.throws(function () { Intl.getCanonicalLocales(['en-us', /a/]) }, RangeError, "RegExp is an Object, whose toString is not a well-formed language tag.");
  206. assert.throws(function () { Intl.getCanonicalLocales(['en-us', /en-us/]) }, RangeError, "RegExp is an Object, whose toString is not a well-formed language tag.");
  207. assert.throws(function () { Intl.getCanonicalLocales(['en-us', []]) }, RangeError, "Array contained within an array. [].toString()==='' (invalid tag).");
  208. assert.throws(function () { Intl.getCanonicalLocales(['en-us', {}]) }, RangeError, "Object whose toString is not a well-formed language tag.");
  209. assert.throws(function () { Intl.getCanonicalLocales([{ '0': 'en-us' }]) }, RangeError, "Array containing object where toString() produces an invalid tag.");
  210. }
  211. },
  212. {
  213. name: "Array with holes",
  214. body: function () {
  215. let a = [];
  216. a[1] = 'en';
  217. assert.areEqual(Intl.getCanonicalLocales(a), ['en']);
  218. }
  219. },
  220. {
  221. name: "Array-like object (without holes)",
  222. body: function () {
  223. let locales = {
  224. length: 2,
  225. 0: 'zh',
  226. 1: 'en'
  227. };
  228. assert.areEqual(Intl.getCanonicalLocales(locales), ['zh', 'en']);
  229. }
  230. },
  231. {
  232. name: "Array-like object (with holes)",
  233. body: function () {
  234. let locales = {
  235. length: 2,
  236. // 0: 'zh',
  237. 1: 'en'
  238. };
  239. assert.areEqual(Intl.getCanonicalLocales(locales), ['en']);
  240. }
  241. },
  242. {
  243. name: "Array-like class with numeric getters (without holes)",
  244. body: function () {
  245. class x {
  246. get 0() { return 'zh'; }
  247. get 1() { return 'en'; }
  248. get length() { return 2; }
  249. }
  250. let locales = new x();
  251. assert.areEqual(Intl.getCanonicalLocales(locales), ['zh', 'en']);
  252. }
  253. },
  254. {
  255. name: "Array-like class with numeric getters (with holes)",
  256. body: function () {
  257. class x {
  258. // get 0() { return 'zh'; } // culture[0] is a hole
  259. get 1() { return 'en'; }
  260. get length() { return 2; }
  261. }
  262. let locales = new x();
  263. assert.areEqual(Intl.getCanonicalLocales(locales), ['en']);
  264. }
  265. },
  266. {
  267. name: "Array-like class with numeric getters (with base class closing the hole)",
  268. body: function () {
  269. class base {
  270. get 0() { return 'jp'; } // closes the hole in x
  271. }
  272. class x extends base {
  273. // get 0() { return 'zh'; } // culture[0] has a hole
  274. get 1() { return 'en'; }
  275. get length() { return 2; } // try 2 with get 0 defined in base; try 2,3 with get 2 defined in base; try 3 with get 0, get 1 defined
  276. }
  277. let locales = new x();
  278. assert.areEqual(Intl.getCanonicalLocales(locales), ['jp', 'en']);
  279. }
  280. }
  281. ];
  282. testRunner.runTests(tests, { verbose: WScript.Arguments[0] != "summary" });