// regex-case-folding.js

//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
  5. WScript.LoadScriptFile("..\\UnitTestFramework\\UnitTestFramework.js");
  6. var tests = [
  7. {
  8. name: "Case-folding should be applied for a single character pattern when the unicode flag is present",
  9. body: function () {
  10. assert.isTrue(/a/ui.test("A"), "UnicodeData fallback");
  11. assert.isTrue(/\u004b/ui.test("\u212a"), "Code unit");
  12. }
  13. },
  14. {
  15. name: "Case-folding should NOT be applied for a single character pattern when the unicode flag is NOT present",
  16. body: function () {
  17. assert.isFalse(/\u004b/i.test("\u212a"));
  18. }
  19. },
  20. {
  21. name: "Case-folding should be applied for a single character term when the unicode flag is present",
  22. body: function () {
  23. assert.isTrue(/aa|b/ui.test("B"), "UnicodeData fallback");
  24. assert.isTrue(/aa|\u004b/ui.test("\u212a"), "Code unit");
  25. }
  26. },
  27. {
  28. name: "Case-folding should NOT be applied for a single character term when the unicode flag is NOT present",
  29. body: function () {
  30. assert.isFalse(/aa|\u004b/i.test("\u212a"));
  31. }
  32. },
  33. {
  34. name: "Case-folding should be applied for literals using literal instruction when the unicode flag is present",
  35. body: function () {
  36. assert.isTrue(/^aaa/ui.test("aaA"), "MatchLiteralInst: UnicodeData fallback");
  37. assert.isTrue(/^aa\u004b/ui.test("aa\u212a"), "MatchLiteralInst: Code unit");
  38. assert.isTrue(/aaa/ui.test("aaA"), "SyncToLiteral...Inst: UnicodeData fallback");
  39. assert.isTrue(/aa\u004b/ui.test("aa\u212a"), "SyncToLiteral...Inst: Code unit");
  40. assert.isTrue(/aa\u{10429}/ui.test("aa\u{10401}"), "Code point in both RegExp and string to test");
  41. assert.isTrue(/aa\u{10429}/ui.test("aa\ud801\udc01"), "Code point in RegExp and surrogate pair in string to test");
  42. assert.isTrue(/aa\ud801\udc29/ui.test("aa\u{10401}"), "Surrogate pair in RegExp and code point in string to test");
  43. assert.isTrue(/aa\ud801\udc29/ui.test("aa\ud801\udc01"), "Surrogate pair in both RegExp and string to test");
  44. assert.isTrue(/aa\u{10429}\u{10429}/ui.test("aa\u{10401}\u{10401}"), "Multiple code points");
  45. assert.isTrue(/aa\ud801\udc29\ud801\udc29/ui.test("aa\ud801\udc29\ud801\udc29"), "Multiple surrogate pairs");
  46. }
  47. },
  48. {
  49. name: "Case-folding should NOT be applied for literals using literal instruction when the unicode flag is NOT present",
  50. body: function () {
  51. assert.isFalse(/^aa\u004b/i.test("aa\u212a"), "MatchLiteralInst");
  52. assert.isFalse(/aa\u004b/i.test("aa\u212a"), "SyncToLiteral...Inst");
  53. }
  54. },
  55. {
  56. name: "Case-folding should be applied for character sets when the unicode flag is present",
  57. body: function () {
  58. assert.isTrue(/^[ab]/ui.test("A"), "MatchSetInst: UnicodeData fallback");
  59. assert.isTrue(/^[a\u004b]/ui.test("\u212a"), "MatchSetInst: Code unit");
  60. assert.isTrue(/[ab]/ui.test("A"), "SyncToSet...Inst: UnicodeData fallback");
  61. assert.isTrue(/[a\u004b]/ui.test("\u212a"), "SyncToSet...Inst: Code unit");
  62. assert.isTrue(/[a\u{10429}]/ui.test("\u{10401}"), "Code point in both RegExp and in string to test");
  63. assert.isTrue(/[a\u{10429}]/ui.test("\ud801\udc01"), "Code point in RegExp and surrogate pair in string to test");
  64. assert.isTrue(/[a\ud801\udc29]/ui.test("\u{10401}"), "Surrogate pair in RegExp and code point in string to test");
  65. assert.isTrue(/[a\ud801\udc29]/ui.test("\ud801\udc01"), "Surrogate pair in both RegExp and string to test");
  66. assert.isTrue(/[\u{10428}-\u{10430}]/ui.test("\u{10401}"), "Code point range");
  67. assert.isTrue(/[\ud801\udc28-\ud801\udc30]/ui.test("\ud801\udc01"), "Surrogate pair range");
  68. }
  69. },
  70. {
  71. name: "Case-folding should NOT be applied for character sets when the unicode flag is NOT present",
  72. body: function () {
  73. assert.isFalse(/^[a\u004b]/i.test("\u212a"), "MatchSetInst");
  74. assert.isFalse(/[a\u004b]/i.test("\u212a"), "SyncToSet...Inst");
  75. }
  76. },
  77. {
  78. name: "Case-folding should be applied for back references when the unicode flag is present",
  79. body: function () {
  80. assert.isTrue(/(a)\1/ui.test("aA"), "UnicodeData fallback");
  81. assert.isTrue(/(\u004b)\1/ui.test("\u004b\u212a"), "Code unit");
  82. assert.isTrue(/(\u{10429})\1/ui.test("\u{10429}\u{10401}"), "Code point in both RegExp and string to test");
  83. assert.isTrue(/(\u{10429})\1/ui.test("\u{10429}\ud801\udc01"), "Code point in RegExp and surrogate pair in string to test");
  84. assert.isTrue(/(\ud801\udc29)\1/ui.test("\ud801\udc29\u{10401}"), "Surrogate pair in RegExp and code point in string to test");
  85. assert.isTrue(/(\ud801\udc29)\1/ui.test("\ud801\udc29\ud801\udc29"), "Surrogate pair in both RegExp and string to test");
  86. assert.isTrue(/(\u{10429}\u{10429})\1/ui.test("\u{10429}\u{10429}\u{10401}\u{10401}"), "Multiple code points");
  87. assert.isTrue(/(\ud801\udc29\ud801\udc29)\1/ui.test("\ud801\udc29\ud801\udc29\ud801\udc01\ud801\udc01"), "Multiple surrogate pairs");
  88. }
  89. },
  90. {
  91. name: "Case-folding should NOT be applied for back references when the unicode flag is NOT present",
  92. body: function () {
  93. assert.isFalse(/(\u004b)\1/i.test("\u004b\u212a"), "Code unit");
  94. }
  95. },
  96. {
  97. name: "Case-folding should be applied for quantifiers when the unicode flag is present",
  98. body: function () {
  99. assert.isTrue(/^aa(?:\u004b)?/ui.test("aa\u212a"), "?");
  100. assert.isTrue(/^aa(?:\u004b)+/ui.test("aa\u004b\u212a"), "+");
  101. assert.isTrue(/^aa(?:\u004b)*/ui.test("aa\u004b\u212a"), "*");
  102. assert.isTrue(/^aa(?:\u004b){2}/ui.test("aa\u004b\u212a"), "{2}");
  103. }
  104. },
  105. {
  106. name: "Up to four code points should be in the same case-folding equivalence group",
  107. body: function () {
  108. var equivs = ["0399", "03b9", "1fbe"];
  109. equivs.forEach(function (hex) {
  110. var equivChar = eval("'\\u" + hex + "'");
  111. assert.isTrue(/\u0345/ui.test(equivChar), "\\u0345 -> \\u" + hex + " as a single character pattern");
  112. assert.isTrue(/^\u0345/ui.test(equivChar), "MatchChar4Inst: \\u0345 -> \\u" + hex + " as a single character pattern");
  113. assert.isTrue(/aa|\u0345/ui.test(equivChar), "SyncToSetAndContinue: \\u0345 -> \\u" + hex + " as a single character term");
  114. assert.isTrue(/aa\u0345/ui.test("aa" + equivChar), "\\u0345 -> \\u" + hex + " in literal");
  115. assert.isTrue(/[a\u0345]/ui.test(equivChar), "\\u0345 -> \\u" + hex + " in character set");
  116. assert.isTrue(/(\u0345)\1/ui.test("\u0345" + equivChar), "\\u0345 -> \\u" + hex + " in back reference");
  117. });
  118. }
  119. }
  120. ];
  121. testRunner.runTests(tests, { verbose: WScript.Arguments[0] != "summary" });