Chars.h 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. namespace UnifiedRegex
  7. {
  // Primary character-traits template. For an arbitrary character type C it
  // exposes nothing but C itself under the name Char; the constants and
  // conversion helpers live in the explicit specializations of Chars.
  template <class C>
  struct Chars
  {
      typedef C Char;
  };
  13. template <>
  14. struct Chars<uint8>
  15. {
  16. typedef uint8 Char;
  17. typedef uint8 UChar;
  18. static const int CharWidth = sizeof(char) * 8;
  19. static const int NumChars = 1 << CharWidth;
  20. static const uint MaxUChar = (uint8)-1;
  21. static const uint MaxUCharAscii = (1 << 7) - 1;
  22. static const Char MinChar = (Char)0;
  23. static const Char MaxChar = (Char)MaxUChar;
  24. // Char to unsigned int
  25. static inline uint CTU(Char c)
  26. {
  27. return (uint)c;
  28. }
  29. // Unsigned int to Char
  30. static inline Char UTC(uint u) {
  31. Assert(u <= MaxUChar);
  32. return (Char)u;
  33. }
  34. // int to Char
  35. static inline Char ITC(int i) {
  36. Assert(i >= 0 && i <= MaxUChar);
  37. return (Char)i;
  38. }
  39. // Char to char16
  40. static inline char16 CTW(Char c)
  41. {
  42. return (char16)c;
  43. }
  44. // Offset, same buffer
  45. static inline CharCount OSB(const Char* ph, const Char* pl)
  46. {
  47. Assert(ph >= pl && ph - pl <= MaxCharCount);
  48. return (CharCount)(ph - pl);
  49. }
  50. static inline Char Shift(Char c, int n)
  51. {
  52. return UTC(CTU(c) + n);
  53. }
  54. };
  55. template <>
  56. struct Chars<char>
  57. {
  58. typedef char Char;
  59. typedef uint8 UChar;
  60. static const int CharWidth = sizeof(char) * 8;
  61. static const int NumChars = 1 << CharWidth;
  62. static const uint MaxUChar = (uint8)-1;
  63. static const uint MaxUCharAscii = (1 << 7) - 1;
  64. static const Char MinChar = (Char)0;
  65. static const Char MaxChar = (Char)MaxUChar;
  66. // Char to unsigned int
  67. static inline uint CTU(Char c)
  68. {
  69. return (uint8)c;
  70. }
  71. // Unsigned int to Char
  72. static inline Char UTC(uint u) {
  73. Assert(u <= MaxUChar);
  74. return (Char)u;
  75. }
  76. // int to Char
  77. static inline Char ITC(int i) {
  78. Assert(i >= 0 && i <= MaxUChar);
  79. return (Char)(uint8)i;
  80. }
  81. // Char to char16
  82. static inline char16 CTW(Char c)
  83. {
  84. return (char16)(uint8)c;
  85. }
  86. // Offset, same buffer
  87. static inline CharCount OSB(const Char* ph, const Char* pl)
  88. {
  89. Assert(ph >= pl && ph - pl <= MaxCharCount);
  90. return (CharCount)(ph - pl);
  91. }
  92. static inline Char Shift(Char c, int n)
  93. {
  94. return UTC(CTU(c) + n);
  95. }
  96. };
  97. template <>
  98. struct Chars<char16>
  99. {
  100. typedef char16 Char;
  101. typedef uint16 UChar;
  102. static const int CharWidth = sizeof(char16) * 8;
  103. static const int NumChars = 1 << CharWidth;
  104. static const uint MaxUChar = (uint16)-1;
  105. static const uint MaxUCharAscii = (1 << 7) - 1;
  106. static const Char MinChar = (Char)0;
  107. static const Char MaxChar = (Char)MaxUChar;
  108. // Char to unsigned int
  109. static inline uint CTU(Char c)
  110. {
  111. return (uint16)c;
  112. }
  113. // Unsigned int to Char
  114. static inline Char UTC(uint u)
  115. {
  116. Assert(u <= MaxUChar);
  117. return (Char)u;
  118. }
  119. // int to Char
  120. static inline Char ITC(int i) {
  121. Assert(i >= 0 && i <= MaxUChar);
  122. return (Char)(uint16)i;
  123. }
  124. // Char to char16
  125. static inline char16 CTW(Char c)
  126. {
  127. return c;
  128. }
  129. // Offset, same buffer
  130. static inline CharCount OSB(const Char* ph, const Char* pl)
  131. {
  132. Assert(ph >= pl && ph - pl <= MaxCharCount);
  133. return (CharCount)(ph - pl);
  134. }
  135. static inline Char Shift(Char c, int n)
  136. {
  137. return UTC(CTU(c) + n);
  138. }
  139. };
  140. template <>
  141. struct Chars<codepoint_t>
  142. {
  143. typedef codepoint_t Char;
  144. typedef codepoint_t UChar;
  145. static const int CharWidth = sizeof(codepoint_t) * 8;
  146. static const int NumChars = 0x110000;
  147. static const uint MaxUChar = (NumChars) - 1;
  148. static const uint MaxUCharAscii = (1 << 7) - 1;
  149. static const Char MinChar = (Char)0;
  150. static const Char MaxChar = (Char)MaxUChar;
  151. // Char to unsigned int
  152. static inline uint CTU(Char c)
  153. {
  154. Assert(c <= MaxChar);
  155. return (codepoint_t)c;
  156. }
  157. // Unsigned int to Char
  158. static inline Char UTC(uint u)
  159. {
  160. Assert(u <= MaxUChar);
  161. return (Char)u;
  162. }
  163. // int to Char
  164. static inline Char ITC(int i) {
  165. Assert(i >= 0 && i <= MaxUChar);
  166. return (Char)(codepoint_t)i;
  167. }
  168. // Char to char16
  169. static inline char16 CTW(Char c)
  170. {
  171. Assert(c < Chars<char16>::MaxUChar);
  172. return (char16)c;
  173. }
  174. // Offset, same buffer
  175. static inline CharCount OSB(const Char* ph, const Char* pl)
  176. {
  177. Assert(ph >= pl && ph - pl <= MaxCharCount);
  178. return (CharCount)(ph - pl);
  179. }
  180. static inline Char Shift(Char c, int n)
  181. {
  182. return UTC(CTU(c) + n);
  183. }
  184. };
  185. }