|
|
@@ -5,9 +5,16 @@
|
|
|
|
|
|
#include "RuntimePlatformAgnosticPch.h"
|
|
|
#include "UnicodeText.h"
|
|
|
+#ifdef HAS_REAL_ICU
|
|
|
#include <unicode/uchar.h>
|
|
|
#include <unicode/ustring.h>
|
|
|
#include <unicode/normalizer2.h>
|
|
|
+#else
|
|
|
+#define UErrorCode int
|
|
|
+#define U_ZERO_ERROR 0
|
|
|
+#define UChar char16
|
|
|
+#include <string.h>
|
|
|
+#endif
|
|
|
|
|
|
namespace PlatformAgnostic
|
|
|
{
|
|
|
@@ -20,6 +27,7 @@ namespace PlatformAgnostic
|
|
|
static_assert(sizeof(char16) == sizeof(UChar),
|
|
|
"This implementation depends on ICU char size matching char16's size");
|
|
|
|
|
|
+#ifdef HAS_REAL_ICU
|
|
|
// Helper ICU conversion facilities
|
|
|
static const Normalizer2* TranslateToICUNormalizer(NormalizationForm normalizationForm)
|
|
|
{
|
|
|
@@ -49,10 +57,10 @@ namespace PlatformAgnostic
|
|
|
return normalizer;
|
|
|
}
|
|
|
|
|
|
- //
|
|
|
+ //
|
|
|
// Check if a UTF16 string is valid according to the UTF16 standard
|
|
|
// Specifically, check that we don't have any invalid surrogate pairs
|
|
|
- // If the string is valid, we return true.
|
|
|
+ // If the string is valid, we return true.
|
|
|
// If not, we set invalidIndex to the index of the first invalid char index
|
|
|
// and return false
|
|
|
// If the invalid char is a lead surrogate pair, we return its index
|
|
|
@@ -80,7 +88,7 @@ namespace PlatformAgnostic
|
|
|
if (c == 0)
|
|
|
{
|
|
|
return true;
|
|
|
- }
|
|
|
+ }
|
|
|
if (U_IS_SURROGATE(c))
|
|
|
{
|
|
|
if (U16_IS_LEAD(c))
|
|
|
@@ -88,11 +96,11 @@ namespace PlatformAgnostic
|
|
|
*invalidIndex = i;
|
|
|
}
|
|
|
else
|
|
|
- {
|
|
|
+ {
|
|
|
Assert(i > 0);
|
|
|
*invalidIndex = i - 1;
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
@@ -100,7 +108,7 @@ namespace PlatformAgnostic
|
|
|
{
|
|
|
return true;
|
|
|
}
|
|
|
- }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
static ApiError TranslateUErrorCode(UErrorCode icuError)
|
|
|
@@ -133,7 +141,7 @@ namespace PlatformAgnostic
|
|
|
Assert(destString != nullptr || destLength == 0);
|
|
|
|
|
|
// This is semantically different than the Win32 NormalizeString API
|
|
|
- // For our usage, we always pass in the length rather than letting Windows
|
|
|
+ // For our usage, we always pass in the length rather than letting Windows
|
|
|
// calculate the length for us
|
|
|
Assert(sourceLength > 0);
|
|
|
Assert(destLength >= 0);
|
|
|
@@ -179,7 +187,7 @@ namespace PlatformAgnostic
|
|
|
*pErrorOut = TranslateUErrorCode(errorCode);
|
|
|
return -1;
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
return normalizedStringLength;
|
|
|
}
|
|
|
|
|
|
@@ -193,7 +201,7 @@ namespace PlatformAgnostic
|
|
|
{
|
|
|
length = u_strlen((const UChar*) testString);
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// On Windows, IsNormalizedString returns failure if the string
|
|
|
// is a malformed utf16 string. Maintain the same behavior here.
|
|
|
size_t invalidIndex = 0;
|
|
|
@@ -201,7 +209,7 @@ namespace PlatformAgnostic
|
|
|
{
|
|
|
return false;
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
const Normalizer2* normalizer = TranslateToICUNormalizer(normalizationForm);
|
|
|
Assert(normalizer != nullptr);
|
|
|
|
|
|
@@ -212,24 +220,83 @@ namespace PlatformAgnostic
|
|
|
return isNormalized;
|
|
|
}
|
|
|
|
|
|
- // Since we're using the ICU here, this is trivially true
|
|
|
- bool IsExternalUnicodeLibraryAvailable()
|
|
|
+ bool IsWhitespace(codepoint_t ch)
|
|
|
{
|
|
|
- return true;
|
|
|
+ return u_isUWhiteSpace(ch) == 1;
|
|
|
}
|
|
|
|
|
|
+ int32 ChangeStringLinguisticCase(CaseFlags caseFlags, const char16* sourceString, uint32 sourceLength, char16* destString, uint32 destLength, ApiError* pErrorOut)
|
|
|
+ {
|
|
|
+ int32_t resultStringLength = 0;
|
|
|
+ UErrorCode errorCode = U_ZERO_ERROR;
|
|
|
+
|
|
|
+ static_assert(sizeof(UChar) == sizeof(char16), "Unexpected char type from ICU, function might have to be updated");
|
|
|
+ if (caseFlags == CaseFlagsUpper)
|
|
|
+ {
|
|
|
+ resultStringLength = u_strToUpper((UChar*) destString, destLength,
|
|
|
+ (UChar*) sourceString, sourceLength, NULL, &errorCode);
|
|
|
+ }
|
|
|
+ else if (caseFlags == CaseFlagsLower)
|
|
|
+ {
|
|
|
+ resultStringLength = u_strToLower((UChar*) destString, destLength,
|
|
|
+ (UChar*) sourceString, sourceLength, NULL, &errorCode);
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ Assert(false);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (U_FAILURE(errorCode))
|
|
|
+ {
|
|
|
+ *pErrorOut = TranslateUErrorCode(errorCode);
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Todo: check for resultStringLength > destLength
|
|
|
+ // Return insufficient buffer in that case
|
|
|
+ return resultStringLength;
|
|
|
+ }
|
|
|
+#else
|
|
|
bool IsWhitespace(codepoint_t ch)
|
|
|
{
|
|
|
- return u_isUWhiteSpace(ch) == 1;
|
|
|
+ // todo : fix this!!!
|
|
|
+ char *asc = (char*)&ch;
|
|
|
+ return asc[0] == ' ' || asc[0] == '\n' || asc[0] == '\t' || asc[0] == '\r';
|
|
|
+ }
|
|
|
+
|
|
|
+ bool IsNormalizedString(NormalizationForm normalizationForm, const char16* testString, int32 testStringLength) {
|
|
|
+ // TODO: implement this
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+
|
|
|
+#define EMPTY_COPY \
|
|
|
+ const int len = (destLength <= sourceLength) ? destLength - 1 : sourceLength; \
|
|
|
+ memcpy(destString, sourceString, len * sizeof(char16)); \
|
|
|
+ destString[len] = char16(0); \
|
|
|
+ *pErrorOut = NoError; \
|
|
|
+ return len;
|
|
|
+
|
|
|
+ int32 NormalizeString(NormalizationForm normalizationForm, const char16* sourceString, uint32 sourceLength, char16* destString, int32 destLength, ApiError* pErrorOut)
|
|
|
+ {
|
|
|
+ // TODO: implement this
|
|
|
+ EMPTY_COPY
|
|
|
+ }
|
|
|
+
|
|
|
+ int32 ChangeStringLinguisticCase(CaseFlags caseFlags, const char16* sourceString, uint32 sourceLength, char16* destString, uint32 destLength, ApiError* pErrorOut)
|
|
|
+ {
|
|
|
+ // TODO: implement this
|
|
|
+ EMPTY_COPY
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
bool IsIdStart(codepoint_t ch)
|
|
|
{
|
|
|
+#ifdef HAS_REAL_ICU
|
|
|
if (u_isIDStart(ch))
|
|
|
{
|
|
|
return true;
|
|
|
}
|
|
|
-
|
|
|
+#endif
|
|
|
// Following codepoints are treated as part of ID_Start
|
|
|
// for backwards compatibility as per section 2.5 of the Unicode 8 spec
|
|
|
// See http://www.unicode.org/reports/tr31/tr31-23.html#Backward_Compatibility
|
|
|
@@ -243,14 +310,15 @@ namespace PlatformAgnostic
|
|
|
default: return false;
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
bool IsIdContinue(codepoint_t ch)
|
|
|
{
|
|
|
+#ifdef HAS_REAL_ICU
|
|
|
if (u_hasBinaryProperty(ch, UCHAR_ID_CONTINUE) == 1)
|
|
|
{
|
|
|
return true;
|
|
|
}
|
|
|
-
|
|
|
+#endif
|
|
|
// Following codepoints are treated as part of ID_Continue
|
|
|
// for backwards compatibility as per section 2.5 of the Unicode 8 spec
|
|
|
// See http://www.unicode.org/reports/tr31/tr31-23.html#Backward_Compatibility
|
|
|
@@ -273,8 +341,47 @@ namespace PlatformAgnostic
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ uint32 ChangeStringCaseInPlace(CaseFlags caseFlags, char16* stringToChange, uint32 bufferLength)
|
|
|
+ {
|
|
|
+#ifndef HAS_REAL_ICU
|
|
|
+ return bufferLength;
|
|
|
+#else
|
|
|
+ // Assert pointers
|
|
|
+ Assert(stringToChange != nullptr);
|
|
|
+ ApiError error = NoError;
|
|
|
+
|
|
|
+ if (bufferLength == 0 || stringToChange == nullptr)
|
|
|
+ {
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ int32 ret = ChangeStringLinguisticCase(caseFlags, stringToChange, bufferLength, stringToChange, bufferLength, &error);
|
|
|
+
|
|
|
+ // Callers to this function don't expect any errors
|
|
|
+ Assert(error == ApiError::NoError);
|
|
|
+ Assert(ret > 0);
|
|
|
+ return (uint32) ret;
|
|
|
+#endif
|
|
|
+ }
|
|
|
+
|
|
|
+ int LogicalStringCompare(const char16* string1, const char16* string2)
|
|
|
+ {
|
|
|
+ return PlatformAgnostic::UnicodeText::Internal::LogicalStringCompareImpl<char16>(string1, string2);
|
|
|
+ }
|
|
|
+
|
|
|
+ bool IsExternalUnicodeLibraryAvailable()
|
|
|
+ {
|
|
|
+#if defined(HAS_REAL_ICU)
|
|
|
+ // Since we're using the ICU here, this is trivially true
|
|
|
+ return true;
|
|
|
+#else
|
|
|
+ return false;
|
|
|
+#endif
|
|
|
+ }
|
|
|
+
|
|
|
UnicodeGeneralCategoryClass GetGeneralCategoryClass(codepoint_t ch)
|
|
|
{
|
|
|
+#ifdef HAS_REAL_ICU
|
|
|
int8_t charType = u_charType(ch);
|
|
|
|
|
|
if (charType == U_LOWERCASE_LETTER ||
|
|
|
@@ -321,7 +428,7 @@ namespace PlatformAgnostic
|
|
|
{
|
|
|
return UnicodeGeneralCategoryClass::CategoryClassConnectorPunctuation;
|
|
|
}
|
|
|
-
|
|
|
+#endif
|
|
|
return UnicodeGeneralCategoryClass::CategoryClassOther;
|
|
|
}
|
|
|
|
|
|
@@ -330,13 +437,14 @@ namespace PlatformAgnostic
|
|
|
// Windows and Linux
|
|
|
CharacterClassificationType GetLegacyCharacterClassificationType(char16 character)
|
|
|
{
|
|
|
+#ifdef HAS_REAL_ICU
|
|
|
auto charTypeMask = U_GET_GC_MASK(character);
|
|
|
|
|
|
if ((charTypeMask & U_GC_L_MASK) != 0)
|
|
|
{
|
|
|
return CharacterClassificationType::Letter;
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
if ((charTypeMask & (U_GC_ND_MASK | U_GC_P_MASK)) != 0)
|
|
|
{
|
|
|
return CharacterClassificationType::DigitOrPunct;
|
|
|
@@ -346,71 +454,15 @@ namespace PlatformAgnostic
|
|
|
// * C1_SPACE corresponds to the Unicode Zs category.
|
|
|
// * C1_BLANK corresponds to a hardcoded list thats ill-defined.
|
|
|
// We'll skip that compatibility here and just check for Zs.
|
|
|
- // We explicitly check for 0xFEFF to satisfy the unit test in es5/Lex_u3.js
|
|
|
+ // We explicitly check for 0xFEFF to satisfy the unit test in es5/Lex_u3.js
|
|
|
if ((charTypeMask & U_GC_ZS_MASK) != 0 ||
|
|
|
character == 0xFEFF ||
|
|
|
character == 0xFFFE)
|
|
|
{
|
|
|
return CharacterClassificationType::Whitespace;
|
|
|
}
|
|
|
-
|
|
|
+#endif
|
|
|
return CharacterClassificationType::Invalid;
|
|
|
}
|
|
|
-
|
|
|
- int32 ChangeStringLinguisticCase(CaseFlags caseFlags, const char16* sourceString, uint32 sourceLength, char16* destString, uint32 destLength, ApiError* pErrorOut)
|
|
|
- {
|
|
|
- int32_t resultStringLength = 0;
|
|
|
- UErrorCode errorCode = U_ZERO_ERROR;
|
|
|
-
|
|
|
- static_assert(sizeof(UChar) == sizeof(char16), "Unexpected char type from ICU, function might have to be updated");
|
|
|
- if (caseFlags == CaseFlagsUpper)
|
|
|
- {
|
|
|
- resultStringLength = u_strToUpper((UChar*) destString, destLength,
|
|
|
- (UChar*) sourceString, sourceLength, NULL, &errorCode);
|
|
|
- }
|
|
|
- else if (caseFlags == CaseFlagsLower)
|
|
|
- {
|
|
|
- resultStringLength = u_strToLower((UChar*) destString, destLength,
|
|
|
- (UChar*) sourceString, sourceLength, NULL, &errorCode);
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- Assert(false);
|
|
|
- }
|
|
|
-
|
|
|
- if (U_FAILURE(errorCode))
|
|
|
- {
|
|
|
- *pErrorOut = TranslateUErrorCode(errorCode);
|
|
|
- return -1;
|
|
|
- }
|
|
|
-
|
|
|
- // Todo: check for resultStringLength > destLength
|
|
|
- // Return insufficient buffer in that case
|
|
|
- return resultStringLength;
|
|
|
- }
|
|
|
-
|
|
|
- uint32 ChangeStringCaseInPlace(CaseFlags caseFlags, char16* stringToChange, uint32 bufferLength)
|
|
|
- {
|
|
|
- // Assert pointers
|
|
|
- Assert(stringToChange != nullptr);
|
|
|
- ApiError error = NoError;
|
|
|
-
|
|
|
- if (bufferLength == 0 || stringToChange == nullptr)
|
|
|
- {
|
|
|
- return 0;
|
|
|
- }
|
|
|
-
|
|
|
- int32 ret = ChangeStringLinguisticCase(caseFlags, stringToChange, bufferLength, stringToChange, bufferLength, &error);
|
|
|
-
|
|
|
- // Callers to this function don't expect any errors
|
|
|
- Assert(error == ApiError::NoError);
|
|
|
- Assert(ret > 0);
|
|
|
- return (uint32) ret;
|
|
|
- }
|
|
|
-
|
|
|
- int LogicalStringCompare(const char16* string1, const char16* string2)
|
|
|
- {
|
|
|
- return PlatformAgnostic::UnicodeText::Internal::LogicalStringCompareImpl<char16>(string1, string2);
|
|
|
- }
|
|
|
- };
|
|
|
+ }
|
|
|
};
|