Ver Fonte

cross-platform: --no-icu support

All the non-Unicode tests pass

Added a bunch of 'todo' comments for Unicode improvements without ICU
Oguz Bastemur há 9 anos atrás
pai
commit
a1b0812544

+ 40 - 11
CMakeLists.txt

@@ -18,15 +18,38 @@ if(STATIC_LIBRARY_SH)
   set(STATIC_LIBRARY 1)
 endif()
 
+if(ICU_SETTINGS_RESET)
+  unset(ICU_SETTINGS_RESET CACHE)
+  unset(ICU_INCLUDE_PATH CACHE)
+  unset(ICU_INCLUDE_PATH_SH CACHE)
+  unset(NO_ICU_PATH_GIVEN_SH CACHE)
+  unset(NO_ICU_PATH_GIVEN CACHE)
+endif()
+
+if(ICU_INCLUDE_PATH_SH)
+  set(ICU_INCLUDE_PATH ${ICU_INCLUDE_PATH_SH})
+  unset(NO_ICU_PATH_GIVEN_SH CACHE)
+  unset(NO_ICU_PATH_GIVEN CACHE)
+  unset(ICU_INCLUDE_PATH_SH CACHE)
+endif()
+
+if(NO_ICU_PATH_GIVEN_SH)
+  set(NO_ICU_PATH_GIVEN ${NO_ICU_PATH_GIVEN_SH})
+  unset(NO_ICU_PATH_GIVEN_SH CACHE)
+  unset(ICU_INCLUDE_PATH_SH CACHE)
+  unset(ICU_INCLUDE_PATH CACHE)
+endif()
+
 function(clr_unknown_arch)
-    if (WIN32)
-        message(FATAL_ERROR "Only AMD64, ARM and I386 are supported")
-    else()
-        message(FATAL_ERROR "Only AMD64 and ARM are supported")
-    endif()
+  if (WIN32)
+      message(FATAL_ERROR "Only AMD64, ARM and I386 are supported")
+  else()
+      message(FATAL_ERROR "Only AMD64 and ARM are supported")
+  endif()
 endfunction()
 
 if(ICU_INCLUDE_PATH)
+  add_definitions(-DHAS_REAL_ICU=1)
   set(ICU_CC_PATH "${ICU_INCLUDE_PATH}/../lib/")
   find_library(ICUUC icuuc PATHS ${ICU_CC_PATH} NO_DEFAULT_PATH)
   find_library(ICU18 icui18n PATHS ${ICU_CC_PATH} NO_DEFAULT_PATH)
@@ -43,7 +66,10 @@ endif()
 
 if(CMAKE_SYSTEM_NAME STREQUAL Linux)
     if(NOT ICULIB)
-      set(ICULIB "icuuc")
+      if(NOT NO_ICU_PATH_GIVEN)
+        set(ICULIB "icuuc")
+        add_definitions(-DHAS_REAL_ICU=1)
+      endif()
     endif()
 
     set(CLR_CMAKE_PLATFORM_UNIX 1)
@@ -59,11 +85,14 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL Darwin)
     )
 
     if(NOT ICULIB)
-      set(ICULIB "icucore")
-      add_definitions(
-        -DU_DISABLE_RENAMING=1 #in case we link against to an older binary of icu
-        )
-      message("using ICU from system default: ${ICULIB}")
+      if(NOT NO_ICU_PATH_GIVEN)
+        set(ICULIB "icucore")
+        add_definitions(-DHAS_REAL_ICU=1)
+        add_definitions(
+          -DU_DISABLE_RENAMING=1 #in case we link against to an older binary of icu
+          )
+        message("using ICU from system default: ${ICULIB}")
+      endif()
     endif()
 
     set(CLR_CMAKE_PLATFORM_UNIX 1)

+ 7 - 2
build.sh

@@ -28,6 +28,7 @@ PRINT_USAGE() {
     echo "      --icu=PATH       Path to ICU include folder (see example below)"
     echo "  -j [N], --jobs[=N]   Multicore build, allow N jobs at once"
     echo "  -n, --ninja          Build with ninja instead of make"
+    echo "  --no-icu             Compile without unicode/icu support"
     echo "      --xcode          Generate XCode project"
     echo "  -t, --test-build     Test build (by default Release build)"
     echo "      --static         Build as static library (by default shared library)"
@@ -52,7 +53,7 @@ BUILD_TYPE="Release"
 CMAKE_GEN=
 MAKE=make
 MULTICORE_BUILD=""
-ICU_PATH=""
+ICU_PATH="-DICU_SETTINGS_RESET=1"
 STATIC_LIBRARY="-DSHARED_LIBRARY_SH=1"
 WITHOUT_FEATURES=""
 CREATE_DEB=0
@@ -106,7 +107,7 @@ while [[ $# -gt 0 ]]; do
 
     --icu=*)
         ICU_PATH=$1
-        ICU_PATH="-DICU_INCLUDE_PATH=${ICU_PATH:6}"
+        ICU_PATH="-DICU_INCLUDE_PATH_SH=${ICU_PATH:6}"
         ;;
 
     -n | --ninja)
@@ -114,6 +115,10 @@ while [[ $# -gt 0 ]]; do
         MAKE=ninja
         ;;
 
+    --no-icu)
+        ICU_PATH="-DNO_ICU_PATH_GIVEN_SH=1"
+        ;;
+
     --xcode)
         CMAKE_GEN="-G Xcode -DCC_XCODE_PROJECT=1"
         MAKE=0

+ 2 - 0
lib/Common/CommonDefines.h

@@ -88,7 +88,9 @@
 // ByteCode
 #define VARIABLE_INT_ENCODING 1                     // Byte code serialization variable size int field encoding
 #define BYTECODE_BRANCH_ISLAND                      // Byte code short branch and branch island
+#if defined(_WIN32) || defined(HAS_REAL_ICU)
 #define ENABLE_UNICODE_API 1                        // Enable use of Unicode-related APIs
+#endif
 // Language features
 // xplat-todo: revisit these features
 #ifdef _WIN32

+ 6 - 8
lib/Parser/CharClassifier.cpp

@@ -193,6 +193,7 @@ bool Js::CharClassifier::BigCharIsIdContinueDefault(codepoint_t ch, const Js::Ch
 {
     return (instance->getBigCharFlagsFunc(ch, instance) & PlatformAgnostic::UnicodeText::CharacterTypeFlags::IdChar) != 0;
 }
+#endif
 
 CharTypes Js::CharClassifier::GetBigCharTypeES5(codepoint_t codepoint, const Js::CharClassifier *instance)
 {
@@ -358,7 +359,6 @@ bool Js::CharClassifier::BigCharIsIdContinueES6(codepoint_t codePoint, const Cha
 
     return PlatformAgnostic::UnicodeText::IsIdContinue(codePoint);
 }
-#endif
 
 template <bool isBigChar>
 bool Js::CharClassifier::IsWhiteSpaceFast(codepoint_t ch) const
@@ -419,11 +419,12 @@ Js::CharClassifier::CharClassifier(ScriptContext * scriptContext)
     }
 #endif
 
-#if ENABLE_UNICODE_API
     // If we're in ES6 mode, and we have full support for Unicode character classification
     // from an external library, then use the ES6/Surrogate pair supported versions of the functions
     // Otherwise, fallback to the ES5 versions which don't need an external library
+#if ENABLE_UNICODE_API
     if (isES6UnicodeModeEnabled && isFullUnicodeSupportAvailable)
+#endif
     {
         bigCharIsIdStartFunc = &CharClassifier::BigCharIsIdStartES6;
         bigCharIsIdContinueFunc = &CharClassifier::BigCharIsIdContinueES6;
@@ -435,8 +436,8 @@ Js::CharClassifier::CharClassifier(ScriptContext * scriptContext)
         getBigCharTypeFunc = &CharClassifier::GetBigCharTypeES6;
         getBigCharFlagsFunc = &CharClassifier::GetBigCharFlagsES6;
     }
+#if ENABLE_UNICODE_API
     else
-#endif
     {
         bigCharIsIdStartFunc = &CharClassifier::BigCharIsIdStartDefault;
         bigCharIsIdContinueFunc = &CharClassifier::BigCharIsIdContinueDefault;
@@ -448,6 +449,8 @@ Js::CharClassifier::CharClassifier(ScriptContext * scriptContext)
         getBigCharTypeFunc = &CharClassifier::GetBigCharTypeES5;
         getBigCharFlagsFunc = &CharClassifier::GetBigCharFlagsES5;
     }
+#endif
+
 }
 
 const OLECHAR* Js::CharClassifier::SkipWhiteSpaceNonSurrogate(LPCOLESTR psz, const CharClassifier *instance)
@@ -647,12 +650,7 @@ const LPCUTF8 Js::CharClassifier::SkipIdentifierSurrogateStartEnd(LPCUTF8 psz, L
 
 CharTypes Js::CharClassifier::GetCharType(codepoint_t ch) const
 {
-#if ENABLE_UNICODE_API
     return FBigChar(ch) ? getBigCharTypeFunc(ch, this) : charTypes[ch];
-#else
-    Assert(!FBigChar(ch));
-    return charTypes[ch];
-#endif
 }
 
 #if ENABLE_UNICODE_API

+ 0 - 4
lib/Parser/CharClassifier.h

@@ -116,13 +116,11 @@ namespace Js
         static bool BigCharIsIdStartES6(codepoint_t codePoint, const CharClassifier *instance);
         static bool BigCharIsIdContinueES6(codepoint_t codePoint, const CharClassifier *instance);
 
-#if ENABLE_UNICODE_API
         static CharTypes GetBigCharTypeES6(codepoint_t ch, const CharClassifier *instance);
         static PlatformAgnostic::UnicodeText::CharacterTypeFlags GetBigCharFlagsES6(codepoint_t ch, const CharClassifier *instance);
 
         static CharTypes GetBigCharTypeES5(codepoint_t ch, const CharClassifier *instance);
         static PlatformAgnostic::UnicodeText::CharacterTypeFlags GetBigCharFlagsES5(codepoint_t ch, const CharClassifier *instance);
-#endif
 
         static const OLECHAR* SkipWhiteSpaceSurrogate(LPCOLESTR psz, const CharClassifier *instance);
         static const OLECHAR* SkipWhiteSpaceSurrogateStartEnd(_In_reads_(pStrEnd - pStr) LPCOLESTR pStr, _In_ LPCOLESTR pStrEnd, const CharClassifier *instance);
@@ -134,14 +132,12 @@ namespace Js
         static const OLECHAR* SkipIdentifierNonSurrogate(LPCOLESTR psz, const CharClassifier *instance);
         static const LPCUTF8 SkipIdentifierNonSurrogateStartEnd(LPCUTF8 psz, LPCUTF8 end, const CharClassifier *instance);
 
-#if ENABLE_UNICODE_API
         CharTypes (*getBigCharTypeFunc)(codepoint_t ch, const CharClassifier *instance);
         PlatformAgnostic::UnicodeText::CharacterTypeFlags (*getBigCharFlagsFunc)(codepoint_t ch, const CharClassifier *instance);
 
         bool (*bigCharIsWhitespaceFunc)(codepoint_t ch, const CharClassifier *instance);
         bool (*bigCharIsIdStartFunc)(codepoint_t ch, const CharClassifier *instance);
         bool (*bigCharIsIdContinueFunc)(codepoint_t ch, const CharClassifier *instance);
-#endif
 
         const OLECHAR* (*skipWhiteSpaceFunc)(LPCOLESTR psz, const CharClassifier* instance);
         const OLECHAR* (*skipWhiteSpaceStartEndFunc)(LPCOLESTR pStr, LPCOLESTR pStrEnd, const CharClassifier* instance);

+ 1 - 1
lib/Runtime/Library/JavascriptString.cpp

@@ -2240,7 +2240,7 @@ case_2:
                 {
                     if (*i <= 'Z') { break; }
                     if (*i >= 192)
-                    { 
+                    {
                         if (*i < 223) { break; }
                         if (*i >= 255) { break; }
                     }

+ 0 - 2
lib/Runtime/Library/JavascriptString.h

@@ -137,9 +137,7 @@ namespace Js
         virtual RecyclableObject * CloneToScriptContext(ScriptContext* requestContext) override;
 
         virtual BOOL BufferEquals(__in_ecount(otherLength) LPCWSTR otherBuffer, __in charcount_t otherLength);
-#if ENABLE_UNICODE_API
         char16* GetNormalizedString(PlatformAgnostic::UnicodeText::NormalizationForm, ArenaAllocator*, charcount_t&);
-#endif
 
         static bool Is(Var aValue);
         static JavascriptString* FromVar(Var aValue);

+ 130 - 78
lib/Runtime/PlatformAgnostic/Platform/Linux/UnicodeText.ICU.cpp

@@ -5,9 +5,16 @@
 
 #include "RuntimePlatformAgnosticPch.h"
 #include "UnicodeText.h"
+#ifdef HAS_REAL_ICU
 #include <unicode/uchar.h>
 #include <unicode/ustring.h>
 #include <unicode/normalizer2.h>
+#else
+#define UErrorCode int
+#define U_ZERO_ERROR 0
+#define UChar char16
+#include <string.h>
+#endif
 
 namespace PlatformAgnostic
 {
@@ -20,6 +27,7 @@ namespace PlatformAgnostic
         static_assert(sizeof(char16) == sizeof(UChar),
             "This implementation depends on ICU char size matching char16's size");
 
+#ifdef HAS_REAL_ICU
         // Helper ICU conversion facilities
         static const Normalizer2* TranslateToICUNormalizer(NormalizationForm normalizationForm)
         {
@@ -49,10 +57,10 @@ namespace PlatformAgnostic
             return normalizer;
         }
 
-        // 
+        //
         // Check if a UTF16 string is valid according to the UTF16 standard
         // Specifically, check that we don't have any invalid surrogate pairs
-        // If the string is valid, we return true. 
+        // If the string is valid, we return true.
         // If not, we set invalidIndex to the index of the first invalid char index
         // and return false
         // If the invalid char is a lead surrogate pair, we return its index
@@ -80,7 +88,7 @@ namespace PlatformAgnostic
                 if (c == 0)
                 {
                     return true;
-                }                
+                }
                 if (U_IS_SURROGATE(c))
                 {
                     if (U16_IS_LEAD(c))
@@ -88,11 +96,11 @@ namespace PlatformAgnostic
                         *invalidIndex = i;
                     }
                     else
-                    { 
+                    {
                         Assert(i > 0);
                         *invalidIndex = i - 1;
                     }
-                    
+
                     return false;
                 }
 
@@ -100,7 +108,7 @@ namespace PlatformAgnostic
                 {
                     return true;
                 }
-            }            
+            }
         }
 
         static ApiError TranslateUErrorCode(UErrorCode icuError)
@@ -133,7 +141,7 @@ namespace PlatformAgnostic
             Assert(destString != nullptr || destLength == 0);
 
             // This is semantically different than the Win32 NormalizeString API
-            // For our usage, we always pass in the length rather than letting Windows 
+            // For our usage, we always pass in the length rather than letting Windows
             // calculate the length for us
             Assert(sourceLength > 0);
             Assert(destLength >= 0);
@@ -179,7 +187,7 @@ namespace PlatformAgnostic
                 *pErrorOut = TranslateUErrorCode(errorCode);
                 return -1;
             }
-            
+
             return normalizedStringLength;
         }
 
@@ -193,7 +201,7 @@ namespace PlatformAgnostic
             {
                 length = u_strlen((const UChar*) testString);
             }
-            
+
             // On Windows, IsNormalizedString returns failure if the string
             // is a malformed utf16 string. Maintain the same behavior here.
             size_t invalidIndex = 0;
@@ -201,7 +209,7 @@ namespace PlatformAgnostic
             {
                 return false;
             }
-                
+
             const Normalizer2* normalizer = TranslateToICUNormalizer(normalizationForm);
             Assert(normalizer != nullptr);
 
@@ -212,24 +220,83 @@ namespace PlatformAgnostic
             return isNormalized;
         }
 
-        // Since we're using the ICU here, this is trivially true
-        bool IsExternalUnicodeLibraryAvailable()
+        bool IsWhitespace(codepoint_t ch)
         {
-            return true;
+            return u_isUWhiteSpace(ch) == 1;
         }
 
+        int32 ChangeStringLinguisticCase(CaseFlags caseFlags, const char16* sourceString, uint32 sourceLength, char16* destString, uint32 destLength, ApiError* pErrorOut)
+        {
+            int32_t resultStringLength = 0;
+            UErrorCode errorCode = U_ZERO_ERROR;
+
+            static_assert(sizeof(UChar) == sizeof(char16), "Unexpected char type from ICU, function might have to be updated");
+            if (caseFlags == CaseFlagsUpper)
+            {
+                resultStringLength = u_strToUpper((UChar*) destString, destLength,
+                    (UChar*) sourceString, sourceLength, NULL, &errorCode);
+            }
+            else if (caseFlags == CaseFlagsLower)
+            {
+                resultStringLength = u_strToLower((UChar*) destString, destLength,
+                    (UChar*) sourceString, sourceLength, NULL, &errorCode);
+            }
+            else
+            {
+                Assert(false);
+            }
+
+            if (U_FAILURE(errorCode))
+            {
+                *pErrorOut = TranslateUErrorCode(errorCode);
+                return -1;
+            }
+
+            // Todo: check for resultStringLength > destLength
+            // Return insufficient buffer in that case
+            return resultStringLength;
+        }
+#else
         bool IsWhitespace(codepoint_t ch)
         {
-            return u_isUWhiteSpace(ch) == 1;
+            // todo : fix this!!!
+            char *asc = (char*)&ch;
+            return asc[0] == ' ' || asc[0] == '\n' || asc[0] == '\t' || asc[0] == '\r';
+        }
+
+        bool IsNormalizedString(NormalizationForm normalizationForm, const char16* testString, int32 testStringLength) {
+            // TODO: implement this
+            return true;
+        }
+
+#define EMPTY_COPY \
+    const int len = (destLength <= sourceLength) ? destLength - 1 : sourceLength; \
+    memcpy(destString, sourceString, len * sizeof(char16)); \
+    destString[len] = char16(0); \
+    *pErrorOut = NoError; \
+    return len;
+
+        int32 NormalizeString(NormalizationForm normalizationForm, const char16* sourceString, uint32 sourceLength, char16* destString, int32 destLength, ApiError* pErrorOut)
+        {
+            // TODO: implement this
+            EMPTY_COPY
+        }
+
+        int32 ChangeStringLinguisticCase(CaseFlags caseFlags, const char16* sourceString, uint32 sourceLength, char16* destString, uint32 destLength, ApiError* pErrorOut)
+        {
+            // TODO: implement this
+            EMPTY_COPY
         }
+#endif
 
         bool IsIdStart(codepoint_t ch)
         {
+#ifdef HAS_REAL_ICU
             if (u_isIDStart(ch))
             {
                 return true;
             }
-
+#endif
             // Following codepoints are treated as part of ID_Start
             // for backwards compatibility as per section 2.5 of the Unicode 8 spec
             // See http://www.unicode.org/reports/tr31/tr31-23.html#Backward_Compatibility
@@ -243,14 +310,15 @@ namespace PlatformAgnostic
             default: return false;
             }
         }
-        
+
         bool IsIdContinue(codepoint_t ch)
         {
+#ifdef HAS_REAL_ICU
             if (u_hasBinaryProperty(ch, UCHAR_ID_CONTINUE) == 1)
             {
                 return true;
             }
-
+#endif
             // Following codepoints are treated as part of ID_Continue
             // for backwards compatibility as per section 2.5 of the Unicode 8 spec
             // See http://www.unicode.org/reports/tr31/tr31-23.html#Backward_Compatibility
@@ -273,8 +341,47 @@ namespace PlatformAgnostic
             }
         }
 
+        uint32 ChangeStringCaseInPlace(CaseFlags caseFlags, char16* stringToChange, uint32 bufferLength)
+        {
+#ifndef HAS_REAL_ICU
+            return bufferLength;
+#else
+            // Assert pointers
+            Assert(stringToChange != nullptr);
+            ApiError error = NoError;
+
+            if (bufferLength == 0 || stringToChange == nullptr)
+            {
+                return 0;
+            }
+
+            int32 ret = ChangeStringLinguisticCase(caseFlags, stringToChange, bufferLength, stringToChange, bufferLength, &error);
+
+            // Callers to this function don't expect any errors
+            Assert(error == ApiError::NoError);
+            Assert(ret > 0);
+            return (uint32) ret;
+#endif
+        }
+
+        int LogicalStringCompare(const char16* string1, const char16* string2)
+        {
+            return PlatformAgnostic::UnicodeText::Internal::LogicalStringCompareImpl<char16>(string1, string2);
+        }
+
+        bool IsExternalUnicodeLibraryAvailable()
+        {
+#if defined(HAS_REAL_ICU)
+            // Since we're using the ICU here, this is trivially true
+            return true;
+#else
+            return false;
+#endif
+        }
+
         UnicodeGeneralCategoryClass GetGeneralCategoryClass(codepoint_t ch)
         {
+#ifdef HAS_REAL_ICU
             int8_t charType = u_charType(ch);
 
             if (charType == U_LOWERCASE_LETTER ||
@@ -321,7 +428,7 @@ namespace PlatformAgnostic
             {
                 return UnicodeGeneralCategoryClass::CategoryClassConnectorPunctuation;
             }
-            
+#endif
             return UnicodeGeneralCategoryClass::CategoryClassOther;
         }
 
@@ -330,13 +437,14 @@ namespace PlatformAgnostic
         // Windows and Linux
         CharacterClassificationType GetLegacyCharacterClassificationType(char16 character)
         {
+#ifdef HAS_REAL_ICU
             auto charTypeMask = U_GET_GC_MASK(character);
 
             if ((charTypeMask & U_GC_L_MASK) != 0)
             {
                 return CharacterClassificationType::Letter;
             }
-            
+
             if ((charTypeMask & (U_GC_ND_MASK | U_GC_P_MASK)) != 0)
             {
                 return CharacterClassificationType::DigitOrPunct;
@@ -346,71 +454,15 @@ namespace PlatformAgnostic
             //  * C1_SPACE corresponds to the Unicode Zs category.
             //  * C1_BLANK corresponds to a hardcoded list thats ill-defined.
             // We'll skip that compatibility here and just check for Zs.
-            // We explicitly check for 0xFEFF to satisfy the unit test in es5/Lex_u3.js   
+            // We explicitly check for 0xFEFF to satisfy the unit test in es5/Lex_u3.js
             if ((charTypeMask & U_GC_ZS_MASK) != 0 ||
                 character == 0xFEFF ||
                 character == 0xFFFE)
             {
                 return CharacterClassificationType::Whitespace;
             }
-            
+#endif
             return CharacterClassificationType::Invalid;
         }
-            
-        int32 ChangeStringLinguisticCase(CaseFlags caseFlags, const char16* sourceString, uint32 sourceLength, char16* destString, uint32 destLength, ApiError* pErrorOut)
-        {
-            int32_t resultStringLength = 0;
-            UErrorCode errorCode = U_ZERO_ERROR;
-
-            static_assert(sizeof(UChar) == sizeof(char16), "Unexpected char type from ICU, function might have to be updated");
-            if (caseFlags == CaseFlagsUpper)
-            {
-                resultStringLength = u_strToUpper((UChar*) destString, destLength,
-                    (UChar*) sourceString, sourceLength, NULL, &errorCode);
-            }
-            else if (caseFlags == CaseFlagsLower)
-            {
-                resultStringLength = u_strToLower((UChar*) destString, destLength,
-                    (UChar*) sourceString, sourceLength, NULL, &errorCode);
-            }
-            else
-            {
-                Assert(false);
-            }
-
-            if (U_FAILURE(errorCode))
-            {
-                *pErrorOut = TranslateUErrorCode(errorCode);
-                return -1;
-            }
-
-            // Todo: check for resultStringLength > destLength
-            // Return insufficient buffer in that case
-            return resultStringLength;
-        }
-
-        uint32 ChangeStringCaseInPlace(CaseFlags caseFlags, char16* stringToChange, uint32 bufferLength)
-        {
-            // Assert pointers
-            Assert(stringToChange != nullptr);
-            ApiError error = NoError;
-
-            if (bufferLength == 0 || stringToChange == nullptr)
-            {
-                return 0;
-            }
-            
-            int32 ret = ChangeStringLinguisticCase(caseFlags, stringToChange, bufferLength, stringToChange, bufferLength, &error);
-
-            // Callers to this function don't expect any errors
-            Assert(error == ApiError::NoError);
-            Assert(ret > 0);
-            return (uint32) ret;
-        }
-
-        int LogicalStringCompare(const char16* string1, const char16* string2)
-        {
-            return PlatformAgnostic::UnicodeText::Internal::LogicalStringCompareImpl<char16>(string1, string2);
-        }
-    };
+    }
 };

+ 52 - 53
lib/Runtime/PlatformAgnostic/Platform/Windows/UnicodeText.cpp

@@ -114,8 +114,8 @@ namespace PlatformAgnostic
         static TRet ExecuteWithThreadContext(Fn fn, TRet defaultValue)
         {
             // TODO: We should remove the depedency on ThreadContext for this layer
-            // Currently, this exists since Windows.Globalization.dll is delay loaded and the 
-            // handle is stored on the thread context. We should move the management of the 
+            // Currently, this exists since Windows.Globalization.dll is delay loaded and the
+            // handle is stored on the thread context. We should move the management of the
             // lifetime of that DLL to this layer, so that we can move the PAIL out of Runtime
             // into Common.
             ThreadContext* threadContext = ThreadContext::GetContextForCurrentThread();
@@ -147,7 +147,7 @@ namespace PlatformAgnostic
                 return defaultValue;
             }, defaultValue);
         }
- 
+
         template <typename Fn>
         static bool ExecuteWinGlobCodepointCheckApi(codepoint_t codepoint, Fn fn)
         {
@@ -157,8 +157,8 @@ namespace PlatformAgnostic
                 Assert(SUCCEEDED(hr));
                 return (returnValue != 0);
             }, false);
-        }        
-        
+        }
+
         // Helper Win32 conversion utilities
         static NORM_FORM TranslateToWin32NormForm(NormalizationForm normalizationForm)
         {
@@ -198,48 +198,6 @@ namespace PlatformAgnostic
             }
         }
 
-        // Win32 implementation of platform-agnostic Unicode interface
-        // These are the public APIs of this interface
-        CharacterClassificationType GetLegacyCharacterClassificationType(char16 ch)
-        {
-            WORD charType = 0;
-            BOOL res = ::GetStringTypeW(CT_CTYPE1, &ch, 1, &charType);
-
-            if (res == TRUE)
-            {
-                // BOM ( 0xfeff) is recognized as GetStringTypeW as WS.
-                if ((0x03FF & charType) == 0x0200)
-                {
-                    // Some of the char types changed for Whistler (Unicode 3.0).
-                    // They will return 0x0200 on Whistler, indicating a defined char
-                    // with no type attributes. We want to continue to support these
-                    // characters, so we return the Win2K (Unicode 2.1) attributes.
-                    // We only return the ones we care about - ALPHA for ALPHA, PUNCT
-                    // for PUNCT or DIGIT, and SPACE for SPACE or BLANK.
-                    WORD wOldCharType = oFindOldCharType(ch);
-                    if (0 == wOldCharType)
-                        return CharacterClassificationType::Invalid;
-
-                    charType = wOldCharType;
-                }
-
-                if (charType & C1_ALPHA)
-                {
-                    return CharacterClassificationType::Letter;
-                }
-                else if (charType & (C1_DIGIT | C1_PUNCT))
-                {
-                    return CharacterClassificationType::DigitOrPunct;
-                }
-                else if (charType & (C1_SPACE | C1_BLANK))
-                {
-                    return CharacterClassificationType::Whitespace;
-                }
-            }
-
-            return CharacterClassificationType::Invalid;
-        }
-
         int32 NormalizeString(NormalizationForm normalizationForm, const char16* sourceString, uint32 sourceLength, char16* destString, int32 destLength, ApiError* pErrorOut)
         {
             // Assert pointers
@@ -247,7 +205,7 @@ namespace PlatformAgnostic
             Assert(destString != nullptr || destLength == 0);
 
             // This is semantically different than the Win32 NormalizeString API
-            // For our usage, we always pass in the length rather than letting Windows 
+            // For our usage, we always pass in the length rather than letting Windows
             // calculate the length for us
             Assert(sourceLength > 0);
             Assert(destLength >= 0);
@@ -287,7 +245,7 @@ namespace PlatformAgnostic
             Assert(sourceString != nullptr);
             Assert(destString != nullptr || destLength == 0);
 
-            // LCMapString does not allow the source length to be set to 0 
+            // LCMapString does not allow the source length to be set to 0
             Assert(sourceLength > 0);
 
             *pErrorOut = NoError;
@@ -317,7 +275,7 @@ namespace PlatformAgnostic
             {
                 return 0;
             }
-            
+
             if (caseFlags == CaseFlagsUpper)
             {
                 return (uint32) CharUpperBuff(sourceString, sourceLength);
@@ -326,7 +284,7 @@ namespace PlatformAgnostic
             {
                 return (uint32) CharLowerBuff(sourceString, sourceLength);
             }
- 
+
             AssertMsg(false, "Invalid flags passed to ChangeStringCaseInPlace");
             return 0;
         }
@@ -335,7 +293,7 @@ namespace PlatformAgnostic
         {
             return ExecuteWinGlobApi([&](IUnicodeCharactersStatics* pUnicodeCharStatics) {
                 UnicodeGeneralCategory category = UnicodeGeneralCategory::UnicodeGeneralCategory_NotAssigned;
-                
+
                 HRESULT hr = pUnicodeCharStatics->GetGeneralCategory(codepoint, &category);
                 Assert(SUCCEEDED(hr));
                 if (SUCCEEDED(hr))
@@ -410,13 +368,54 @@ namespace PlatformAgnostic
 
         int LogicalStringCompare(const char16* string1, const char16* string2)
         {
-            // CompareStringW called with these flags is equivalent to calling StrCmpLogicalW 
+            // CompareStringW called with these flags is equivalent to calling StrCmpLogicalW
             // and we have the added advantage of not having to link with shlwapi.lib just for one function
             int i = CompareStringW(LOCALE_USER_DEFAULT, NORM_IGNORECASE | SORT_DIGITSASNUMBERS, string1, -1, string2, -1);
 
             return i - CSTR_EQUAL;
         }
 
+        // Win32 implementation of platform-agnostic Unicode interface
+        // These are the public APIs of this interface
+        CharacterClassificationType GetLegacyCharacterClassificationType(char16 ch)
+        {
+            WORD charType = 0;
+            BOOL res = ::GetStringTypeW(CT_CTYPE1, &ch, 1, &charType);
+
+            if (res == TRUE)
+            {
+                // BOM ( 0xfeff) is recognized as GetStringTypeW as WS.
+                if ((0x03FF & charType) == 0x0200)
+                {
+                    // Some of the char types changed for Whistler (Unicode 3.0).
+                    // They will return 0x0200 on Whistler, indicating a defined char
+                    // with no type attributes. We want to continue to support these
+                    // characters, so we return the Win2K (Unicode 2.1) attributes.
+                    // We only return the ones we care about - ALPHA for ALPHA, PUNCT
+                    // for PUNCT or DIGIT, and SPACE for SPACE or BLANK.
+                    WORD wOldCharType = oFindOldCharType(ch);
+                    if (0 == wOldCharType)
+                        return CharacterClassificationType::Invalid;
+
+                    charType = wOldCharType;
+                }
+
+                if (charType & C1_ALPHA)
+                {
+                    return CharacterClassificationType::Letter;
+                }
+                else if (charType & (C1_DIGIT | C1_PUNCT))
+                {
+                    return CharacterClassificationType::DigitOrPunct;
+                }
+                else if (charType & (C1_SPACE | C1_BLANK))
+                {
+                    return CharacterClassificationType::Whitespace;
+                }
+            }
+
+            return CharacterClassificationType::Invalid;
+        }
     };
 };
 

+ 7 - 12
pal/src/configure.cmake

@@ -869,10 +869,6 @@ int main(int argc, char **argv)
 }" UNWIND_CONTEXT_IS_UCONTEXT_T)
 
 if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
-  if(NOT HAVE_LIBICU_UCHAR_H)
-    unset(HAVE_LIBICU_UCHAR_H CACHE)
-    message(FATAL_ERROR "Cannot find ICU. Try installing libicu-dev or the appropriate packages for your platform")
-  endif()
   set(HAVE_COREFOUNDATION 1)
   set(HAVE__NSGETENVIRON 1)
   set(DEADLOCK_WHEN_THREAD_IS_SUSPENDED_WHILE_BLOCKED_ON_MUTEX 1)
@@ -890,10 +886,6 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD)
     unset(HAVE_LIBUNWIND_H CACHE)
     message(FATAL_ERROR "Cannot find libunwind. Try installing libunwind8 and libunwind8-dev (or the appropriate packages for your platform)")
   endif()
-  if(NOT HAVE_LIBICU_UCHAR_H)
-    unset(HAVE_LIBICU_UCHAR_H CACHE)
-    message(FATAL_ERROR "Cannot find ICU. Try installing libicu-dev or the appropriate packages for your platform")
-  endif()
   set(DEADLOCK_WHEN_THREAD_IS_SUSPENDED_WHILE_BLOCKED_ON_MUTEX 0)
   set(PAL_PTRACE "ptrace((cmd), (pid), (caddr_t)(addr), (data))")
   set(PAL_PT_ATTACH PT_ATTACH)
@@ -915,10 +907,6 @@ else() # Anything else is Linux
     unset(HAVE_LIBUNWIND_H CACHE)
     message(FATAL_ERROR "Cannot find libunwind. Try installing libunwind8 and libunwind8-dev (or the appropriate packages for your platform)")
   endif()
-  if(NOT HAVE_LIBICU_UCHAR_H)
-    unset(HAVE_LIBICU_UCHAR_H CACHE)
-    message(FATAL_ERROR "Cannot find ICU. Try installing libicu-dev or the appropriate packages for your platform")
-  endif()
   set(DEADLOCK_WHEN_THREAD_IS_SUSPENDED_WHILE_BLOCKED_ON_MUTEX 0)
   set(PAL_PTRACE "ptrace((cmd), (pid), (void*)(addr), (data))")
   set(PAL_PT_ATTACH PTRACE_ATTACH)
@@ -931,4 +919,11 @@ else() # Anything else is Linux
   set(HAS_FTRUNCATE_LENGTH_ISSUE 0)
 endif(CMAKE_SYSTEM_NAME STREQUAL Darwin)
 
+if(NOT NO_ICU_PATH_GIVEN)
+  if(NOT HAVE_LIBICU_UCHAR_H)
+    unset(HAVE_LIBICU_UCHAR_H CACHE)
+    message(FATAL_ERROR "Cannot find ICU. Try installing libicu-dev or the appropriate packages for your platform. You may also disable icu/unicode with '--no-icu' argument")
+  endif()
+endif()
+
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)