| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- #pragma once
- #include "Utf8Codex.h"
- namespace utf8
- {
- ///
- /// Use the codex library to encode a UTF16 string to UTF8.
- /// The caller is responsible for freeing the memory, which is allocated
- /// using Allocator.
- /// The returned string is null terminated.
- ///
- template <class Allocator>
- HRESULT WideStringToNarrow(_In_ LPCWSTR sourceString, size_t sourceCount, _Out_ LPSTR* destStringPtr, _Out_ size_t* destCount, size_t* allocateCount = nullptr)
- {
- size_t cchSourceString = sourceCount;
- if (cchSourceString >= MAXUINT32)
- {
- return E_OUTOFMEMORY;
- }
- size_t cbDestString = (cchSourceString + 1) * 3;
- // Check for overflow- cbDestString should be >= cchSourceString
- if (cbDestString < cchSourceString)
- {
- return E_OUTOFMEMORY;
- }
- utf8char_t* destString = (utf8char_t*)Allocator::allocate(cbDestString);
- if (destString == nullptr)
- {
- return E_OUTOFMEMORY;
- }
- size_t cbEncoded = utf8::EncodeTrueUtf8IntoAndNullTerminate(destString, sourceString, (charcount_t) cchSourceString);
- Assert(cbEncoded <= cbDestString);
- static_assert(sizeof(utf8char_t) == sizeof(char), "Needs to be valid for cast");
- *destStringPtr = (char*)destString;
- *destCount = cbEncoded;
- if (allocateCount != nullptr) *allocateCount = cbEncoded;
- return S_OK;
- }
- ///
- /// Use the codex library to encode a UTF8 string to UTF16.
- /// The caller is responsible for freeing the memory, which is allocated
- /// using Allocator.
- /// The returned string is null terminated.
- ///
- template <class Allocator>
- HRESULT NarrowStringToWide(_In_ LPCSTR sourceString, size_t sourceCount, _Out_ LPWSTR* destStringPtr, _Out_ size_t* destCount, size_t* allocateCount = nullptr)
- {
- size_t cbSourceString = sourceCount;
- size_t sourceStart = 0;
- size_t cbDestString = (sourceCount + 1) * sizeof(WCHAR);
- if (cbDestString < sourceCount) // overflow ?
- {
- return E_OUTOFMEMORY;
- }
- WCHAR* destString = (WCHAR*)Allocator::allocate(cbDestString);
- if (destString == nullptr)
- {
- return E_OUTOFMEMORY;
- }
- if (allocateCount != nullptr) *allocateCount = cbDestString;
- for (; sourceStart < sourceCount; sourceStart++)
- {
- const char ch = sourceString[sourceStart];
- if ( ! (ch > 0 && ch < 0x0080) )
- {
- size_t fallback = sourceStart > 3 ? 3 : sourceStart; // 3 + 1 -> fallback at least 1 unicode char
- sourceStart -= fallback;
- break;
- }
- destString[sourceStart] = (WCHAR) ch;
- }
- if (sourceStart == sourceCount)
- {
- *destCount = sourceCount;
- destString[sourceCount] = WCHAR(0);
- *destStringPtr = destString;
- }
- else
- {
- LPCUTF8 remSourceString = (LPCUTF8)sourceString + sourceStart;
- WCHAR *remDestString = destString + sourceStart;
- charcount_t cchDestString = utf8::ByteIndexIntoCharacterIndex(remSourceString, cbSourceString - sourceStart);
- cchDestString += (charcount_t)sourceStart;
- Assert (cchDestString <= sourceCount);
- // Some node tests depend on the utf8 decoder not swallowing invalid unicode characters
- // instead of replacing them with the "replacement" chracter. Pass a flag to our
- // decoder to require such behavior
- utf8::DecodeUnitsIntoAndNullTerminateNoAdvance(remDestString, remSourceString, (LPCUTF8) sourceString + cbSourceString, DecodeOptions::doAllowInvalidWCHARs);
- Assert(destString[cchDestString] == 0);
- static_assert(sizeof(utf8char_t) == sizeof(char), "Needs to be valid for cast");
- *destStringPtr = destString;
- *destCount = cchDestString;
- }
- return S_OK;
- }
///
/// Allocator policy that forwards to the global ::malloc / ::free.
/// The byte count handed to free() is accepted for interface compatibility
/// and is not used.
///
class malloc_allocator
{
public:
    // Returns whatever ::malloc(size) returns.
    static void* allocate(size_t size)
    {
        void* const block = ::malloc(size);
        return block;
    }

    // Releases ptr with ::free; the size argument is ignored.
    static void free(void* ptr, size_t /* count */)
    {
        ::free(ptr);
    }
};
- inline HRESULT WideStringToNarrowDynamic(_In_ LPCWSTR sourceString, _Out_ LPSTR* destStringPtr)
- {
- size_t unused;
- return WideStringToNarrow<malloc_allocator>(
- sourceString, wcslen(sourceString), destStringPtr, &unused);
- }
- inline HRESULT NarrowStringToWideDynamic(_In_ LPCSTR sourceString, _Out_ LPWSTR* destStringPtr)
- {
- size_t unused;
- return NarrowStringToWide<malloc_allocator>(
- sourceString, strlen(sourceString), destStringPtr, &unused);
- }
- inline HRESULT NarrowStringToWideDynamicGetLength(_In_ LPCSTR sourceString, _Out_ LPWSTR* destStringPtr, _Out_ size_t* destLength)
- {
- return NarrowStringToWide<malloc_allocator>(
- sourceString, strlen(sourceString), destStringPtr, destLength);
- }
- template <class Allocator, class SrcType, class DstType>
- class NarrowWideStringConverter
- {
- public:
- static size_t Length(const SrcType& src);
- static HRESULT Convert(
- SrcType src, size_t srcCount, DstType* dst, size_t* dstCount, size_t* allocateCount = nullptr);
- };
- template <class Allocator>
- class NarrowWideStringConverter<Allocator, LPCSTR, LPWSTR>
- {
- public:
- // Note: Typically caller should pass in Utf8 string length. Following
- // is used as fallback.
- static size_t Length(LPCSTR src)
- {
- return strnlen(src, INT_MAX);
- }
- static HRESULT Convert(
- LPCSTR sourceString, size_t sourceCount,
- LPWSTR* destStringPtr, size_t* destCount, size_t* allocateCount = nullptr)
- {
- return NarrowStringToWide<Allocator>(
- sourceString, sourceCount, destStringPtr, destCount, allocateCount);
- }
- };
- template <class Allocator>
- class NarrowWideStringConverter<Allocator, LPCWSTR, LPSTR>
- {
- public:
- // Note: Typically caller should pass in WCHAR string length. Following
- // is used as fallback.
- static size_t Length(LPCWSTR src)
- {
- return wcslen(src);
- }
- static HRESULT Convert(
- LPCWSTR sourceString, size_t sourceCount,
- LPSTR* destStringPtr, size_t* destCount, size_t* allocateCount = nullptr)
- {
- return WideStringToNarrow<Allocator>(
- sourceString, sourceCount, destStringPtr, destCount, allocateCount);
- }
- };
- template <class Allocator, class SrcType, class DstType>
- class NarrowWideConverter
- {
- typedef NarrowWideStringConverter<Allocator, SrcType, DstType>
- StringConverter;
- private:
- DstType dst;
- size_t dstCount;
- size_t allocateCount;
- public:
- NarrowWideConverter() : dst()
- {
- // do nothing
- }
- NarrowWideConverter(const SrcType& src, size_t srcCount = -1): dst()
- {
- Initialize(src, srcCount);
- }
- void Initialize(const SrcType& src, size_t srcCount = -1)
- {
- if (srcCount == -1)
- {
- srcCount = StringConverter::Length(src);
- }
- StringConverter::Convert(src, srcCount, &dst, &dstCount, &allocateCount);
- }
- ~NarrowWideConverter()
- {
- if (dst)
- {
- Allocator::free(dst, allocateCount);
- }
- }
- DstType Detach()
- {
- DstType result = dst;
- dst = DstType();
- return result;
- }
- operator DstType()
- {
- return dst;
- }
- size_t Length() const
- {
- return dstCount;
- }
- };
- typedef NarrowWideConverter<malloc_allocator, LPCSTR, LPWSTR> NarrowToWide;
- typedef NarrowWideConverter<malloc_allocator, LPCWSTR, LPSTR> WideToNarrow;
- }
|