| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- #include "Utf8Codex.h"
- #ifndef _WIN32
- #undef _Analysis_assume_
- #define _Analysis_assume_(expr)
- #endif
- #ifdef _MSC_VER
- //=============================
- // Disabled Warnings
- //=============================
- #pragma warning(push)
- #pragma warning(disable: 4127) // constant expression for template parameter
- #pragma warning(disable: 26451) // size-conversion/arithmetic-operation ordering
- #endif
- namespace utf8
- {
- const unsigned int mAlignmentMask = 0x3;
- inline bool IsAligned(LPCUTF8 pch)
- {
- return (reinterpret_cast<size_t>(pch) & mAlignmentMask) == 0;
- }
- inline bool IsAligned(LPCOLESTR pch)
- {
- return (reinterpret_cast<size_t>(pch) & mAlignmentMask) == 0;
- }
- inline bool ShouldFastPath(LPCUTF8 pb, LPCOLESTR pch)
- {
- return (reinterpret_cast<size_t>(pb) & mAlignmentMask) == 0 && (reinterpret_cast<size_t>(pch) & mAlignmentMask) == 0;
- }
- inline size_t EncodedBytes(char16 prefix)
- {
- CodexAssert(0 == (prefix & 0xFF00)); // prefix must really be a byte. We use char16 for as a convenience for the API.
- // The number of bytes in an UTF8 encoding is determined by the 4 high-order bits of the first byte.
- // 0xxx -> 1
- // 10xx -> 1 (invalid)
- // 110x -> 2
- // 1110 -> 3
- // 1111 -> 4
- // If this value is XOR with 0xF0 and shift 3 bits to the right it can be used as an
- // index into a 16 element 2 bit array encoded as a uint32 of n - 1 where n is the number
- // of bits in the encoding.
- // The XOR prefix bits mapped to n - 1.
- // 1xxx -> 00 (8 - 15)
- // 01xx -> 00 (4 - 7)
- // 001x -> 01 (2 - 3)
- // 0001 -> 10 (1)
- // 0000 -> 11 (0)
- // This produces the following bit sequence:
- // 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
- // 00 00 00 00 00 00 00 00 00 00 00 00 01 01 10 11
- // which is 0x5B
- return ((0x5B >> (((prefix ^ 0xF0) >> 3) & 0x1E)) & 0x03) + 1;
- }
- const char16 WCH_UTF16_HIGH_FIRST = char16(0xd800);
- const char16 WCH_UTF16_HIGH_LAST = char16(0xdbff);
- const char16 WCH_UTF16_LOW_FIRST = char16(0xdc00);
- const char16 WCH_UTF16_LOW_LAST = char16(0xdfff);
- char16 GetUnknownCharacter(DecodeOptions options = doDefault)
- {
- if ((options & doThrowOnInvalidWCHARs) != 0)
- {
- throw InvalidWideCharException();
- }
- return char16(UNICODE_UNKNOWN_CHAR_MARK);
- }
- inline BOOL InRange(const char16 ch, const char16 chMin, const char16 chMax)
- {
- return (unsigned)(ch - chMin) <= (unsigned)(chMax - chMin);
- }
- BOOL IsValidWideChar(char16 ch)
- {
- return (ch < 0xfdd0) || ((ch > 0xfdef) && (ch <= 0xffef)) || ((ch >= 0xfff9) && (ch <= 0xfffd));
- }
- inline BOOL IsHighSurrogateChar(char16 ch)
- {
- return InRange( ch, WCH_UTF16_HIGH_FIRST, WCH_UTF16_HIGH_LAST );
- }
- inline BOOL IsLowSurrogateChar(char16 ch)
- {
- return InRange( ch, WCH_UTF16_LOW_FIRST, WCH_UTF16_LOW_LAST );
- }
// Decodes one UTF-16 code unit from a UTF-8 sequence whose first byte, c1, has
// already been read by the caller (ptr points at the byte following c1).
//
//   c1       - the byte that was read in lead position.
//   ptr      - in/out cursor into the UTF-8 input; advanced past whatever this call consumes.
//   end      - one past the last readable input byte.
//   options  - in/out decode flags. doSecondSurrogatePair is set here after the first half
//              of a four-byte sequence has been returned and cleared after the second half.
//   chunkEndsAtTruncatedSequence - optional (may be null); set to true when doChunkedEncoding
//              is on and the input ends in the middle of a multi-byte sequence.
//
// Returns the decoded code unit. Malformed input yields GetUnknownCharacter(options), which
// either returns the unknown-character mark or throws when doThrowOnInvalidWCHARs is set.
// A four-byte sequence is delivered over two calls: the high surrogate first (ptr left in
// place), then the low surrogate (ptr moved past the sequence).
_At_(ptr, _In_reads_(end - ptr) _Post_satisfies_(ptr >= _Old_(ptr) - 1 && ptr <= end))
inline char16 DecodeTail(char16 c1, LPCUTF8& ptr, LPCUTF8 end, DecodeOptions& options, bool *chunkEndsAtTruncatedSequence)
{
    char16 ch = 0;
    BYTE c2, c3, c4;
    switch (EncodedBytes(c1))
    {
    case 1:
        // Plain ASCII needs no further decoding.
        if (c1 < 0x80) return c1;
        if ((options & doSecondSurrogatePair) != 0)
        {
            // We're in the middle of decoding a surrogate pair from a four-byte utf8 sequence.
            // The high word has already been returned, but without advancing ptr, which was on byte 1.
            // ptr was then advanced externally when reading c1, which is byte 1, so ptr is now on byte 2.
            // byte 1 must have been a continuation byte, hence will be in case 1.
            ptr--; // back to byte 1
            c1 = ptr[-1]; // the original first byte
            // ptr is now on c2. We must also have c3 and c4, otherwise doSecondSurrogatePair would not be set.
            _Analysis_assume_(ptr + 2 < end);
            goto LFourByte;
        }
        // 10xxxxxx (trail byte appearing in a lead byte position)
        return GetUnknownCharacter(options);
    case 2:
        // The second byte is required; handle input that ends right after the lead byte.
        if (ptr >= end)
        {
            if ((options & doChunkedEncoding) != 0)
            {
                // This is a sequence that spans a chunk, push ptr back to the beginning of the sequence.
                ptr--;
                if (chunkEndsAtTruncatedSequence)
                {
                    *chunkEndsAtTruncatedSequence = true;
                }
            }
            return GetUnknownCharacter(options);
        }
        c2 = *ptr++;
        // 110XXXXx 10xxxxxx
        // UTF16 | UTF8 1st byte 2nd byte
        // U+0080..U+07FF | C2..DF 80..BF
        // Lead bytes C0 and C1 fall outside C2..DF and are rejected here (overlong forms).
        if (
            InRange(c1, 0xC2, 0xDF)
            && InRange(c2, 0x80, 0xBF)
            )
        {
            ch |= WCHAR(c1 & 0x1f) << 6; // 0x0080 - 0x07ff
            ch |= WCHAR(c2 & 0x3f);
            if (!IsValidWideChar(ch) && ((options & doAllowInvalidWCHARs) == 0))
            {
                ch = GetUnknownCharacter(options);
            }
        }
        else
        {
            // Un-read c2 so only the bad lead byte is dropped and c2 is retried as a lead byte.
            ptr--;
            ch = GetUnknownCharacter(options);
        }
        break;
    case 3:
        // 1110XXXX 10Xxxxxx 10xxxxxx
        // Two more bytes are required; handle input that ends before both are available.
        if (ptr + 1 >= end)
        {
            if ((options & doChunkedEncoding) != 0)
            {
                // This is a sequence that spans a chunk, push ptr back to the beginning of the sequence.
                ptr--;
                if (chunkEndsAtTruncatedSequence)
                {
                    *chunkEndsAtTruncatedSequence = true;
                }
            }
            return GetUnknownCharacter(options);
        }
        // UTF16 | UTF8 1st byte 2nd byte 3rd byte
        // U+0800..U+0FFF | E0 A0..BF 80..BF
        // U+1000..U+CFFF | E1..EC 80..BF 80..BF
        // U+D000..U+D7FF | ED 80..9F 80..BF
        // U+E000..U+FFFF | EE..EF 80..BF 80..BF
        // The trailing bytes are only peeked at; ptr is advanced once the sequence is accepted.
        c2 = ptr[0];
        c3 = ptr[1];
        if (
            // accept when any of the following alternatives is true
            (c1 == 0xE0
            && InRange(c2, 0xA0, 0xBF)
            && InRange(c3, 0x80, 0xBF))
            ||
            (InRange(c1, 0xE1, 0xEC)
            && InRange(c2, 0x80, 0xBF)
            && InRange(c3, 0x80, 0xBF))
            ||
            (c1 == 0xED
            && InRange(c2, 0x80, 0x9F)
            && InRange(c3, 0x80, 0xBF))
            ||
            (InRange(c1, 0xEE, 0xEF)
            && InRange(c2, 0x80, 0xBF)
            && InRange(c3, 0x80, 0xBF))
            ||
            // With doAllowThreeByteSurrogates, ED A0..BF xx (surrogate code units) is accepted too.
            (((options & doAllowThreeByteSurrogates) != 0)
            &&
            c1 == 0xED
            && InRange(c2, 0x80, 0xBF)
            && InRange(c3, 0x80, 0xBF)
            )
            )
        {
            ch = WCHAR(c1 & 0x0f) << 12; // top 4 bits of the code unit
            ch |= WCHAR(c2 & 0x3f) << 6; // middle 6 bits
            ch |= WCHAR(c3 & 0x3f); // low 6 bits
            if (!IsValidWideChar(ch) && ((options & (doAllowThreeByteSurrogates | doAllowInvalidWCHARs)) == 0))
            {
                ch = GetUnknownCharacter(options);
            }
            ptr += 2;
        }
        else
        {
            ch = GetUnknownCharacter(options);
            // Windows OS 1713952. Only drop the illegal leading byte
            // Retry next byte.
            // ptr is already advanced.
        }
        break;
    case 4:
LFourByte:
        // 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx or 11111xxx ....
        // NOTE: 11111xxx is not supported
        // Three more bytes are required; handle input that ends before all are available.
        if (ptr + 2 >= end)
        {
            if ((options & doChunkedEncoding) != 0)
            {
                // This is a sequence that spans a chunk, push ptr back to the beginning of the sequence.
                ptr--;
                if (chunkEndsAtTruncatedSequence)
                {
                    *chunkEndsAtTruncatedSequence = true;
                }
            }
            ch = GetUnknownCharacter(options);
            break;
        }
        c2 = ptr[0];
        c3 = ptr[1];
        c4 = ptr[2];
        // UTF16 | UTF8 1st byte 2nd byte 3rd byte 4th byte
        // U+10000..U+3FFFF | F0 90..BF 80..BF 80..BF
        // U+40000..U+FFFFF | F1..F3 80..BF 80..BF 80..BF
        // U+100000..U+10FFFF | F4 80..8F 80..BF 80..BF
        if (! // NOT Unicode well-formed byte sequences
            (
            // well-formed when any of the following alternatives is true
            (c1 == 0xF0
            && InRange(c2, 0x90,0xBF)
            && InRange(c3, 0x80,0xBF)
            && InRange(c4, 0x80,0xBF))
            ||
            (InRange(c1, 0xF1, 0xF3)
            && InRange(c2, 0x80,0xBF)
            && InRange(c3, 0x80,0xBF)
            && InRange(c4, 0x80,0xBF))
            ||
            (c1 == 0xF4
            && InRange(c2, 0x80,0x8F)
            && InRange(c3, 0x80,0xBF)
            && InRange(c4, 0x80,0xBF))
            )
            )
        {
            // Windows OS 1713952. Only drop the illegal leading byte.
            // Retry next byte.
            // ptr is already advanced 1.
            ch = GetUnknownCharacter(options);
            break;
        }
        if ((options & doSecondSurrogatePair) == 0)
        {
            // Decode high 10 bits of utf-8 20 bit char
            ch = WCHAR(c1 & 0x07) << 2;
            ch |= WCHAR(c2 & 0x30) >> 4;
            ch = (ch - 1) << 6; // ch == 0000 00ww ww00 0000
            ch |= WCHAR(c2 & 0x0f) << 2; // ch == 0000 00ww wwzz zz00
            ch |= WCHAR(c3 & 0x30) >> 4; // ch == 0000 00ww wwzz zzyy
            // Encode first word of utf-16 surrogate pair
            ch += 0xD800;
            // Remember next call must return second word
            options = (DecodeOptions)(options | doSecondSurrogatePair);
            // Leave ptr on byte 1, this way:
            // - callers who test that ptr has been advanced by utf8::Decode will see progress for
            // both words of the surrogate pair.
            // - callers who calculate the number of multi-unit chars by subtracting after from before ptr
            // will accumulate 0 for first word and 2 for second, thus utf8 chars equals 2 utf16 chars + 2
            // multi-unit chars, as it should be.
        }
        else
        {
            // Decode low 10 bits of utf-8 20 bit char
            ch = WCHAR(c3 & 0x0f) << 6; // ch == 0000 00yy yy00 0000
            ch |= WCHAR(c4 & 0x3f); // ch == 0000 00yy yyxx xxxx
            // Encode second word of utf-16 surrogate pair
            ch += 0xDC00;
            // We're done with this char
            options = (DecodeOptions)(options & ~doSecondSurrogatePair);
            ptr += 3; // remember, got here by subtracting one from ptr in case 1, so effective increment is 2
        }
        break;
    }
    return ch;
}
- LPCUTF8 NextCharFull(LPCUTF8 ptr)
- {
- return ptr + EncodedBytes(*ptr);
- }
- LPCUTF8 PrevCharFull(LPCUTF8 ptr, LPCUTF8 start)
- {
- if (ptr > start)
- {
- LPCUTF8 current = ptr - 1;
- while (current > start && (*current & 0xC0) == 0x80)
- current--;
- if (NextChar(current) == ptr)
- return current;
- // It is not a valid encoding, just go back one character.
- return ptr - 1;
- }
- else
- return ptr;
- }
-
- _Use_decl_annotations_
- size_t DecodeUnitsInto(char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options, bool *chunkEndsAtTruncatedSequence)
- {
- DecodeOptions localOptions = options;
- if (chunkEndsAtTruncatedSequence)
- {
- *chunkEndsAtTruncatedSequence = false;
- }
- LPCUTF8 p = pbUtf8;
- char16 *dest = buffer;
- if (!ShouldFastPath(p, dest)) goto LSlowPath;
- LFastPath:
- while (p + 3 < pbEnd)
- {
- unsigned bytes = *(unsigned *)p;
- if ((bytes & 0x80808080) != 0) goto LSlowPath;
- ((uint32 *)dest)[0] = (char16(bytes) & 0x00FF) | ((char16(bytes) & 0xFF00) << 8);
- ((uint32 *)dest)[1] = (char16(bytes >> 16) & 0x00FF) | ((char16(bytes >> 16) & 0xFF00) << 8);
- p += 4;
- dest += 4;
- }
- LSlowPath:
- while (p < pbEnd)
- {
- LPCUTF8 s = p;
- char16 chDest = Decode(p, pbEnd, localOptions, chunkEndsAtTruncatedSequence);
- if (s < p)
- {
- // We decoded the character, store it
- *dest++ = chDest;
- }
- else
- {
- // Nothing was converted. This might happen at the end of a buffer with doChunkedEncoding.
- break;
- }
- if (ShouldFastPath(p, dest)) goto LFastPath;
- }
- pbUtf8 = p;
- return dest - buffer;
- }
- _Use_decl_annotations_
- size_t DecodeUnitsIntoAndNullTerminate(char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options, bool *chunkEndsAtTruncatedSequence)
- {
- size_t result = DecodeUnitsInto(buffer, pbUtf8, pbEnd, options, chunkEndsAtTruncatedSequence);
- buffer[result] = 0;
- return result;
- }
- _Use_decl_annotations_
- size_t DecodeUnitsIntoAndNullTerminateNoAdvance(char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options, bool *chunkEndsAtTruncatedSequence)
- {
- return DecodeUnitsIntoAndNullTerminate(buffer, pbUtf8, pbEnd, options, chunkEndsAtTruncatedSequence);
- }
- bool CharsAreEqual(LPCOLESTR pch, LPCUTF8 bch, LPCUTF8 end, DecodeOptions options)
- {
- DecodeOptions localOptions = options;
- while (bch < end)
- {
- if (*pch++ != utf8::Decode(bch, end, localOptions))
- {
- return false;
- }
- }
- return true;
- }
- template <Utf8EncodingKind encoding, bool countBytesOnly = false>
- __range(0, cbDest)
- size_t EncodeIntoImpl(
- _When_(!countBytesOnly, _Out_writes_(cbDest)) utf8char_t *destBuffer,
- __range(0, cchSource * 3) size_t cbDest,
- _In_reads_(cchSource) const char16 *source,
- __range(0, INT_MAX) charcount_t cchSource)
- {
- charcount_t cch = cchSource; // SAL analysis gets confused by EncodeTrueUtf8's dest buffer requirement unless we alias cchSource with a local
- LPUTF8 dest = destBuffer;
- utf8char_t *bufferEnd = &destBuffer[cbDest];
- CodexAssertOrFailFast(dest <= bufferEnd);
- if (!ShouldFastPath(dest, source)) goto LSlowPath;
- LFastPath:
- while (cch >= 4)
- {
- uint32 first = ((const uint32 *)source)[0];
- if ( (first & 0xFF80FF80) != 0) goto LSlowPath;
- uint32 second = ((const uint32 *)source)[1];
- if ( (second & 0xFF80FF80) != 0) goto LSlowPath;
- if (!countBytesOnly)
- {
- CodexAssertOrFailFast(dest + 4 <= bufferEnd);
- *(uint32 *)dest = (first & 0x0000007F) | ((first & 0x007F0000) >> 8) | ((second & 0x0000007f) << 16) | ((second & 0x007F0000) << 8);
- }
- dest += 4;
- source += 4;
- cch -= 4;
- }
- LSlowPath:
- if (encoding == Utf8EncodingKind::Cesu8)
- {
- while (cch-- > 0)
- {
- dest = Encode<countBytesOnly>(*source++, dest, bufferEnd);
- if (ShouldFastPath(dest, source)) goto LFastPath;
- }
- }
- else
- {
- while (cch-- > 0)
- {
- // We increment the source pointer here since at least one utf16 code unit is read here
- // If the code unit turns out to be the high surrogate in a surrogate pair, then
- // EncodeTrueUtf8 will consume the low surrogate code unit too by decrementing cch
- // and incrementing source
- dest = EncodeTrueUtf8<countBytesOnly>(*source++, &source, &cch, dest, bufferEnd);
- if (ShouldFastPath(dest, source)) goto LFastPath;
- }
- }
- return dest - destBuffer;
- }
- template <Utf8EncodingKind encoding>
- __range(0, cbDest)
- size_t EncodeInto(
- _Out_writes_(cbDest) utf8char_t *dest,
- __range(0, cchSource * 3) size_t cbDest,
- _In_reads_(cchSource) const char16 *source,
- __range(0, INT_MAX) charcount_t cchSource)
- {
- return EncodeIntoImpl<encoding>(dest, cbDest, source, cchSource);
- }
- template <Utf8EncodingKind encoding>
- __range(0, cbDest)
- size_t EncodeIntoAndNullTerminate(
- _Out_writes_z_(cbDest) utf8char_t *dest,
- __range(1, cchSource * 3 + 1) size_t cbDest, // must be at least large enough to write null terminator
- _In_reads_(cchSource) const char16 *source,
- __range(0, INT_MAX) charcount_t cchSource)
- {
- size_t destWriteMaxBytes = cbDest - 1; // leave room for null terminator
- size_t result = EncodeIntoImpl<encoding>(dest, destWriteMaxBytes, source, cchSource);
- dest[result] = 0;
- return result;
- }
// Explicit instantiations: the encoder templates are defined in this file, so the
// Cesu8 and TrueUtf8 versions are instantiated here.

// EncodeInto, CESU-8 flavor.
template
__range(0, cbDest)
size_t EncodeInto<Utf8EncodingKind::Cesu8>(
    _Out_writes_(cbDest) utf8char_t *dest,
    __range(0, cchSource * 3) size_t cbDest,
    _In_reads_(cchSource) const char16 *source,
    __range(0, INT_MAX) charcount_t cchSource);
// EncodeInto, true UTF-8 flavor.
template
__range(0, cbDest)
size_t EncodeInto<Utf8EncodingKind::TrueUtf8>(
    _Out_writes_(cbDest) utf8char_t *dest,
    __range(0, cchSource * 3) size_t cbDest,
    _In_reads_(cchSource) const char16 *source,
    __range(0, INT_MAX) charcount_t cchSource);
// EncodeIntoAndNullTerminate, CESU-8 flavor.
template
__range(0, cbDest)
size_t EncodeIntoAndNullTerminate<Utf8EncodingKind::Cesu8>(
    _Out_writes_z_(cbDest) utf8char_t *dest,
    __range(1, cchSource * 3 + 1) size_t cbDest,
    _In_reads_(cchSource) const char16 *source,
    __range(0, INT_MAX) charcount_t cchSource);
// EncodeIntoAndNullTerminate, true UTF-8 flavor.
template
__range(0, cbDest)
size_t EncodeIntoAndNullTerminate<Utf8EncodingKind::TrueUtf8>(
    _Out_writes_z_(cbDest) utf8char_t *dest,
    __range(1, cchSource * 3 + 1) size_t cbDest,
    _In_reads_(cchSource) const char16 *source,
    __range(0, INT_MAX) charcount_t cchSource);
- // Since we are not actually encoding, the return value is bounded on cch
- __range(0, cch * 3)
- size_t CountTrueUtf8(__in_ecount(cch) const char16 *source, charcount_t cch)
- {
- return EncodeIntoImpl<Utf8EncodingKind::TrueUtf8, true /*count only*/>(nullptr, 0, source, cch);
- }
- // Convert the character index into a byte index.
- size_t CharacterIndexToByteIndex(__in_ecount(cbLength) LPCUTF8 pch, size_t cbLength, charcount_t cchIndex, DecodeOptions options)
- {
- return CharacterIndexToByteIndex(pch, cbLength, cchIndex, 0, 0, options);
- }
- size_t CharacterIndexToByteIndex(__in_ecount(cbLength) LPCUTF8 pch, size_t cbLength, const charcount_t cchIndex, size_t cbStartIndex, charcount_t cchStartIndex, DecodeOptions options)
- {
- DecodeOptions localOptions = options;
- LPCUTF8 pchCurrent = pch + cbStartIndex;
- LPCUTF8 pchEnd = pch + cbLength;
- LPCUTF8 pchEndMinus4 = pch + (cbLength - 4);
- charcount_t i = cchIndex - cchStartIndex;
- // Avoid using a reinterpret_cast to start a misaligned read.
- if (!IsAligned(pchCurrent)) goto LSlowPath;
- LFastPath:
- // Skip 4 bytes at a time.
- while (pchCurrent < pchEndMinus4 && i > 4)
- {
- uint32 ch4 = *reinterpret_cast<const uint32 *>(pchCurrent);
- if ((ch4 & 0x80808080) == 0)
- {
- pchCurrent += 4;
- i -= 4;
- }
- else break;
- }
- LSlowPath:
- while (pchCurrent < pchEnd && i > 0)
- {
- Decode(pchCurrent, pchEnd, localOptions);
- i--;
- // Try to return to the fast path avoiding misaligned reads.
- if (i > 4 && IsAligned(pchCurrent)) goto LFastPath;
- }
- return i > 0 ? cbLength : pchCurrent - pch;
- }
- // Convert byte index into character index
- charcount_t ByteIndexIntoCharacterIndex(__in_ecount(cbIndex) LPCUTF8 pch, size_t cbIndex, DecodeOptions options)
- {
- DecodeOptions localOptions = options;
- LPCUTF8 pchCurrent = pch;
- LPCUTF8 pchEnd = pch + cbIndex;
- LPCUTF8 pchEndMinus4 = pch + (cbIndex - 4);
- charcount_t i = 0;
- // Avoid using a reinterpret_cast to start a misaligned read.
- if (!IsAligned(pchCurrent)) goto LSlowPath;
- LFastPath:
- // Skip 4 bytes at a time.
- while (pchCurrent < pchEndMinus4)
- {
- uint32 ch4 = *reinterpret_cast<const uint32 *>(pchCurrent);
- if ((ch4 & 0x80808080) == 0)
- {
- pchCurrent += 4;
- i += 4;
- }
- else break;
- }
- LSlowPath:
- while (pchCurrent < pchEnd)
- {
- LPCUTF8 s = pchCurrent;
- Decode(pchCurrent, pchEnd, localOptions);
- if (s == pchCurrent) break;
- i++;
- // Try to return to the fast path avoiding misaligned reads.
- if (IsAligned(pchCurrent)) goto LFastPath;
- }
- return i;
- }
- } // namespace utf8
- #ifdef _MSC_VER
- #pragma warning(pop)
- #endif
|