| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- namespace UnifiedRegex
- {
- template <typename C>
- class CharSet;
- template <typename C>
- class RuntimeCharSet;
- class CharBitvec : private Chars<char>
- {
- public:
- static const int Width = Chars<char>::CharWidth;
- static const int Size = NumChars;
- private:
- static const int wordSize = sizeof(uint32) * 8;
- static const int vecSize = Size / wordSize;
- static const uint32 ones = (uint32)-1;
- static const uint8 oneBits[Size];
- uint32 vec[vecSize];
- inline static void setrng(uint32 &v, uint l, uint h)
- {
- uint w = h - l + 1;
- if (w == wordSize)
- {
- v = ones;
- }
- else
- {
- v |= ((1U << w) - 1) << l;
- }
- }
- inline static void clearrng(uint32 &v, uint l, uint h)
- {
- uint w = h - l + 1;
- if (w == wordSize)
- {
- v = 0;
- }
- else
- {
- v &= ~(((1U << w) - 1) << l);
- }
- }
- public:
- inline void CloneFrom(const CharBitvec& other)
- {
- for (int w = 0; w < vecSize; w++)
- {
- vec[w] = other.vec[w];
- }
- }
- inline void Clear()
- {
- for (int w = 0; w < vecSize; w++)
- {
- vec[w] = 0;
- }
- }
- inline void SetAll()
- {
- for (int w = 0; w < vecSize; w++)
- {
- vec[w] = ones;
- }
- }
- inline void Set(uint k)
- {
- Assert(k < Size);
- __assume(k < Size);
- if (k < Size)
- {
- vec[k / wordSize] |= 1U << (k % wordSize);
- }
- }
- inline void SetRange(uint l, uint h)
- {
- Assert(l < Size);
- Assert(h < Size);
- __assume(l < Size);
- __assume(h < Size);
- if (l < Size && h < Size)
- {
- if (l == h)
- {
- vec[l / wordSize] |= 1U << (l % wordSize);
- }
- else if (l < h)
- {
- int lw = l / wordSize;
- int hw = h / wordSize;
- int lo = l % wordSize;
- int hio = h % wordSize;
- if (lw == hw)
- {
- setrng(vec[lw], lo, hio);
- }
- else
- {
- setrng(vec[lw], lo, wordSize - 1);
- for (int w = lw + 1; w < hw; w++)
- {
- vec[w] = ones;
- }
- setrng(vec[hw], 0, hio);
- }
- }
- }
- }
- inline void ClearRange(uint l, uint h)
- {
- Assert(l < Size);
- Assert(h < Size);
- __assume(l < Size);
- __assume(h < Size);
- if (l < Size && h < Size)
- {
- if (l == h)
- {
- vec[l / wordSize] &= ~(1U << (l % wordSize));
- }
- else if (l < h)
- {
- int lw = l / wordSize;
- int hw = h / wordSize;
- int lo = l % wordSize;
- int hio = h % wordSize;
- if (lw == hw)
- {
- clearrng(vec[lw], lo, hio);
- }
- else
- {
- clearrng(vec[lw], lo, wordSize - 1);
- for (int w = lw + 1; w < hw; w++)
- {
- vec[w] = 0;
- }
- clearrng(vec[hw], 0, hio);
- }
- }
- }
- }
- inline bool IsEmpty()
- {
- for (int i = 0; i < vecSize; i++)
- {
- if (vec[i] != 0)
- {
- return false;
- }
- }
- return true;
- }
- inline void UnionInPlace(const CharBitvec& other)
- {
- for (int w = 0; w < vecSize; w++)
- {
- vec[w] |= other.vec[w];
- }
- }
- inline bool UnionInPlaceFullCheck(const CharBitvec& other)
- {
- bool isFull = true;
- for (int w = 0; w < vecSize; w++)
- {
- vec[w] |= other.vec[w];
- if (vec[w] != ones)
- {
- isFull = false;
- }
- }
- return isFull;
- }
- inline bool Get(uint k) const
- {
- Assert(k < Size);
- __assume(k < Size);
- return ((vec[k / wordSize] >> (k % wordSize)) & 1) != 0;
- }
- inline bool IsFull() const
- {
- for (int w = 0; w < vecSize; w++)
- {
- if (vec[w] != ones)
- {
- return false;
- }
- }
- return true;
- }
- inline bool IsSubsetOf(const CharBitvec& other) const
- {
- for (int w = 0; w < vecSize; w++)
- {
- uint32 v = other.vec[w];
- if (v != (vec[w] | v))
- {
- return false;
- }
- }
- return true;
- }
- inline bool IsEqualTo(const CharBitvec& other) const
- {
- for (int w = 0; w < vecSize; w++)
- {
- if (vec[w] != other.vec[w])
- {
- return false;
- }
- }
- return true;
- }
- uint Count() const;
- int NextSet(int k) const;
- int NextClear(int k) const;
- template <typename C>
- void ToComplement(ArenaAllocator* allocator, uint base, CharSet<C>& result) const;
- template <typename C>
- void ToEquivClass(ArenaAllocator* allocator, uint base, uint& tblidx, CharSet<C>& result, codepoint_t baseOffset = 0x0) const;
- };
- template <typename C>
- class CharSet {};
- struct CharSetNode : protected Chars<char16>
- {
- static const int directBits = Chars<char>::CharWidth;
- static const uint directSize = Chars<char>::NumChars;
- static const uint bitsPerInnerLevel = 4;
- static const uint branchingPerInnerLevel = 1 << bitsPerInnerLevel;
- static const uint innerMask = branchingPerInnerLevel - 1;
- static const int bitsPerLeafLevel = CharBitvec::Width;
- static const int branchingPerLeafLevel = CharBitvec::Size;
- static const uint leafMask = branchingPerLeafLevel - 1;
- static const uint levels = 1 + (CharWidth - bitsPerLeafLevel) / bitsPerInnerLevel;
- inline static uint innerIdx(uint level, uint v)
- {
- return (v >> ((level + 1) * bitsPerInnerLevel)) & innerMask;
- }
- inline static uint indexToValue(uint level, uint index, uint offset)
- {
- Assert((index & innerMask) == index);
- Assert((uint)(1 << ((level + 1) * bitsPerInnerLevel)) > offset);
- return (index << ((level + 1) * bitsPerInnerLevel)) + offset;
- }
- inline static uint leafIdx(uint v)
- {
- return v & leafMask;
- }
- inline static uint lim(uint level)
- {
- return (1U << (bitsPerLeafLevel + level * bitsPerInnerLevel)) - 1;
- }
- inline static uint remain(uint level, uint v)
- {
- return v & lim(level);
- }
- virtual void FreeSelf(ArenaAllocator* allocator) = 0;
- virtual CharSetNode* Clone(ArenaAllocator* allocator) const = 0;
- virtual CharSetNode* Set(ArenaAllocator* allocator, uint level, uint l, uint h) = 0;
- virtual CharSetNode* ClearRange(ArenaAllocator* allocator, uint level, uint l, uint h) = 0;
- virtual CharSetNode* UnionInPlace(ArenaAllocator* allocator, uint level, const CharSetNode* other) = 0;
- virtual bool Get(uint level, uint k) const = 0;
- virtual void ToComplement(ArenaAllocator* allocator, uint level, uint base, CharSet<Char>& result) const = 0;
- virtual void ToEquivClassW(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<char16>& result) const = 0;
- virtual void ToEquivClassCP(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<codepoint_t>& result, codepoint_t baseOffset) const = 0;
- virtual bool IsSubsetOf(uint level, const CharSetNode* other) const = 0;
- virtual bool IsEqualTo(uint level, const CharSetNode* other) const = 0;
- virtual uint Count(uint level) const = 0;
- _Success_(return) virtual bool GetNextRange(uint level, Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) const = 0;
- #if DBG
- virtual bool IsLeaf() const = 0;
- #endif
- static CharSetNode* For(ArenaAllocator* allocator, int level);
- };
- struct CharSetFull : CharSetNode
- {
- private:
- template <typename C>
- void ToEquivClass(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<C>& result, codepoint_t baseOffset = 0x0) const;
- public:
- static CharSetFull Instance;
- static CharSetFull * const TheFullNode;
- CharSetFull();
- void FreeSelf(ArenaAllocator* allocator) override;
- CharSetNode* Clone(ArenaAllocator* allocator) const override;
- CharSetNode* Set(ArenaAllocator* allocator, uint level, uint l, uint h) override;
- CharSetNode* ClearRange(ArenaAllocator* allocator, uint level, uint l, uint h) override;
- CharSetNode* UnionInPlace(ArenaAllocator* allocator, uint level, const CharSetNode* other) override;
- bool Get(uint level, uint k) const override;
- void ToComplement(ArenaAllocator* allocator, uint level, uint base, CharSet<Char>& result) const override;
- void ToEquivClassW(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<char16>& result) const override;
- void ToEquivClassCP(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<codepoint_t>& result, codepoint_t baseOffset) const override;
- bool IsSubsetOf(uint level, const CharSetNode* other) const override;
- bool IsEqualTo(uint level, const CharSetNode* other) const override;
- uint Count(uint level) const override;
- _Success_(return) bool GetNextRange(uint level, Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) const override;
- #if DBG
- bool IsLeaf() const override;
- #endif
- };
- struct CharSetInner sealed : CharSetNode
- {
- private:
- template <typename C>
- void ToEquivClass(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<C>& result) const;
- public:
- CharSetNode * children[branchingPerInnerLevel];
- CharSetInner();
- void FreeSelf(ArenaAllocator* allocator) override;
- CharSetNode* Clone(ArenaAllocator* allocator) const override;
- CharSetNode* Set(ArenaAllocator* allocator, uint level, uint l, uint h) override;
- CharSetNode* ClearRange(ArenaAllocator* allocator, uint level, uint l, uint h) override;
- CharSetNode* UnionInPlace(ArenaAllocator* allocator, uint level, const CharSetNode* other) override;
- bool Get(uint level, uint k) const override;
- void ToComplement(ArenaAllocator* allocator, uint level, uint base, CharSet<Char>& result) const override;
- void ToEquivClassW(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<char16>& result) const override;
- void ToEquivClassCP(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<codepoint_t>& result, codepoint_t baseOffset) const override;
- bool IsSubsetOf(uint level, const CharSetNode* other) const override;
- bool IsEqualTo(uint level, const CharSetNode* other) const override;
- uint Count(uint level) const override;
- _Success_(return) bool GetNextRange(uint level, Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) const override;
- #if DBG
- bool IsLeaf() const override;
- #endif
- };
- struct CharSetLeaf sealed : CharSetNode
- {
- private:
- template <typename C>
- void ToEquivClass(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<C>& result, codepoint_t baseOffset = 0x0) const;
- public:
- CharBitvec vec;
- CharSetLeaf();
- void FreeSelf(ArenaAllocator* allocator) override;
- CharSetNode* Clone(ArenaAllocator* allocator) const override;
- CharSetNode* Set(ArenaAllocator* allocator, uint level, uint l, uint h) override;
- CharSetNode* ClearRange(ArenaAllocator* allocator, uint level, uint l, uint h) override;
- CharSetNode* UnionInPlace(ArenaAllocator* allocator, uint level, const CharSetNode* other) override;
- bool Get(uint level, uint k) const override;
- void ToComplement(ArenaAllocator* allocator, uint level, uint base, CharSet<Char>& result) const override;
- void ToEquivClassW(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<char16>& result) const override;
- void ToEquivClassCP(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<codepoint_t>& result, codepoint_t baseOffset) const override;
- bool IsSubsetOf(uint level, const CharSetNode* other) const override;
- bool IsEqualTo(uint level, const CharSetNode* other) const override;
- uint Count(uint level) const override;
- _Success_(return) bool GetNextRange(uint level, Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) const override;
- #if DBG
- bool IsLeaf() const override;
- #endif
- };
- template <>
- class CharSet<char16> : private Chars<char16>
- {
- public:
- static const uint MaxCompact = 4;
- static const uint emptySlot = (uint)-1;
- struct CompactRep
- {
- // 1 + number of distinct characters, 1..MaxCompact+1
- size_t countPlusOne;
- // Characters, in no particular order, or (uint)-1 for tail empty slots
- uint cs[MaxCompact];
- uint8 padding[sizeof(CharBitvec) - sizeof(uint) * MaxCompact];
- };
- struct FullRep
- {
- // Trie for remaining characters. Pointer value will be 0 or >> MaxCompact.
- CharSetNode* root;
- // Entries for first 256 characters
- CharBitvec direct;
- };
- union Rep
- {
- struct CompactRep compact;
- struct FullRep full;
- } rep;
- static const int compactSize = sizeof(CompactRep);
- static const int fullSize = sizeof(FullRep);
- inline bool IsCompact() const { return rep.compact.countPlusOne - 1 <= MaxCompact; }
- void SwitchRepresentations(ArenaAllocator* allocator);
- void Sort();
- public:
- CharSet();
- void FreeBody(ArenaAllocator* allocator);
- void Clear(ArenaAllocator* allocator);
- void CloneFrom(ArenaAllocator* allocator, const CharSet<Char>& other);
- void CloneNonSurrogateCodeUnitsTo(ArenaAllocator* allocator, CharSet<Char>& other);
- void CloneSurrogateCodeUnitsTo(ArenaAllocator* allocator, CharSet<Char>& other);
- inline void Set(ArenaAllocator* allocator, Char kc) { SetRange(allocator, kc, kc); }
- void SetRange(ArenaAllocator* allocator, Char lc, Char hc);
- void SubtractRange(ArenaAllocator* allocator, Char lc, Char hc);
- void SetRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs);
- void SetNotRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs);
- void UnionInPlace(ArenaAllocator* allocator, const CharSet<Char>& other);
- _Success_(return) bool GetNextRange(Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar);
- bool Get_helper(uint k) const;
- inline bool Get(Char kc) const
- {
- if (IsCompact())
- {
- Assert(MaxCompact == 4);
- return rep.compact.cs[0] == CTU(kc) ||
- rep.compact.cs[1] == CTU(kc) ||
- rep.compact.cs[2] == CTU(kc) ||
- rep.compact.cs[3] == CTU(kc);
- }
- else
- {
- if (CTU(kc) < CharSetNode::directSize)
- {
- return rep.full.direct.Get(CTU(kc));
- }
- else if (rep.full.root == 0)
- {
- return false;
- }
- else
- {
- return Get_helper(CTU(kc));
- }
- }
- }
- inline bool IsEmpty() const
- {
- return rep.compact.countPlusOne == 1;
- }
- inline bool IsSingleton() const
- {
- return rep.compact.countPlusOne == 2;
- }
- // Helpers to clean up the code
- inline uint GetCompactLength() const
- {
- Assert(IsCompact());
- return (uint)(rep.compact.countPlusOne - 1u);
- }
- inline void SetCompactLength(size_t length)
- {
- rep.compact.countPlusOne = length + 1;
- }
- inline uint GetCompactCharU(uint index) const
- {
- Assert(index < this->GetCompactLength());
- Assert(IsCompact());
- Assert(rep.compact.cs[index] <= MaxUChar);
- return rep.compact.cs[index];
- }
- inline Char GetCompactChar(uint index) const
- {
- return (Char)(GetCompactCharU(index));
- }
- //Replaces an existing character with a new value
- inline void ReplaceCompactCharU(uint index, uint value)
- {
- Assert(index < this->GetCompactLength());
- Assert(IsCompact());
- Assert(value <= MaxUChar);
- rep.compact.cs[index] = value;
- }
- inline void ClearCompactChar(uint index)
- {
- Assert(index < this->GetCompactLength());
- Assert(IsCompact());
- rep.compact.cs[index] = emptySlot;
- }
- // Adds the character to the end, assuming there is enough space. (Assert in place)
- // Increments count.
- inline void AddCompactCharU(uint value)
- {
- Assert(this->GetCompactLength() < MaxCompact);
- Assert(IsCompact());
- rep.compact.cs[this->GetCompactLength()] = value;
- rep.compact.countPlusOne += 1;
- }
- // Adds the character to the end, assuming there is enough space. (Assert in place)
- // Increments count.
- inline void AddCompactChar(Char value)
- {
- AddCompactCharU((Char)(value));
- }
- // This performs a check to see if the index is the last char, if so sets it to emptySlot
- // If not, replaces it with last index.
- inline void RemoveCompactChar(uint index)
- {
- Assert(index < this->GetCompactLength());
- Assert(IsCompact());
- if (index == this->GetCompactLength() - 1)
- {
- this->ClearCompactChar(index);
- }
- else
- {
- this->ReplaceCompactCharU(index, this->GetCompactCharU((uint)this->GetCompactLength() - 1));
- }
- rep.compact.countPlusOne -= 1;
- }
- inline char16 Singleton() const
- {
- Assert(IsSingleton());
- Assert(rep.compact.cs[0] <= MaxUChar);
- return UTC(rep.compact.cs[0]);
- }
- int GetCompactEntries(uint max, __out_ecount(max) Char* entries) const;
- bool IsSubsetOf(const CharSet<Char>& other) const;
- bool IsEqualTo(const CharSet<Char>& other) const;
- inline uint Count() const
- {
- if (IsCompact())
- {
- return (uint)rep.compact.countPlusOne - 1;
- }
- else if (rep.full.root == 0)
- {
- return rep.full.direct.Count();
- }
- else
- {
- //The bit vector
- Assert(rep.full.root == CharSetFull::TheFullNode || rep.full.root->Count(CharSetNode::levels - 1) <= 0xFF00);
- return rep.full.direct.Count() + (rep.full.root == CharSetFull::TheFullNode ? 0xFF00 : rep.full.root->Count(CharSetNode::levels - 1));
- }
- }
- // NOTE: These are not 'const' methods since they may sort the compact representation internally
- void ToComplement(ArenaAllocator* allocator, CharSet<Char>& result);
- void ToEquivClass(ArenaAllocator* allocator, CharSet<Char>& result);
- void ToEquivClassCP(ArenaAllocator* allocator, CharSet<codepoint_t>& result, codepoint_t baseOffset);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void Print(DebugWriter* w) const;
- #endif
- };
- template <>
- class CharSet<codepoint_t> : private Chars<codepoint_t>
- {
- static const int NumberOfPlanes = 17;
- private:
- // Character planes are composed of 65536 characters each.
- // First plane is the Basic Multilingual Plane (characters 0 - 65535)
- // Every subsequent plane also stores characters in the form [0 - 65535]; to get the actual value, add 'index * 0x10000' to it
- CharSet<char16> characterPlanes[NumberOfPlanes];
- // Takes a character, and returns the index of the CharSet<char16> that holds it.
- inline int CharToIndex(Char c) const
- {
- Assert(c <= Chars<codepoint_t>::MaxUChar);
- return (int)(CTU(c) / (Chars<char16>::MaxUChar + 1));
- }
- // Takes a character, and removes the offset to make it < 0x10000
- inline char16 RemoveOffset(Char c) const
- {
- Assert(c <= Chars<codepoint_t>::MaxUChar);
- return (char16)(CTU(c) % 0x10000);
- }
- // Takes a character, and removes the offset to make it < 0x10000
- inline Char AddOffset(char16 c, int index) const
- {
- Assert(c <= Chars<char16>::MaxUChar);
- Assert(index >= 0);
- Assert(index < NumberOfPlanes);
- return ((Char)c) + 0x10000 * index;
- }
- public:
- CharSet();
- void FreeBody(ArenaAllocator* allocator);
- void Clear(ArenaAllocator* allocator);
- void CloneFrom(ArenaAllocator* allocator, const CharSet<Char>& other);
- void CloneSimpleCharsTo(ArenaAllocator* allocator, CharSet<char16>& other) const;
- inline void CloneNonSurrogateCodeUnitsTo(ArenaAllocator* allocator, CharSet<char16>& other)
- {
- Assert(this->SimpleCharCount() > 0);
- AssertMsg(this->ContainSurrogateCodeUnits(), "This doesn't contain surrogate code units, a simple clone is faster.");
- this->characterPlanes[0].CloneNonSurrogateCodeUnitsTo(allocator, other);
- }
- inline void CloneSurrogateCodeUnitsTo(ArenaAllocator* allocator, CharSet<char16>& other)
- {
- Assert(this->SimpleCharCount() > 0);
- AssertMsg(this->ContainSurrogateCodeUnits(), "This doesn't contain surrogate code units, will not produce any result.");
- this->characterPlanes[0].CloneSurrogateCodeUnitsTo(allocator, other);
- }
- inline void Set(ArenaAllocator* allocator, Char kc) { SetRange(allocator, kc, kc); }
- inline bool ContainSurrogateCodeUnits()
- {
- char16 outLower = 0xFFFF, ignore = 0x0;
- return this->characterPlanes[0].GetNextRange(0xD800, &outLower, &ignore) ? outLower <= 0xDFFF : false;
- }
- void SetRange(ArenaAllocator* allocator, Char lc, Char hc);
- void SetRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs);
- void SetNotRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs);
- void UnionInPlace(ArenaAllocator* allocator, const CharSet<Char>& other);
- void UnionInPlace(ArenaAllocator* allocator, const CharSet<char16>& other);
- _Success_(return) bool GetNextRange(Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar);
- inline bool Get(Char kc) const
- {
- return this->characterPlanes[CharToIndex(kc)].Get(RemoveOffset(kc));
- }
- inline bool IsEmpty() const
- {
- for (int i = 0; i < NumberOfPlanes; i++)
- {
- if (!this->characterPlanes[i].IsEmpty())
- {
- return false;
- }
- }
- return true;
- }
- inline bool IsSimpleCharASingleton() const
- {
- return this->characterPlanes[0].IsSingleton();
- }
- inline char16 SimpleCharSingleton() const
- {
- return this->characterPlanes[0].Singleton();
- }
- inline bool IsSingleton() const
- {
- return this->Count() == 1;
- }
- inline codepoint_t Singleton() const
- {
- Assert(IsSingleton());
- for (int i = 0; i < NumberOfPlanes; i++)
- {
- if (this->characterPlanes[i].IsSingleton())
- {
- return AddOffset(this->characterPlanes[i].Singleton(), i);
- }
- }
- AssertMsg(false, "Should not reach here, first Assert verifies we are a singleton.");
- return INVALID_CODEPOINT;
- }
- bool IsSubsetOf(const CharSet<Char>& other) const;
- bool IsEqualTo(const CharSet<Char>& other) const;
- inline uint Count() const
- {
- uint totalCount = 0;
- for (int i = 0; i < NumberOfPlanes; i++)
- {
- totalCount += this->characterPlanes[i].Count();
- }
- return totalCount;
- }
- inline uint SimpleCharCount() const
- {
- return this->characterPlanes[0].Count();
- }
- // NOTE: These are not 'const' methods since they may sort the compact representation internally
- void ToComplement(ArenaAllocator* allocator, CharSet<Char>& result);
- void ToSimpleComplement(ArenaAllocator* allocator, CharSet<codepoint_t>& result);
- void ToSimpleComplement(ArenaAllocator* allocator, CharSet<char16>& result);
- void ToEquivClass(ArenaAllocator* allocator, CharSet<Char>& result);
- void ToSurrogateEquivClass(ArenaAllocator* allocator, CharSet<Char>& result);
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void Print(DebugWriter* w) const;
- #endif
- };
- template <>
- class RuntimeCharSet<char16> : private Chars<char16>
- {
- private:
- // Trie for remaining characters. Pointer value will be 0 or >> MaxCompact.
- CharSetNode * root;
- // Entries for first 256 characters
- CharBitvec direct;
- public:
- RuntimeCharSet();
- void FreeBody(ArenaAllocator* allocator);
- void CloneFrom(ArenaAllocator* allocator, const CharSet<Char>& other);
- bool Get_helper(uint k) const;
- inline bool Get(Char kc) const
- {
- if (CTU(kc) < CharSetNode::directSize)
- {
- return direct.Get(CTU(kc));
- }
- else if (root == 0)
- {
- return false;
- }
- else
- {
- return Get_helper(CTU(kc));
- }
- }
- #if ENABLE_REGEX_CONFIG_OPTIONS
- void Print(DebugWriter* w) const;
- #endif
- };
- typedef CharSet<char16> UnicodeCharSet;
- typedef RuntimeCharSet<char16> UnicodeRuntimeCharSet;
- }
|