| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- #pragma once
- namespace UnifiedRegex
- {
- enum CharMapScheme
- {
- CharMapScheme_Linear,
- CharMapScheme_Full
- };
- template <typename C, typename V, CharMapScheme scheme = CharMapScheme_Full>
- class CharMap {};
- template <typename V, CharMapScheme scheme>
- class CharMap<char, V, scheme> : private Chars<char>
- {
- private:
- V map[NumChars];
- public:
- CharMap(V defv)
- {
- for (int i = 0; i < NumChars; i++)
- map[i] = defv;
- }
- void FreeBody(ArenaAllocator* allocator)
- {
- }
- inline void Set(ArenaAllocator* allocator, Char k, V v)
- {
- map[CTU(k)] = v;
- }
- inline V Get(Char k) const
- {
- return map[CTU(k)];
- }
- };
- static const uint MaxCharMapLinearChars = 4;
- template <typename V>
- class CharMap<char16, V, CharMapScheme_Linear> : private Chars<char16>
- {
- template <typename C>
- friend class TextbookBoyerMooreWithLinearMap;
- using typename Chars<char16>::Char;
- using Chars<char16>::CTU;
- private:
- V defv;
- uint map[MaxCharMapLinearChars];
- V lastOcc[MaxCharMapLinearChars];
- public:
- CharMap(V defv) : defv(defv)
- {
- for (uint i = 0; i < MaxCharMapLinearChars; i++)
- {
- map[i] = 0;
- lastOcc[i] = defv;
- }
- }
- inline void Set(uint numLinearChars, Char const * map, V const * lastOcc)
- {
- Assert(numLinearChars <= MaxCharMapLinearChars);
- for (uint i = 0; i < numLinearChars; i++)
- {
- this->map[i] = CTU(map[i]);
- this->lastOcc[i] = lastOcc[i];
- }
- }
- uint GetChar(uint index) const
- {
- Assert(index < MaxCharMapLinearChars);
- __analysis_assume(index < MaxCharMapLinearChars);
- return map[index];
- }
- V GetLastOcc(uint index) const
- {
- Assert(index < MaxCharMapLinearChars);
- __analysis_assume(index < MaxCharMapLinearChars);
- return lastOcc[index];
- }
- inline V Get(uint inputChar) const
- {
- if (map[0] == inputChar)
- return lastOcc[0];
- if (map[1] == inputChar)
- return lastOcc[1];
- if (map[2] == inputChar)
- return lastOcc[2];
- if (map[3] == inputChar)
- return lastOcc[3];
- return defv;
- }
- inline V Get(Char k) const
- {
- return Get(CTU(k));
- }
- };
- template <typename V, CharMapScheme scheme>
- class CharMap<char16, V, scheme> : private Chars<char16>
- {
- // public:
- using Chars<char16>::Char;
- using Chars<char16>::CharWidth;
- using Chars<char16>::CTU;
- private:
- static const int directBits = Chars<char>::CharWidth;
- static const int directSize = Chars<char>::NumChars;
- static const int bitsPerLevel = 4;
- static const int branchingPerLevel = 1 << bitsPerLevel;
- static const uint mask = branchingPerLevel - 1;
- static const int levels = CharWidth / bitsPerLevel;
- inline static uint innerIdx(int level, uint v)
- {
- return (v >> (level * bitsPerLevel)) & mask;
- }
- inline static uint leafIdx(uint v)
- {
- return v & mask;
- }
- struct Node
- {
- virtual void FreeSelf(ArenaAllocator* allocator) = 0;
- virtual void Set(ArenaAllocator* allocator, V defv, int level, uint k, V v) = 0;
- virtual V Get(V defv, int level, uint k) const = 0;
- static inline Node* For(ArenaAllocator* allocator, int level, V defv)
- {
- if (level == 0)
- return Anew(allocator, Leaf, defv);
- else
- return Anew(allocator, Inner);
- }
- };
- struct Inner : Node
- {
- Node* children[branchingPerLevel];
- Inner()
- {
- for (int i = 0; i < branchingPerLevel; i++)
- children[i] = 0;
- }
- void FreeSelf(ArenaAllocator* allocator) override
- {
- for (int i = 0; i < branchingPerLevel; i++)
- {
- if (children[i] != 0)
- {
- children[i]->FreeSelf(allocator);
- #if DBG
- children[i] = 0;
- #endif
- }
- }
- Adelete(allocator, this);
- }
- void Set(ArenaAllocator* allocator, V defv, int level, uint k, V v) override
- {
- Assert(level > 0);
- uint i = innerIdx(level--, k);
- if (children[i] == 0)
- {
- if (v == defv)
- return;
- children[i] = Node::For(allocator, level, defv);
- }
- children[i]->Set(allocator, defv, level, k, v);
- }
- V Get(V defv, int level, uint k) const override
- {
- Assert(level > 0);
- uint i = innerIdx(level--, k);
- if (children[i] == 0)
- return defv;
- else
- return children[i]->Get(defv, level, k);
- }
- };
- struct Leaf : Node
- {
- V values[branchingPerLevel];
- Leaf(V defv)
- {
- for (int i = 0; i < branchingPerLevel; i++)
- values[i] = defv;
- }
- void FreeSelf(ArenaAllocator* allocator) override
- {
- Adelete(allocator, this);
- }
- void Set(ArenaAllocator* allocator, V defv, int level, uint k, V v) override
- {
- Assert(level == 0);
- values[leafIdx(k)] = v;
- }
- V Get(V defv, int level, uint k) const override
- {
- Assert(level == 0);
- return values[leafIdx(k)];
- }
- };
- Field(BVStatic<directSize>) isInMap;
- Field(V) defv;
- Field(V) directMap[directSize];
- FieldNoBarrier(Node*) root;
- public:
- CharMap(V defv)
- : defv(defv)
- , root(0)
- {
- for (int i = 0; i < directSize; i++)
- directMap[i] = defv;
- }
- void FreeBody(ArenaAllocator* allocator)
- {
- if (root != 0)
- {
- root->FreeSelf(allocator);
- #if DBG
- root = 0;
- #endif
- }
- }
- void Set(ArenaAllocator* allocator, Char kc, V v)
- {
- uint k = CTU(kc);
- if (k < directSize)
- {
- isInMap.Set(k);
- directMap[k] = v;
- }
- else
- {
- if (root == 0)
- {
- if (v == defv)
- return;
- root = Anew(allocator, Inner);
- }
- root->Set(allocator, defv, levels - 1, k, v);
- }
- }
- bool GetNonDirect(uint k, V& lastOcc) const
- {
- Assert(k >= directSize);
- if (root == nullptr)
- {
- return false;
- }
- Node* curr = root;
- for (int level = levels - 1; level > 0; level--)
- {
- Inner* inner = (Inner*)curr;
- uint i = innerIdx(level, k);
- if (inner->children[i] == 0)
- return false;
- else
- curr = inner->children[i];
- }
- Leaf* leaf = (Leaf*)curr;
- lastOcc = leaf->values[leafIdx(k)];
- return true;
- }
- uint GetDirectMapSize() const { return directSize; }
- BOOL IsInDirectMap(uint c) const { Assert(c < directSize); return isInMap.Test(c); }
- V GetDirectMap(uint c) const
- {
- Assert(c < directSize);
- __analysis_assume(c < directSize);
- return directMap[c];
- }
- inline V Get(Char kc) const
- {
- if (CTU(kc) < GetDirectMapSize())
- {
- if (!IsInDirectMap(CTU(kc)))
- {
- return defv;
- }
- return GetDirectMap(CTU(kc));
- }
- else
- {
- V lastOcc;
- if (!GetNonDirect(CTU(kc), lastOcc))
- {
- return defv;
- }
- return lastOcc;
- }
- }
- };
- }
|