| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Copyright (c) 2021 ChakraCore Project Contributors. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- // From Cormen, Leiserson and Rivest, ch 34.
- #pragma once
- namespace UnifiedRegex
- {
- template <typename C>
- struct TextbookBoyerMooreSetup : private Chars<C>
- {
- private:
- typedef typename Chars<C>::Char Char;
- template <typename T>
- friend class TextbookBoyerMoore;
- template <typename T>
- friend class TextbookBoyerMooreWithLinearMap;
- public:
- enum Scheme
- {
- DefaultScheme,
- LinearScheme
- };
- TextbookBoyerMooreSetup(Char const * pat, CharCount patLen) : pat(pat), patLen(patLen) { Init(); }
- Scheme GetScheme() const { return scheme; }
- static int32 * GetGoodSuffix(ArenaAllocator* allocator, const Char * pat, CharCount patLen, int skip = 1);
- private:
- void Init();
- Scheme scheme;
- Char const * const pat;
- CharCount const patLen;
- uint numLinearChars;
- Char linearChar[MaxCharMapLinearChars];
- int32 lastOcc[MaxCharMapLinearChars];
- };
- template <typename C>
- class TextbookBoyerMooreWithLinearMap : private Chars<C>
- {
- template <typename T>
- friend struct TextbookBoyerMooreSetup;
- typedef typename Chars<C>::Char Char;
- private:
- typedef CharMap<Char, int32, CharMapScheme_Linear> LastOccMap;
- // NOTE: We don't store the actual pattern here since it may be moved between
- // constructing the scanner and running it.
- LastOccMap lastOccurrence;
- int32 *goodSuffix;
- public:
- inline TextbookBoyerMooreWithLinearMap() : lastOccurrence(-1), goodSuffix(0) {}
- // Construct Boyer-Moore tables for pattern pat:
- // - pat must be of length patLen * skip
- // - if skip is > 1, then each consecutive skip characters of pattern are assumed to represent
- // an equivalence class of characters in canonical order (i.e. all chars in a class are represented
- // by the same sequence of skip chars)
- // - otherwise this is a regular exact-match pattern
- void Setup(ArenaAllocator* allocator, TextbookBoyerMooreSetup<C> const& setup);
- void FreeBody(ArenaAllocator* allocator, CharCount patLen);
- // NOTE: In the following pat and patLen must be the same as passed to Setup above
- // For skip = 1
- template <uint equivClassSize>
- bool Match
- ( const Char *const input
- , const CharCount inputLength
- , CharCount& inputOffset
- , const Char* pat
- , const CharCount patLen
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , RegexStats* stats
- #endif
- ) const;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- static char16 const * GetName() { return _u("linear map Boyer-Moore"); }
- #endif
- };
- template <typename C>
- class TextbookBoyerMoore : private Chars<C>
- {
- template <typename T>
- friend struct TextbookBoyerMooreSetup;
- typedef typename Chars<C>::Char Char;
- private:
- typedef CharMap<Char, int32> LastOccMap;
- // NOTE: We don't store the actual pattern here since it may be moved between
- // constructing the scanner and running it.
- Field(LastOccMap) lastOccurrence;
- Field(int32 *) goodSuffix;
- public:
- inline TextbookBoyerMoore() : lastOccurrence(-1), goodSuffix(nullptr) {}
- // Construct Boyer-Moore tables for pattern pat:
- // - pat must be of length patLen * skip
- // - if skip is > 1, then each consecutive skip characters of pattern are assumed to represent
- // an equivalence class of characters in canonical order (i.e. all chars in a class are represented
- // by the same sequence of skip chars)
- // - otherwise this is a regular exact-match pattern
- void Setup(ArenaAllocator* allocator, TextbookBoyerMooreSetup<C> const& setup);
- void Setup(ArenaAllocator * allocator, const Char * pat, CharCount patLen, int skip);
- void FreeBody(ArenaAllocator* allocator, CharCount patLen);
- // NOTE: In the following pat and patLen must be the same as passed to Setup above
- template <uint equivClassSize>
- inline bool Match
- ( const Char *const input
- , const CharCount inputLength
- , CharCount& inputOffset
- , const Char* pat
- , const CharCount patLen
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , RegexStats* stats
- #endif
- ) const
- {
- return Match<equivClassSize, equivClassSize>(input, inputLength, inputOffset, pat, patLen
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , stats
- #endif
- );
- }
- template <uint equivClassSize, uint lastPatCharEquivClass>
- bool Match
- ( const Char *const input
- , const CharCount inputLength
- , CharCount& inputOffset
- , const Char* pat
- , const CharCount patLen
- #if ENABLE_REGEX_CONFIG_OPTIONS
- , RegexStats* stats
- #endif
- ) const;
- #if ENABLE_REGEX_CONFIG_OPTIONS
- static char16 const * GetName() { return _u("full map Boyer-Moore"); }
- #endif
- };
- }
|