SimpleHashTable.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. template<typename TKey, typename TData>
  7. struct SimpleHashEntry {
  8. TKey key;
  9. TData value;
  10. SimpleHashEntry *next;
  11. };
  12. // Size should be a power of 2 for optimal performance
  13. template<
  14. typename TKey,
  15. typename TData,
  16. typename TAllocator = ArenaAllocator,
  17. template <typename DataOrKey> class Comparer = DefaultComparer,
  18. bool resize = false,
  19. typename SizePolicy = PowerOf2Policy>
  20. class SimpleHashTable
  21. {
  22. typedef SimpleHashEntry<TKey, TData> EntryType;
  23. // REVIEW: Consider 5 or 7 as multiplication of these might be faster.
  24. static const int MaxAverageChainLength = 6;
  25. TAllocator *allocator;
  26. EntryType **table;
  27. EntryType *free;
  28. uint count;
  29. uint size;
  30. uint freecount;
  31. bool disableResize;
  32. int modFunctionIndex;
  33. #if PROFILE_DICTIONARY
  34. DictionaryStats *stats;
  35. #endif
  36. public:
  37. SimpleHashTable(TAllocator *allocator) :
  38. allocator(allocator),
  39. count(0),
  40. freecount(0),
  41. modFunctionIndex(UNKNOWN_MOD_INDEX)
  42. {
  43. this->size = SizePolicy::GetSize(64, &modFunctionIndex);
  44. Initialize();
  45. }
  46. SimpleHashTable(uint size, TAllocator* allocator) :
  47. allocator(allocator),
  48. count(0),
  49. freecount(0),
  50. modFunctionIndex(UNKNOWN_MOD_INDEX)
  51. {
  52. this->size = SizePolicy::GetSize(size, &modFunctionIndex);
  53. Initialize();
  54. }
  55. void Initialize()
  56. {
  57. disableResize = false;
  58. free = nullptr;
  59. table = AllocatorNewArrayZ(TAllocator, allocator, EntryType*, size);
  60. #if PROFILE_DICTIONARY
  61. stats = DictionaryStats::Create(typeid(this).name(), size);
  62. #endif
  63. }
  64. ~SimpleHashTable()
  65. {
  66. for (uint i = 0; i < size; i++)
  67. {
  68. EntryType * entry = table[i];
  69. while (entry != nullptr)
  70. {
  71. EntryType * next = entry->next;
  72. AllocatorDelete(TAllocator, allocator, entry);
  73. entry = next;
  74. }
  75. }
  76. while(free)
  77. {
  78. EntryType* current = free;
  79. free = current->next;
  80. AllocatorDelete(TAllocator, allocator, current);
  81. }
  82. AllocatorDeleteArray(TAllocator, allocator, size, table);
  83. }
  84. void DisableResize()
  85. {
  86. Assert(!resize || !disableResize);
  87. disableResize = true;
  88. }
  89. void EnableResize()
  90. {
  91. Assert(!resize || disableResize);
  92. disableResize = false;
  93. }
  94. void Set(TKey key, TData data)
  95. {
  96. EntryType* entry = FindOrAddEntry(key);
  97. entry->value = data;
  98. }
  99. bool Add(TKey key, TData data)
  100. {
  101. uint targetBucket = HashKeyToBucket(key);
  102. if(FindEntry(key, targetBucket) != nullptr)
  103. {
  104. return false;
  105. }
  106. AddInternal(key, data, targetBucket);
  107. return true;
  108. }
  109. void ReplaceValue(TKey key,TData data)
  110. {
  111. EntryType *current = FindEntry(key);
  112. if (current != nullptr)
  113. {
  114. current->value = data;
  115. }
  116. }
  117. void Remove(TKey key)
  118. {
  119. Remove(key, nullptr);
  120. }
  121. void Remove(TKey key, TData* pOut)
  122. {
  123. uint val = HashKeyToBucket(key);
  124. EntryType **prev=&table[val];
  125. for (EntryType * current = *prev ; current != nullptr; current = current->next)
  126. {
  127. if (Comparer<TKey>::Equals(key, current->key))
  128. {
  129. *prev = current->next;
  130. if (pOut != nullptr)
  131. {
  132. (*pOut) = current->value;
  133. }
  134. FreeEntry(current);
  135. #if PROFILE_DICTIONARY
  136. if (stats)
  137. stats->Remove(table[val] == nullptr);
  138. #endif
  139. break;
  140. }
  141. prev = &current->next;
  142. }
  143. }
  144. BOOL HasEntry(TKey key)
  145. {
  146. return (FindEntry(key) != nullptr);
  147. }
  148. uint Count() const
  149. {
  150. return(count);
  151. }
  152. // If density is a compile-time constant, then we can optimize (avoids division)
  153. // Sometimes the compiler can also make this optimization, but this way it is guaranteed.
  154. template< uint density > bool IsDenserThan() const
  155. {
  156. return count > (size * density);
  157. }
  158. TData Lookup(TKey key)
  159. {
  160. EntryType *current = FindEntry(key);
  161. if (current != nullptr)
  162. {
  163. return current->value;
  164. }
  165. return TData();
  166. }
  167. TData LookupIndex(int index)
  168. {
  169. EntryType *current;
  170. int j=0;
  171. for (uint i=0; i < size; i++)
  172. {
  173. for (current = table[i] ; current != nullptr; current = current->next)
  174. {
  175. if (j==index)
  176. {
  177. return current->value;
  178. }
  179. j++;
  180. }
  181. }
  182. return nullptr;
  183. }
  184. bool TryGetValue(TKey key, TData *dataReference)
  185. {
  186. EntryType *current = FindEntry(key);
  187. if (current != nullptr)
  188. {
  189. *dataReference = current->value;
  190. return true;
  191. }
  192. return false;
  193. }
  194. TData& GetReference(TKey key)
  195. {
  196. EntryType * current = FindOrAddEntry(key);
  197. return current->value;
  198. }
  199. TData * TryGetReference(TKey key)
  200. {
  201. EntryType * current = FindEntry(key);
  202. if (current != nullptr)
  203. {
  204. return &current->value;
  205. }
  206. return nullptr;
  207. }
  208. template <class Fn>
  209. void Map(Fn fn)
  210. {
  211. EntryType *current;
  212. for (uint i=0;i<size;i++) {
  213. for (current = table[i] ; current != nullptr; current = current->next) {
  214. fn(current->key,current->value);
  215. }
  216. }
  217. }
  218. template <class Fn>
  219. void MapAndRemoveIf(Fn fn)
  220. {
  221. for (uint i=0; i<size; i++)
  222. {
  223. EntryType ** prev = &table[i];
  224. while (EntryType * current = *prev)
  225. {
  226. if (fn(current->key,current->value))
  227. {
  228. *prev = current->next;
  229. FreeEntry(current);
  230. }
  231. else
  232. {
  233. prev = &current->next;
  234. }
  235. }
  236. }
  237. }
  238. private:
  239. uint HashKeyToBucket(TKey hashKey)
  240. {
  241. return HashKeyToBucket(hashKey, size);
  242. }
  243. uint HashKeyToBucket(TKey hashKey, int size)
  244. {
  245. hash_t hashCode = Comparer<TKey>::GetHashCode(hashKey);
  246. return SizePolicy::GetBucket(hashCode, size, modFunctionIndex);
  247. }
  248. EntryType * FindEntry(TKey key)
  249. {
  250. uint targetBucket = HashKeyToBucket(key);
  251. return FindEntry(key, targetBucket);
  252. }
  253. EntryType * FindEntry(TKey key, uint targetBucket)
  254. {
  255. for (EntryType * current = table[targetBucket] ; current != nullptr; current = current->next)
  256. {
  257. if (Comparer<TKey>::Equals(key, current->key))
  258. {
  259. return current;
  260. }
  261. }
  262. return nullptr;
  263. }
  264. EntryType * FindOrAddEntry(TKey key)
  265. {
  266. uint targetBucket = HashKeyToBucket(key);
  267. EntryType * entry = FindEntry(key, targetBucket);
  268. if (entry == nullptr)
  269. {
  270. entry = AddInternal(key, TData(), targetBucket);
  271. }
  272. return entry;
  273. }
  274. void FreeEntry(EntryType* current)
  275. {
  276. count--;
  277. if ( freecount < 10 )
  278. {
  279. current->key = nullptr;
  280. current->value = NULL;
  281. current->next = free;
  282. free = current;
  283. freecount++;
  284. }
  285. else
  286. {
  287. AllocatorDelete(TAllocator, allocator, current);
  288. }
  289. }
  290. EntryType* GetFreeEntry()
  291. {
  292. EntryType* retFree = free;
  293. if (nullptr == retFree )
  294. {
  295. retFree = AllocatorNewStruct(TAllocator, allocator, EntryType);
  296. }
  297. else
  298. {
  299. free = retFree->next;
  300. freecount--;
  301. }
  302. return retFree;
  303. }
  304. EntryType* AddInternal(TKey key, TData data, uint targetBucket)
  305. {
  306. if(resize && !disableResize && IsDenserThan<MaxAverageChainLength>())
  307. {
  308. Resize(SizePolicy::GetSize(size*2, &modFunctionIndex));
  309. // After resize - we will need to recalculate the bucket
  310. targetBucket = HashKeyToBucket(key);
  311. }
  312. EntryType* entry = GetFreeEntry();
  313. entry->key = key;
  314. entry->value = data;
  315. entry->next = table[targetBucket];
  316. table[targetBucket] = entry;
  317. count++;
  318. #if PROFILE_DICTIONARY
  319. uint depth = 0;
  320. for (EntryType * current = table[targetBucket] ; current != nullptr; current = current->next)
  321. {
  322. ++depth;
  323. }
  324. if (stats)
  325. stats->Insert(depth);
  326. #endif
  327. return entry;
  328. }
  329. void Resize(int newSize)
  330. {
  331. Assert(!this->disableResize);
  332. EntryType** newTable = AllocatorNewArrayZ(TAllocator, allocator, EntryType*, newSize);
  333. for (uint i=0; i < size; i++)
  334. {
  335. EntryType* current = table[i];
  336. while (current != nullptr)
  337. {
  338. int targetBucket = HashKeyToBucket(current->key, newSize);
  339. EntryType* next = current->next; // Cache the next pointer
  340. current->next = newTable[targetBucket];
  341. newTable[targetBucket] = current;
  342. current = next;
  343. }
  344. }
  345. AllocatorDeleteArray(TAllocator, allocator, this->size, this->table);
  346. this->size = newSize;
  347. this->table = newTable;
  348. #if PROFILE_DICTIONARY
  349. if (stats)
  350. {
  351. uint emptyBuckets = 0 ;
  352. for (uint i=0; i < size; i++)
  353. {
  354. if(table[i] == nullptr)
  355. {
  356. emptyBuckets++;
  357. }
  358. }
  359. stats->Resize(newSize, emptyBuckets);
  360. }
  361. #endif
  362. }
  363. };