Browse Source

[MERGE #5372 @VSadov] Improving performance of SCA Deserialization

Merge pull request #5372 from VSadov:SCAperf01

Some low-hanging fruit related to SCA deserialization perf.

There are also some "Full" changes reducing transient garbage in the cloner.

Related to OS:#17883520

Not quite fixing the entire reported perf gap, but should be narrowing it by 10%-20%
Vladimir Sadov 7 years ago
parent
commit
bb5250aa50

+ 10 - 14
lib/Common/DataStructures/Comparer.h

@@ -68,7 +68,7 @@ struct DefaultComparer<size_t>
 #ifdef TARGET_64
         // For 64 bits we want all 64 bits of the pointer to be represented in the hash code.
         uint32 hi = ((UINT_PTR) i >> 32);
-        uint32 lo = (uint32) (i & 0xFFFFFFFF);
+        uint32 lo = (uint32)i;
         hash_t hash = hi ^ lo;
 #else
         hash_t hash = i;
@@ -112,19 +112,15 @@ struct RecyclerPointerComparer
     }
 };
 
-// FNV-1a hash -> https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
-// #define CC_HASH_OFFSET_VALUE 2166136261
-// #define CC_HASH_LOGIC(hash, byte) \
-//    hash ^= byte;                  \
-//    hash *= 16777619
-
-// previous hash function.
-// TODO: hash function below is bad for key distribution.
-//       FNV-1a above results better but expensive for lookups in small data sets.
-#define CC_HASH_OFFSET_VALUE 0
-#define CC_HASH_LOGIC(hash, byte) \
-    hash = _rotl(hash, 7);        \
-    hash ^= byte;
+ // TODO: FNV is a proven hash, especially for short strings, which is the common case here.
+ //       Still, it may be worth considering a more recent block-based hash.
+ //       They tend to be faster, but they need to be examined against typical workloads.
+ //
+ //  FNV-1a hash -> https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+ #define CC_HASH_OFFSET_VALUE 2166136261
+ #define CC_HASH_LOGIC(hash, byte) \
+    hash ^= byte;                  \
+    hash *= 16777619
 
 template <>
 struct DefaultComparer<GUID>

+ 7 - 0
lib/Common/DataStructures/SizePolicy.h

@@ -32,6 +32,13 @@ struct PowerOf2Policy
     inline static hash_t GetBucket(hash_t hashCode, uint size, int modFunctionIndex)
     {
         AssertMsg(Math::IsPow2(size) && size > 1, "Size is not a power of 2.");
+
+        // we often deal with keys that differ in higher bits only, so smudge the entropy down a little 
+        // we do not need a perfect avalanche here, since this is for a hashtable lookup
+        // the following is sufficient and reasonably cheap
+        hashCode ^= (uint)hashCode >> 15;
+        hashCode ^= (uint)hashCode >> 7;
+
         hash_t targetBucket = hashCode & (size-1);
         return targetBucket;
     }

+ 1 - 1
lib/Runtime/Base/ThreadContext.h

@@ -461,7 +461,7 @@ private:
     };
 
 public:
-    typedef JsUtil::BaseHashSet<const Js::PropertyRecord *, HeapAllocator, PrimeSizePolicy, const Js::PropertyRecord *,
+    typedef JsUtil::BaseHashSet<const Js::PropertyRecord *, HeapAllocator, PowerOf2SizePolicy, const Js::PropertyRecord *,
         Js::PropertyRecordStringHashComparer, JsUtil::SimpleHashedEntry, JsUtil::AsymetricResizeLock> PropertyMap;
     PropertyMap * propertyMap;
 

+ 53 - 14
lib/Runtime/Types/TypePath.h

@@ -8,7 +8,7 @@ namespace Js
 {
     class TinyDictionary
     {
-        static const int PowerOf2_BUCKETS = 8;
+        static const int PowerOf2_BUCKETS = 16;
         static const int BUCKETS_DWORDS = PowerOf2_BUCKETS / sizeof(DWORD);
         static const byte NIL = 0xff;
 
@@ -19,19 +19,40 @@ public:
         TinyDictionary()
         {
             CompileAssert(BUCKETS_DWORDS * sizeof(DWORD) == PowerOf2_BUCKETS);
-            CompileAssert(BUCKETS_DWORDS == 2);
+            CompileAssert(BUCKETS_DWORDS == 4);
             DWORD* init = bucketsData;
-            init[0] = init[1] = 0xffffffff;
+            init[0] = init[1] = init[2] = init[3] =0xffffffff;
+        }
+
+        uint32 ReduceKeyToIndex(PropertyId key)
+        {
+            // we use a 4-bit bucket index, but we often have keys that are larger,
+            // so use a Fibonacci hash to reduce the possibility of collisions
+#if TARGET_64
+            return (key * 11400714819323198485llu) >> 60;
+#else
+            return (key * 2654435769ul) >> 28;
+#endif
         }
 
         void Add(PropertyId key, byte value)
         {
+            Assert(value < 128);
+
             byte* buckets = reinterpret_cast<byte*>(bucketsData);
-            uint32 bucketIndex = key & (PowerOf2_BUCKETS - 1);
+            uint32 bucketIndex = ReduceKeyToIndex(key);
 
             byte i = buckets[bucketIndex];
-            buckets[bucketIndex] = value;
-            next[value] = i;
+            if (i == NIL)
+            {
+                // set the highest bit to mark the value as the last in the chain
+                buckets[bucketIndex] = value | 128;
+            }
+            else
+            {
+                buckets[bucketIndex] = value;
+                next[value] = i;
+            }
         }
 
         // Template shared with diagnostics
@@ -39,17 +60,32 @@ public:
         inline bool TryGetValue(PropertyId key, PropertyIndex* index, const Data& data)
         {
             byte* buckets = reinterpret_cast<byte*>(bucketsData);
-            uint32 bucketIndex = key & (PowerOf2_BUCKETS - 1);
+            uint32 bucketIndex = ReduceKeyToIndex(key);
 
-            for (byte i = buckets[bucketIndex] ; i != NIL ; i = next[i])
+            byte i = buckets[bucketIndex];
+            if (i != NIL)
             {
-                if (data[i]->GetPropertyId()== key)
-                {
-                    *index = i;
-                    return true;
+                for(;;)
+                {                    
+                    byte idx = i & 127; // strip the sentinel bit
+
+                    if (data[idx]->GetPropertyId() == key)
+                    {
+                        *index = idx;
+                        return true;
+                    }
+
+                    if (i & 128)
+                    {
+                        // this was the last value in the chain
+                        break;
+                    }
+
+                    Assert(idx != (next[idx] & 127));
+                    i = next[idx];
                 }
-                Assert(i != next[i]);
             }
+
             return false;
         }
     };
@@ -75,7 +111,10 @@ public:
 #define TYPE_PATH_ALLOC_GRANULARITY_GAP 3
 #endif
 #endif
-        // Although we can allocate 2 more, this will put struct Data into another bucket.  Just waste some slot in that case for 32-bit
+        // Although we could allocate 2 more,
+        // TinyDictionary can hold only up to 128 items (see TinyDictionary::Add).
+        // Besides, 128+ would put struct Data into another bucket.
+        // Just waste some slots in that case for 32-bit.
         static const uint MaxPathTypeHandlerLength = 128;
         static const uint InitialTypePathSize = 16 + TYPE_PATH_ALLOC_GRANULARITY_GAP;