Procházet zdrojové kódy

Simd.js: Refactoring Shuffle/Swizzle helper.

Refactor the shuffle/swizzle helper methods in SIMDUtils
to avoid a run-time issue that occurs when building with the Visual Studio 2013 compiler.
Arun před 9 roky
rodič
revize
5b7892bc28

+ 32 - 14
lib/Runtime/Language/InterpreterProcessOpCodeAsmJs.h

@@ -836,7 +836,7 @@ if (switchProfileMode) \
     lanes[10] = GetRegRawInt(playout->I12); lanes[11] = GetRegRawInt(playout->I13); \
     lanes[12] = GetRegRawInt(playout->I14); lanes[13] = GetRegRawInt(playout->I15); \
     lanes[14] = GetRegRawInt(playout->I16); lanes[15] = GetRegRawInt(playout->I17); \
-    SetRegRawSimd(playout->I16_0, func<16>(GetRegRawSimd(playout->I16_1), GetRegRawSimd(playout->I16_1),lanes)); \
+    SetRegRawSimd(playout->I16_0, func(GetRegRawSimd(playout->I16_1), GetRegRawSimd(playout->I16_1), 16, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_I16_1I16toI16_1(name, func) PROCESS_SIMD_I16_1I16toI16_1_COMMON(name, func,)
@@ -855,7 +855,7 @@ if (switchProfileMode) \
     lanes[10] = GetRegRawInt(playout->I13); lanes[11] = GetRegRawInt(playout->I14); \
     lanes[12] = GetRegRawInt(playout->I15); lanes[13] = GetRegRawInt(playout->I16); \
     lanes[14] = GetRegRawInt(playout->I17); lanes[15] = GetRegRawInt(playout->I18); \
-    SetRegRawSimd(playout->I16_0, func<16>(GetRegRawSimd(playout->I16_1), GetRegRawSimd(playout->I16_2), lanes)); \
+    SetRegRawSimd(playout->I16_0, func(GetRegRawSimd(playout->I16_1), GetRegRawSimd(playout->I16_2), 16, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_I16_2I16toI16_1(name, func) PROCESS_SIMD_I16_2I16toI16_1_COMMON(name, func,)
@@ -1727,7 +1727,10 @@ if (switchProfileMode) \
     case OpCodeAsmJs::name: \
     { \
     PROCESS_READ_LAYOUT_ASMJS(name, Float32x4_2Int4, suffix); \
-    SetRegRawSimd(playout->F4_0, func(GetRegRawSimd(playout->F4_1), GetRegRawSimd(playout->F4_1), GetRegRawInt(playout->I2), GetRegRawInt(playout->I3), GetRegRawInt(playout->I4), GetRegRawInt(playout->I5))); \
+    uint32 lanes[4]; \
+    lanes[0] = GetRegRawInt(playout->I2);   lanes[1] = GetRegRawInt(playout->I3); \
+    lanes[2] = GetRegRawInt(playout->I4);   lanes[3] = GetRegRawInt(playout->I5); \
+    SetRegRawSimd(playout->F4_0, func(GetRegRawSimd(playout->F4_1), GetRegRawSimd(playout->F4_1), 4, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_F4_1I4toF4_1(name, func) PROCESS_SIMD_F4_1I4toF4_1_COMMON(name, func,)
@@ -1737,7 +1740,10 @@ if (switchProfileMode) \
     case OpCodeAsmJs::name: \
     { \
     PROCESS_READ_LAYOUT_ASMJS(name, Float32x4_3Int4, suffix); \
-    SetRegRawSimd(playout->F4_0, func(GetRegRawSimd(playout->F4_1), GetRegRawSimd(playout->F4_2), GetRegRawInt(playout->I3), GetRegRawInt(playout->I4), GetRegRawInt(playout->I5), GetRegRawInt(playout->I6))); \
+    uint32 lanes[4]; \
+    lanes[0] = GetRegRawInt(playout->I3); lanes[1] = GetRegRawInt(playout->I4); \
+    lanes[2] = GetRegRawInt(playout->I5); lanes[3] = GetRegRawInt(playout->I6); \
+    SetRegRawSimd(playout->F4_0, func(GetRegRawSimd(playout->F4_1), GetRegRawSimd(playout->F4_2), 4, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_F4_1I4toF4_1(name, func) PROCESS_SIMD_F4_1I4toF4_1_COMMON(name, func,)
@@ -1747,7 +1753,10 @@ if (switchProfileMode) \
     case OpCodeAsmJs::name: \
     { \
     PROCESS_READ_LAYOUT_ASMJS(name, Int32x4_2Int4, suffix); \
-    SetRegRawSimd(playout->I4_0, func(GetRegRawSimd(playout->I4_1), GetRegRawSimd(playout->I4_1), GetRegRawInt(playout->I2), GetRegRawInt(playout->I3), GetRegRawInt(playout->I4), GetRegRawInt(playout->I5))); \
+    uint32 lanes[4]; \
+    lanes[0] = GetRegRawInt(playout->I2);   lanes[1] = GetRegRawInt(playout->I3); \
+    lanes[2] = GetRegRawInt(playout->I4);   lanes[3] = GetRegRawInt(playout->I5); \
+    SetRegRawSimd(playout->I4_0, func(GetRegRawSimd(playout->I4_1), GetRegRawSimd(playout->I4_1), 4, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_I4_1I4toI4_1(name, func) PROCESS_SIMD_I4_1I4toI4_1_COMMON(name, func,)
@@ -1757,7 +1766,10 @@ if (switchProfileMode) \
     case OpCodeAsmJs::name: \
     { \
     PROCESS_READ_LAYOUT_ASMJS(name, Int32x4_3Int4, suffix); \
-    SetRegRawSimd(playout->I4_0, func(GetRegRawSimd(playout->I4_1), GetRegRawSimd(playout->I4_2), GetRegRawInt(playout->I3), GetRegRawInt(playout->I4), GetRegRawInt(playout->I5), GetRegRawInt(playout->I6))); \
+    uint32 lanes[4]; \
+    lanes[0] = GetRegRawInt(playout->I3); lanes[1] = GetRegRawInt(playout->I4); \
+    lanes[2] = GetRegRawInt(playout->I5); lanes[3] = GetRegRawInt(playout->I6); \
+    SetRegRawSimd(playout->I4_0, func(GetRegRawSimd(playout->I4_1), GetRegRawSimd(playout->I4_2), 4, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_I4_1I4toI4_1(name, func) PROCESS_SIMD_I4_1I4toI4_1_COMMON(name, func,)
@@ -1773,7 +1785,7 @@ if (switchProfileMode) \
     lanes[2] = GetRegRawInt(playout->I4); lanes[3] = GetRegRawInt(playout->I5); \
     lanes[4] = GetRegRawInt(playout->I6); lanes[5] = GetRegRawInt(playout->I7); \
     lanes[6] = GetRegRawInt(playout->I8); lanes[7] = GetRegRawInt(playout->I9); \
-    SetRegRawSimd(playout->I8_0, func(GetRegRawSimd(playout->I8_1), GetRegRawSimd(playout->I8_1),lanes)); \
+    SetRegRawSimd(playout->I8_0, func(GetRegRawSimd(playout->I8_1), GetRegRawSimd(playout->I8_1), 8, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_I8_1I8toI8_1(name, func) PROCESS_SIMD_I8_1I8toI8_1_COMMON(name, func,)
@@ -1788,7 +1800,7 @@ if (switchProfileMode) \
     lanes[2] = GetRegRawInt(playout->I5); lanes[3] = GetRegRawInt(playout->I6); \
     lanes[4] = GetRegRawInt(playout->I7); lanes[5] = GetRegRawInt(playout->I8); \
     lanes[6] = GetRegRawInt(playout->I9); lanes[7] = GetRegRawInt(playout->I10); \
-    SetRegRawSimd(playout->I8_0, func(GetRegRawSimd(playout->I8_1), GetRegRawSimd(playout->I8_2), lanes)); \
+    SetRegRawSimd(playout->I8_0, func(GetRegRawSimd(playout->I8_1), GetRegRawSimd(playout->I8_2), 8, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_I8_1I8toI8_1(name, func) PROCESS_SIMD_I8_1I8toI8_1_COMMON(name, func,)
@@ -1798,7 +1810,10 @@ if (switchProfileMode) \
     case OpCodeAsmJs::name: \
     { \
     PROCESS_READ_LAYOUT_ASMJS(name, Uint32x4_2Int4, suffix); \
-    SetRegRawSimd(playout->U4_0, func(GetRegRawSimd(playout->U4_1), GetRegRawSimd(playout->U4_1), GetRegRawInt(playout->I2), GetRegRawInt(playout->I3), GetRegRawInt(playout->I4), GetRegRawInt(playout->I5))); \
+    uint32 lanes[4]; \
+    lanes[0] = GetRegRawInt(playout->I2);   lanes[1] = GetRegRawInt(playout->I3); \
+    lanes[2] = GetRegRawInt(playout->I4);   lanes[3] = GetRegRawInt(playout->I5); \
+    SetRegRawSimd(playout->U4_0, func(GetRegRawSimd(playout->U4_1), GetRegRawSimd(playout->U4_1), 4, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_U4_1I4toU4_1(name, func) PROCESS_SIMD_U4_1I4toU4_1_COMMON(name, func,)
@@ -1808,7 +1823,10 @@ if (switchProfileMode) \
     case OpCodeAsmJs::name: \
     { \
     PROCESS_READ_LAYOUT_ASMJS(name, Uint32x4_3Int4, suffix); \
-    SetRegRawSimd(playout->U4_0, func(GetRegRawSimd(playout->U4_1), GetRegRawSimd(playout->U4_2), GetRegRawInt(playout->I3), GetRegRawInt(playout->I4), GetRegRawInt(playout->I5), GetRegRawInt(playout->I6))); \
+    uint32 lanes[4]; \
+    lanes[0] = GetRegRawInt(playout->I3); lanes[1] = GetRegRawInt(playout->I4); \
+    lanes[2] = GetRegRawInt(playout->I5); lanes[3] = GetRegRawInt(playout->I6); \
+    SetRegRawSimd(playout->U4_0, func(GetRegRawSimd(playout->U4_1), GetRegRawSimd(playout->U4_2), 4, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_U4_1I4toU4_1(name, func) PROCESS_SIMD_U4_1I4toU4_1_COMMON(name, func,)
@@ -1823,7 +1841,7 @@ if (switchProfileMode) \
     lanes[2] = GetRegRawInt(playout->I4); lanes[3] = GetRegRawInt(playout->I5); \
     lanes[4] = GetRegRawInt(playout->I6); lanes[5] = GetRegRawInt(playout->I7); \
     lanes[6] = GetRegRawInt(playout->I8); lanes[7] = GetRegRawInt(playout->I9); \
-    SetRegRawSimd(playout->U8_0, func(GetRegRawSimd(playout->U8_1), GetRegRawSimd(playout->U8_1),lanes)); \
+    SetRegRawSimd(playout->U8_0, func(GetRegRawSimd(playout->U8_1), GetRegRawSimd(playout->U8_1), 8, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_U8_1I8toU8_1(name, func) PROCESS_SIMD_U8_1I8toU8_1_COMMON(name, func,)
@@ -1838,7 +1856,7 @@ if (switchProfileMode) \
     lanes[2] = GetRegRawInt(playout->I5); lanes[3] = GetRegRawInt(playout->I6); \
     lanes[4] = GetRegRawInt(playout->I7); lanes[5] = GetRegRawInt(playout->I8); \
     lanes[6] = GetRegRawInt(playout->I9); lanes[7] = GetRegRawInt(playout->I10); \
-    SetRegRawSimd(playout->U8_0, func(GetRegRawSimd(playout->U8_1), GetRegRawSimd(playout->U8_2), lanes)); \
+    SetRegRawSimd(playout->U8_0, func(GetRegRawSimd(playout->U8_1), GetRegRawSimd(playout->U8_2), 8, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_U8_1I8toU8_1(name, func) PROCESS_SIMD_U8_1I8toU8_1_COMMON(name, func,)
@@ -1857,7 +1875,7 @@ if (switchProfileMode) \
     lanes[10] = GetRegRawInt(playout->I12); lanes[11] = GetRegRawInt(playout->I13); \
     lanes[12] = GetRegRawInt(playout->I14); lanes[13] = GetRegRawInt(playout->I15); \
     lanes[14] = GetRegRawInt(playout->I16); lanes[15] = GetRegRawInt(playout->I17); \
-    SetRegRawSimd(playout->U16_0, func<16>(GetRegRawSimd(playout->U16_1), GetRegRawSimd(playout->U16_1),lanes)); \
+    SetRegRawSimd(playout->U16_0, func(GetRegRawSimd(playout->U16_1), GetRegRawSimd(playout->U16_1), 16,lanes)); \
     break; \
     }
 #define PROCESS_SIMD_U16_1I16toU16_1(name, func) PROCESS_SIMD_U16_1I16toU16_1_COMMON(name, func,)
@@ -1876,7 +1894,7 @@ if (switchProfileMode) \
     lanes[10] = GetRegRawInt(playout->I13); lanes[11] = GetRegRawInt(playout->I14); \
     lanes[12] = GetRegRawInt(playout->I15); lanes[13] = GetRegRawInt(playout->I16); \
     lanes[14] = GetRegRawInt(playout->I17); lanes[15] = GetRegRawInt(playout->I18); \
-    SetRegRawSimd(playout->U16_0, func<16>(GetRegRawSimd(playout->U16_1), GetRegRawSimd(playout->U16_2), lanes)); \
+    SetRegRawSimd(playout->U16_0, func(GetRegRawSimd(playout->U16_1), GetRegRawSimd(playout->U16_2), 16, lanes)); \
     break; \
     }
 #define PROCESS_SIMD_U16_2I16toU16_1(name, func) PROCESS_SIMD_U16_2I16toU16_1_COMMON(name, func,)

+ 27 - 90
lib/Runtime/Language/SimdUtils.cpp

@@ -105,54 +105,39 @@ namespace Js
     }
 #endif
 
-
-
-    template <uint32 laneCount>
-    SIMDValue SIMDUtils::SIMD128InnerShuffle(const SIMDValue src1, const SIMDValue src2, uint32 lane0, uint32 lane1, uint32 lane2, uint32 lane3)
-    {
-        SIMDValue result;
-        CompileAssert(laneCount == 4 || laneCount == 2);
-        if (laneCount == 4)
-        {
-            result.i32[SIMD_X] = lane0 < laneCount ? src1.i32[lane0] : src2.i32[lane0 - laneCount];
-            result.i32[SIMD_Y] = lane1 < laneCount ? src1.i32[lane1] : src2.i32[lane1 - laneCount];
-            result.i32[SIMD_Z] = lane2 < laneCount ? src1.i32[lane2] : src2.i32[lane2 - laneCount];
-            result.i32[SIMD_W] = lane3 < laneCount ? src1.i32[lane3] : src2.i32[lane3 - laneCount];
-        }
-        else
-        {
-            result.f64[SIMD_X] = lane0 < laneCount ? src1.f64[lane0] : src2.f64[lane0 - laneCount];
-            result.f64[SIMD_Y] = lane1 < laneCount ? src1.f64[lane1] : src2.f64[lane1 - laneCount];
-        }
-        return result;
-    }
-
-    template <uint32 laneCount>
-    SIMDValue SIMDUtils::SIMD128InnerShuffle(const SIMDValue src1, const SIMDValue src2, const uint32* lanes)
+    SIMDValue SIMDUtils::SIMD128InnerShuffle(const SIMDValue src1, const SIMDValue src2, uint32 laneCount, const uint32* lanes)
     {
         SIMDValue result = { 0 };
-        CompileAssert(laneCount == 16 || laneCount == 8);
+        Assert(laneCount == 16 || laneCount == 8 || laneCount == 4);
         Assert(lanes != nullptr);
-        if (laneCount == 8)
+        switch (laneCount)
         {
+        case 4:
+            for (uint i = 0; i < laneCount; ++i)
+            {
+                result.i32[i] = lanes[i] < laneCount ? src1.i32[lanes[i]] : src2.i32[lanes[i] - laneCount];
+            }
+            break;
+        case 8:
             for (uint i = 0; i < laneCount; ++i)
             {
                 result.i16[i] = lanes[i] < laneCount ? src1.i16[lanes[i]] : src2.i16[lanes[i] - laneCount];
             }
-        }
-        else
-        {
+            break;
+        case 16:
             for (uint i = 0; i < laneCount; ++i)
             {
                 result.i8[i] = lanes[i] < laneCount ? src1.i8[lanes[i]] : src2.i8[lanes[i] - laneCount];
             }
+            break;
+        default:
+            Assert(UNREACHED);
         }
-
         return result;
     }
 
-    template <class SIMDType, uint32 laneCount>
-    Var SIMDUtils::SIMD128SlowShuffle(Var src1, Var src2, Var* lanes, const uint32 range, ScriptContext* scriptContext)
+    template <class SIMDType>
+    Var SIMDUtils::SIMD128SlowShuffle(Var src1, Var src2, Var* lanes, const uint32 laneCount, const uint32 range, ScriptContext* scriptContext)
     {
         SIMDType *a = SIMDType::FromVar(src1);
         SIMDType *b = SIMDType::FromVar(src2);
@@ -164,73 +149,25 @@ namespace Js
         SIMDValue result;
 
         uint32 laneValue[16] = { 0 };
-        CompileAssert(laneCount == 16 || laneCount == 8);
+        Assert(laneCount == 16 || laneCount == 8 || laneCount == 4);
 
         for (uint i = 0; i < laneCount; ++i)
         {
             laneValue[i] = SIMDUtils::SIMDCheckLaneIndex(scriptContext, lanes[i], range);
         }
 
-        result = SIMD128InnerShuffle<laneCount>(src1Value, src2Value, laneValue);
+        result = SIMD128InnerShuffle(src1Value, src2Value, laneCount, laneValue);
 
         return SIMDType::New(&result, scriptContext);
     }
-    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt8x16, 16> (Var src1, Var src2, Var *lanes, const uint32 range, ScriptContext* scriptContext);
-    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt16x8, 8>  (Var src1, Var src2, Var *lanes, const uint32 range, ScriptContext* scriptContext);
-    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint8x16, 16>(Var src1, Var src2, Var *lanes, const uint32 range, ScriptContext* scriptContext);
-    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint16x8, 8> (Var src1, Var src2, Var *lanes, const uint32 range, ScriptContext* scriptContext);
-
-    template <class SIMDType, uint32 laneCount>
-    Var SIMDUtils::SIMD128SlowShuffle(Var src1, Var src2, Var lane0, Var lane1, Var lane2, Var lane3, uint32 range, ScriptContext* scriptContext)
-    {
-        SIMDType *a = SIMDType::FromVar(src1);
-        SIMDType *b = SIMDType::FromVar(src2);
-        Assert(a);
-        Assert(b);
-
-        uint32 lane0Value = 0;
-        uint32 lane1Value = 0;
-        uint32 lane2Value = 0;
-        uint32 lane3Value = 0;
-
-        SIMDValue src1Value = a->GetValue();
-        SIMDValue src2Value = b->GetValue();
-        SIMDValue result;
-
-        CompileAssert(laneCount == 4 || laneCount == 2);
-
-        if (laneCount == 4)
-        {
-            lane0Value = SIMDUtils::SIMDCheckLaneIndex(scriptContext, lane0, range);
-            lane1Value = SIMDUtils::SIMDCheckLaneIndex(scriptContext, lane1, range);
-            lane2Value = SIMDUtils::SIMDCheckLaneIndex(scriptContext, lane2, range);
-            lane3Value = SIMDUtils::SIMDCheckLaneIndex(scriptContext, lane3, range);
-
-            Assert(lane0Value >= 0 && lane0Value < range);
-            Assert(lane1Value >= 0 && lane1Value < range);
-            Assert(lane2Value >= 0 && lane2Value < range);
-            Assert(lane3Value >= 0 && lane3Value < range);
-
-            result = SIMD128InnerShuffle<4>(src1Value, src2Value, lane0Value, lane1Value, lane2Value, lane3Value);
-        }
-        else
-        {
-            lane0Value = SIMDUtils::SIMDCheckLaneIndex(scriptContext, lane0, range);
-            lane1Value = SIMDUtils::SIMDCheckLaneIndex(scriptContext, lane1, range);
-
-            Assert(lane0Value >= 0 && lane0Value < range);
-            Assert(lane1Value >= 0 && lane1Value < range);
-
-            result = SIMD128InnerShuffle<2>(src1Value, src2Value, lane0Value, lane1Value, lane2Value, lane3Value);
-        }
-
-        return SIMDType::New(&result, scriptContext);
-    }
-
-    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt32x4  , 4> (Var src1, Var src2, Var lane0, Var lane1, Var lane2, Var lane3, uint32 range, ScriptContext* scriptContext);
-    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat32x4, 4> (Var src1, Var src2, Var lane0, Var lane1, Var lane2, Var lane3, uint32 range, ScriptContext* scriptContext);
-    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat64x2, 2> (Var src1, Var src2, Var lane0, Var lane1, Var lane2, Var lane3, uint32 range, ScriptContext* scriptContext);
-    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint32x4 , 4> (Var src1, Var src2, Var lane0, Var lane1, Var lane2, Var lane3, uint32 range, ScriptContext* scriptContext);
+    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt32x4  >(Var src1, Var src2, Var *lanes, const uint32 laneCount, const uint32 range, ScriptContext* scriptContext);
+    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat32x4>(Var src1, Var src2, Var *lanes, const uint32 laneCount, const uint32 range, ScriptContext* scriptContext);
+    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat64x2>(Var src1, Var src2, Var *lanes, const uint32 laneCount, const uint32 range, ScriptContext* scriptContext);
+    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint32x4 >(Var src1, Var src2, Var *lanes, const uint32 laneCount, const uint32 range, ScriptContext* scriptContext);
+    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt8x16  >(Var src1, Var src2, Var *lanes, const uint32 laneCount, const uint32 range, ScriptContext* scriptContext);
+    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt16x8  >(Var src1, Var src2, Var *lanes, const uint32 laneCount, const uint32 range, ScriptContext* scriptContext);
+    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint8x16 >(Var src1, Var src2, Var *lanes, const uint32 laneCount, const uint32 range, ScriptContext* scriptContext);
+    template Var SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint16x8 >(Var src1, Var src2, Var *lanes, const uint32 laneCount, const uint32 range, ScriptContext* scriptContext);
 
     bool SIMDUtils::SIMDIsSupportedTypedArray(Var value)
     {

+ 4 - 9
lib/Runtime/Language/SimdUtils.h

@@ -265,15 +265,10 @@ namespace Js {
         ////////////////////////////////////////////
         // SIMD Shuffle Swizzle helpers
         ////////////////////////////////////////////
-        template <uint32 laneCount = 4>
-        static SIMDValue SIMD128InnerShuffle(const SIMDValue src1, const SIMDValue src2, uint32 lane0, uint32 lane1, uint32 lane2, uint32 lane3);
-        template <uint32 laneCount = 8>
-        static SIMDValue SIMD128InnerShuffle(const SIMDValue src1, const SIMDValue src2, const uint32* lanes = nullptr);
-
-        template <class SIMDType, uint32 laneCount = 4>
-        static Var SIMD128SlowShuffle(Var src1, Var src2, Var lane0, Var lane1, Var lane2, Var lane3, uint32 range, ScriptContext* scriptContext);
-        template <class SIMDType, uint32 laneCount = 8>
-        static Var SIMD128SlowShuffle(Var src1, Var src2, Var *lanes, const uint32 range, ScriptContext* scriptContext);
+        static SIMDValue SIMD128InnerShuffle(const SIMDValue src1, const SIMDValue src2, uint32 laneCount, const uint32* lanes = nullptr);
+
+        template <class SIMDType>
+        static Var SIMD128SlowShuffle(Var src1, Var src2, Var *lanes, const uint32 laneCount, const uint32 range, ScriptContext* scriptContext);
 
         ///////////////////////////////////////////
         // SIMD Type conversion

+ 12 - 10
lib/Runtime/Library/SimdFloat32x4Lib.cpp

@@ -910,12 +910,13 @@ namespace Js
                 JavascriptError::ThrowTypeError(scriptContext, JSERR_NeedNumber, _u("Lane index"));
             }
 
-            Var lane0 = args[2];
-            Var lane1 = args[3];
-            Var lane2 = args[4];
-            Var lane3 = args[5];
+            Var lanes[4];
+            for (uint i = 0; i < 4; ++i)
+            {
+                lanes[i] = args[i + 2];
+            }
 
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat32x4>(args[1], args[1], lane0, lane1, lane2, lane3, 4, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat32x4>(args[1], args[1], lanes, 4, 4, scriptContext);
         }
 
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdFloat32x4TypeMismatch, _u("swizzle"));
@@ -942,12 +943,13 @@ namespace Js
                 JavascriptError::ThrowTypeError(scriptContext, JSERR_NeedNumber, _u("Lane index"));
             }
 
-            Var lane0 = args[3];
-            Var lane1 = args[4];
-            Var lane2 = args[5];
-            Var lane3 = args[6];
+            Var lanes[4];
+            for (uint i = 0; i < 4; ++i)
+            {
+                lanes[i] = args[i + 3];
+            }
 
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat32x4>(args[1], args[2], lane0, lane1, lane2, lane3, 8, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat32x4>(args[1], args[2], lanes, 4, 8, scriptContext);
         }
 
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdFloat32x4TypeMismatch, _u("shuffle"));

+ 8 - 6
lib/Runtime/Library/SimdFloat64x2Lib.cpp

@@ -787,10 +787,11 @@ namespace Js
                 // missing lane args
                 JavascriptError::ThrowTypeError(scriptContext, JSERR_NeedNumber, _u("Lane index"));
             }
-            Var lane0 = args[2];
-            Var lane1 = args[3];
+            Var lanes[2];
+            lanes[0] = args[2];
+            lanes[1] = args[3];
 
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat64x2, 2>(args[1], args[1], lane0, lane1, NULL, NULL, 2, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat64x2>(args[1], args[1], lanes, 2, 2, scriptContext);
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdFloat64x2TypeMismatch, _u("swizzle"));
     }
@@ -815,10 +816,11 @@ namespace Js
                 // missing lane args
                 JavascriptError::ThrowTypeError(scriptContext, JSERR_NeedNumber, _u("Lane index"));
             }
-            Var lane0 = args[3];
-            Var lane1 = args[4];
+            Var lanes[2];
+            lanes[0] = args[3];
+            lanes[1] = args[4];
 
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat64x2, 2>(args[1], args[2], lane0, lane1, NULL, NULL, 4, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDFloat64x2>(args[1], args[2], lanes, 2, 4, scriptContext);
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdFloat64x2TypeMismatch, _u("shuffle"));
     }

+ 2 - 2
lib/Runtime/Library/SimdInt16x8Lib.cpp

@@ -749,7 +749,7 @@ namespace Js
             {
                 lanes[i] = args[i + 2];
             }
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt16x8>(args[1], args[1], lanes, 8, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt16x8>(args[1], args[1], lanes, 8, 8, scriptContext);
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdInt16x8TypeMismatch, _u("swizzle"));
     }
@@ -781,7 +781,7 @@ namespace Js
                 lanes[i] = args[i + 3];
             }
 
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt16x8>(args[1], args[2], lanes, 16, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt16x8>(args[1], args[2], lanes, 8, 16, scriptContext);
 
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdInt16x8TypeMismatch, _u("shuffle"));

+ 12 - 10
lib/Runtime/Library/SimdInt32x4Lib.cpp

@@ -803,12 +803,13 @@ namespace Js
                 JavascriptError::ThrowTypeError(scriptContext, JSERR_NeedNumber, _u("Lane index"));
             }
 
-            Var lane0 = args[2];
-            Var lane1 = args[3];
-            Var lane2 = args[4];
-            Var lane3 = args[5];
+            Var lanes[4];
+            lanes[0] = args[2];
+            lanes[1] = args[3];
+            lanes[2] = args[4];
+            lanes[3] = args[5];
 
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt32x4>(args[1], args[1], lane0, lane1, lane2, lane3, 4, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt32x4>(args[1], args[1], lanes, 4, 4, scriptContext);
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdInt32x4TypeMismatch, _u("swizzle"));
     }
@@ -834,12 +835,13 @@ namespace Js
                 JavascriptError::ThrowTypeError(scriptContext, JSERR_NeedNumber, _u("Lane index"));
             }
 
-            Var lane0 = args[3];
-            Var lane1 = args[4];
-            Var lane2 = args[5];
-            Var lane3 = args[6];
+            Var lanes[4];
+            lanes[0] = args[3];
+            lanes[1] = args[4];
+            lanes[2] = args[5];
+            lanes[3] = args[6];
 
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt32x4>(args[1], args[2], lane0, lane1, lane2, lane3, 8, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt32x4>(args[1], args[2], lanes, 4, 8, scriptContext);
 
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdInt32x4TypeMismatch, _u("shuffle"));

+ 2 - 2
lib/Runtime/Library/SimdInt8x16Lib.cpp

@@ -842,7 +842,7 @@ namespace Js
                 lanes[i] = args[i + 2];
             }
 
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt8x16, 16>(args[1], args[1], lanes, 16, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt8x16>(args[1], args[1], lanes, 16, 16, scriptContext);
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdInt8x16TypeMismatch, _u("swizzle"));
     }
@@ -873,7 +873,7 @@ namespace Js
             {
                 lanes[i] = args[i + 3];
             }
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt8x16, 16>(args[1], args[2], lanes, 32, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDInt8x16>(args[1], args[2], lanes, 16, 32, scriptContext);
 
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdInt8x16TypeMismatch, _u("shuffle"));

+ 2 - 2
lib/Runtime/Library/SimdUint16x8Lib.cpp

@@ -752,7 +752,7 @@ namespace Js
             {
                 lanes[i] = args[i + 2];
             }
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint16x8>(args[1], args[1], lanes, 8, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint16x8>(args[1], args[1], lanes, 8, 8, scriptContext);
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdUint16x8TypeMismatch, _u("swizzle"));
     }
@@ -783,7 +783,7 @@ namespace Js
             {
                 lanes[i] = args[i + 3];
             }
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint16x8>(args[1], args[2], lanes, 16, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint16x8>(args[1], args[2], lanes, 8, 16, scriptContext);
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdUint16x8TypeMismatch, _u("shuffle"));
     }

+ 12 - 10
lib/Runtime/Library/SimdUint32x4Lib.cpp

@@ -749,12 +749,13 @@ namespace Js
                 JavascriptError::ThrowTypeError(scriptContext, JSERR_NeedNumber, _u("Lane index"));
             }
 
-            Var lane0 = args[2];
-            Var lane1 = args[3];
-            Var lane2 = args[4];
-            Var lane3 = args[5];
+            Var lanes[4];
+            lanes[0] = args[2];
+            lanes[1] = args[3];
+            lanes[2] = args[4];
+            lanes[3] = args[5];
 
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint32x4>(args[1], args[1], lane0, lane1, lane2, lane3, 4, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint32x4>(args[1], args[1], lanes, 4, 4, scriptContext);
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdUint32x4TypeMismatch, _u("swizzle"));
     }
@@ -780,12 +781,13 @@ namespace Js
                 JavascriptError::ThrowTypeError(scriptContext, JSERR_NeedNumber, _u("Lane index"));
             }
 
-            Var lane0 = args[3];
-            Var lane1 = args[4];
-            Var lane2 = args[5];
-            Var lane3 = args[6];
+            Var lanes[4];
+            lanes[0] = args[3];
+            lanes[1] = args[4];
+            lanes[2] = args[5];
+            lanes[3] = args[6];
 
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint32x4>(args[1], args[2], lane0, lane1, lane2, lane3, 8, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint32x4>(args[1], args[2], lanes, 4, 8, scriptContext);
 
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdUint32x4TypeMismatch, _u("shuffle"));

+ 2 - 2
lib/Runtime/Library/SimdUint8x16Lib.cpp

@@ -752,7 +752,7 @@ namespace Js
             {
                 lanes[i] = args[i + 2];
             }
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint8x16, 16>(args[1], args[1], lanes, 16, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint8x16>(args[1], args[1], lanes, 16, 16, scriptContext);
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdUint8x16TypeMismatch, _u("swizzle"));
     }
@@ -783,7 +783,7 @@ namespace Js
             {
                 lanes[i] = args[i + 3];
             }
-            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint8x16, 16>(args[1], args[2], lanes, 32, scriptContext);
+            return SIMDUtils::SIMD128SlowShuffle<JavascriptSIMDUint8x16>(args[1], args[2], lanes, 16, 32, scriptContext);
         }
         JavascriptError::ThrowTypeError(scriptContext, JSERR_SimdUint8x16TypeMismatch, _u("shuffle"));
     }