Jelajahi Sumber

SimdJsType spec initial commit

SIMD.js load/store type-spec

Lowerer support

Fix asm.js load/store failures. Load/store typespec still in progress

- Fixed globOpt to consider dataWidth when eliminating bound checks based on
range analysis.
- Lowerer was not loading from headSegment. Added loading/use of
  headSegment as baseOpnd.
- BoundChecks are not guaranteed to be extracted nor eliminated. Lowerer
  adds upper bound check if needed.

- Further fixes in lowerer:
	- Inlined bound check subtracts the needed dataWidth from the array
	length.
	- Wasn't scaling the array index properly.
- GlobOpt fixes:
	- Removing fall-through code when type-spec doesn't happen was
	  asserting on dataUseCount != 0 on x64. Added an if-condition to
	  check before calling DecrementDataUseCount.
TypeSpec only happens on non-Virtual, non-Mixed typed arrays.

- All unit-tests pass at this point.
- Enabled bound-checks on x64 for Virtual and Mixed array ValueTypes.
- Several bug fixes in lowerer and GlobOpt
- Re-enabled SIMD.js ASM.js unit-tests to run with no-asmjs and with
  type-spec support (-asmjs-)
- Fixed performance issue where we bailed out excessively on
  BailOnNotBuiltIn. We didn't add Int32x4() to builtIn table

Add missing copyright notice

CR fixes:
- Use PSHUFD for Swizzle_I4. Combine if statements in LowerSwizzle
- Load correct lane mask, instead of shifting lane0 mask
nmostafa 10 tahun lalu
induk
melakukan
9b8e058afd

+ 56 - 13
lib/Backend/GlobOpt.cpp

@@ -1187,7 +1187,7 @@ GlobOpt::MergePredBlocksValueMaps(BasicBlock *block)
         BVSparse<JitArenaAllocator> tempBv2(this->tempAlloc);
 
         // For syms we made alive in loop header because of hoisting, use-before-def, or def in Loop body, set their valueInfo to definite.
-        // Made live on header AND in one of forceSimd128* or likelySimd128* vectors.
+        // Make live on header AND in one of forceSimd128* or likelySimd128* vectors.
         tempBv->Or(loop->likelySimd128F4SymsUsedBeforeDefined, loop->symsDefInLoop);
         tempBv->Or(loop->likelySimd128I4SymsUsedBeforeDefined);
         tempBv->Or(loop->forceSimd128F4SymsOnEntry);
@@ -6012,12 +6012,17 @@ GlobOpt::CopyProp(IR::Opnd *opnd, IR::Instr *instr, Value *val, IR::IndirOpnd *p
 
     // SIMD_JS
     // Don't copy-prop operand of SIMD instr with ExtendedArg operands. Each instr should have its exclusive EA sequence.
-    if (Js::IsSimd128Opcode(instr->m_opcode) && instr->GetSrc1() != nullptr && instr->GetSrc2() == nullptr &&  instr->GetSrc1()->GetStackSym()->IsSingleDef())
+    if (
+            Js::IsSimd128Opcode(instr->m_opcode) && 
+            instr->GetSrc1() != nullptr && 
+            instr->GetSrc1()->IsRegOpnd() && 
+            instr->GetSrc2() == nullptr
+       )
     {
-        IR::Instr *defInstr = instr->GetSrc1()->GetStackSym()->GetInstrDef();
-        if (defInstr->m_opcode == Js::OpCode::ExtendArg_A)
+        StackSym *sym = instr->GetSrc1()->GetStackSym();
+        if (sym && sym->IsSingleDef() && sym->GetInstrDef()->m_opcode == Js::OpCode::ExtendArg_A)
         {
-            return opnd;
+                return opnd;
         }
     }
 
@@ -15360,6 +15365,15 @@ GlobOpt::OptArraySrc(IR::Instr * *const instrRef)
     bool needsHeadSegment, needsHeadSegmentLength, needsLength, needsBoundChecks;
     switch(instr->m_opcode)
     {
+        // SIMD_JS
+        case Js::OpCode::Simd128_LdArr_F4:
+        case Js::OpCode::Simd128_LdArr_I4:
+            // no type-spec for Asm.js
+            if (this->GetIsAsmJSFunc())
+            {
+                return;
+            }
+            // fall through
         case Js::OpCode::LdElemI_A:
         case Js::OpCode::LdMethodElem:
             if(!instr->GetSrc1()->IsIndirOpnd())
@@ -15370,10 +15384,19 @@ GlobOpt::OptArraySrc(IR::Instr * *const instrRef)
             baseOwnerIndir = instr->GetSrc1()->AsIndirOpnd();
             baseOpnd = baseOwnerIndir->GetBaseOpnd();
             isProfilableLdElem = instr->m_opcode == Js::OpCode::LdElemI_A; // LdMethodElem is currently not profiled
+            isProfilableLdElem |= Js::IsSimd128Load(instr->m_opcode);
             needsBoundChecks = needsHeadSegmentLength = needsHeadSegment = isLoad = true;
             needsLength = isStore = isProfilableStElem = false;
             break;
 
+        // SIMD_JS
+        case Js::OpCode::Simd128_StArr_F4:
+        case Js::OpCode::Simd128_StArr_I4:
+            if (this->GetIsAsmJSFunc())
+            {
+                return;
+            }
+            // fall through
         case Js::OpCode::StElemI_A:
         case Js::OpCode::StElemI_A_Strict:
         case Js::OpCode::StElemC:
@@ -15385,6 +15408,7 @@ GlobOpt::OptArraySrc(IR::Instr * *const instrRef)
             baseOwnerIndir = instr->GetDst()->AsIndirOpnd();
             baseOpnd = baseOwnerIndir->GetBaseOpnd();
             needsBoundChecks = isProfilableStElem = instr->m_opcode != Js::OpCode::StElemC;
+            isProfilableStElem |= Js::IsSimd128Store(instr->m_opcode);
             needsHeadSegmentLength = needsHeadSegment = isStore = true;
             needsLength = isLoad = isProfilableLdElem = false;
             break;
@@ -15606,8 +15630,17 @@ GlobOpt::OptArraySrc(IR::Instr * *const instrRef)
     StackSym *const newHeadSegmentLengthSym = doHeadSegmentLengthLoad ? StackSym::New(TyUint32, instr->m_func) : nullptr;
     StackSym *const newLengthSym = doLengthLoad ? StackSym::New(TyUint32, instr->m_func) : nullptr;
 
-    bool canBailOutOnArrayAccessHelperCall =
-        (isProfilableLdElem || isProfilableStElem) &&
+    bool canBailOutOnArrayAccessHelperCall;
+
+    if (Js::IsSimd128LoadStore(instr->m_opcode))
+    {
+        // SIMD_JS
+        // simd load/store never call helper
+        canBailOutOnArrayAccessHelperCall = true; 
+    }
+    else
+    {
+        canBailOutOnArrayAccessHelperCall = (isProfilableLdElem || isProfilableStElem) &&
         DoEliminateArrayAccessHelperCall() &&
         !(
             instr->IsProfiledInstr() &&
@@ -15617,6 +15650,7 @@ GlobOpt::OptArraySrc(IR::Instr * *const instrRef)
                     : instr->AsProfiledInstr()->u.stElemInfo->LikelyNeedsHelperCall()
             )
          );
+    }
 
     bool doExtractBoundChecks = false, eliminatedLowerBoundCheck = false, eliminatedUpperBoundCheck = false;
     StackSym *indexVarSym = nullptr;
@@ -15624,7 +15658,8 @@ GlobOpt::OptArraySrc(IR::Instr * *const instrRef)
     IntConstantBounds indexConstantBounds;
     Value *headSegmentLengthValue = nullptr;
     IntConstantBounds headSegmentLengthConstantBounds;
-    if (baseValueType.IsLikelyOptimizedVirtualTypedArray())
+    
+    if (baseValueType.IsLikelyOptimizedVirtualTypedArray() && !Js::IsSimd128LoadStore(instr->m_opcode) /*Always extract bounds for SIMD */)
     {
         if (isProfilableStElem ||
             !instr->IsDstNotAlwaysConvertedToInt32() ||
@@ -15747,13 +15782,15 @@ GlobOpt::OptArraySrc(IR::Instr * *const instrRef)
             }
             AssertVerify(headSegmentLengthValue->GetValueInfo()->TryGetIntConstantBounds(&headSegmentLengthConstantBounds));
 
-            if(ValueInfo::IsLessThan(
+            if (ValueInfo::IsLessThanOrEqualTo(
                     indexValue,
                     indexConstantBounds.LowerBound(),
                     indexConstantBounds.UpperBound(),
                     headSegmentLengthValue,
                     headSegmentLengthConstantBounds.LowerBound(),
-                    headSegmentLengthConstantBounds.UpperBound()))
+                    headSegmentLengthConstantBounds.UpperBound(),
+                    GetBoundCheckOffsetForSimd(newBaseValueType, instr, -1)
+                    ))
             {
                 eliminatedUpperBoundCheck = true;
                 if(eliminatedLowerBoundCheck)
@@ -16196,7 +16233,7 @@ GlobOpt::OptArraySrc(IR::Instr * *const instrRef)
             Assert(!baseOwnerIndir->GetIndexOpnd() || baseOwnerIndir->GetIndexOpnd()->m_sym->IsTypeSpec());
             Assert(doHeadSegmentLengthLoad || headSegmentLengthIsAvailable);
             Assert(canBailOutOnArrayAccessHelperCall);
-            Assert(!isStore || instr->m_opcode == Js::OpCode::StElemI_A || instr->m_opcode == Js::OpCode::StElemI_A_Strict);
+            Assert(!isStore || instr->m_opcode == Js::OpCode::StElemI_A || instr->m_opcode == Js::OpCode::StElemI_A_Strict || Js::IsSimd128LoadStore(instr->m_opcode));
 
             StackSym *const headSegmentLengthSym =
                 headSegmentLengthIsAvailable ? baseArrayValueInfo->HeadSegmentLengthSym() : newHeadSegmentLengthSym;
@@ -16240,6 +16277,9 @@ GlobOpt::OptArraySrc(IR::Instr * *const instrRef)
                     hoistHeadSegmentLengthLoadOutOfLoop,
                     failedToUpdateCompatibleLowerBoundCheck,
                     failedToUpdateCompatibleUpperBoundCheck);
+
+                // SIMD_JS
+                UpdateBoundCheckHoistInfoForSimd(upperBoundCheckHoistInfo, newBaseValueType, instr);
             }
 
             if(!eliminatedLowerBoundCheck)
@@ -16745,7 +16785,7 @@ GlobOpt::OptArraySrc(IR::Instr * *const instrRef)
                     lowerBound->SetIsJITOptimizedReg(true);
                     IR::Opnd* upperBound = IR::RegOpnd::New(headSegmentLengthSym, headSegmentLengthSym->GetType(), instr->m_func);
                     upperBound->SetIsJITOptimizedReg(true);
-                    const int offset = -1;
+                    const int offset = GetBoundCheckOffsetForSimd(newBaseValueType, instr, -1);
                     IR::Instr *boundCheck;
 
                     // index <= headSegmentLength - 1 (src1 <= src2 + dst)
@@ -19390,7 +19430,10 @@ GlobOpt::RemoveCodeAfterNoFallthroughInstr(IR::Instr *instr)
     FOREACH_SUCCESSOR_BLOCK_EDITING(deadBlock, this->currentBlock, iter)
     {
         this->currentBlock->RemoveDeadSucc(deadBlock, this->func->m_fg);
-        this->currentBlock->DecrementDataUseCount();
+        if (this->currentBlock->GetDataUseCount() > 0)
+        {
+            this->currentBlock->DecrementDataUseCount();
+        }
     } NEXT_SUCCESSOR_BLOCK_EDITING;
 }
 

+ 6 - 0
lib/Backend/GlobOpt.h

@@ -1384,11 +1384,17 @@ private:
     // SIMD_JS
     bool                    TypeSpecializeSimd128(IR::Instr *instr, Value **pSrc1Val, Value **pSrc2Val, Value **pDstVal);
     bool                    Simd128DoTypeSpec(IR::Instr *instr, const Value *src1Val, const Value *src2Val, const Value *dstVal);
+    bool                    Simd128DoTypeSpecLoadStore(IR::Instr *instr, const Value *src1Val, const Value *src2Val, const Value *dstVal, const ThreadContext::SimdFuncSignature *simdFuncSignature);
     bool                    Simd128CanTypeSpecOpnd(const ValueType opndType, const ValueType expectedType);
+    bool                    Simd128ValidateIfLaneIndex(const IR::Instr * instr, IR::Opnd * opnd, uint argPos);
+    
     IRType                  GetIRTypeFromValueType(const ValueType &valueType);
     ValueType               GetValueTypeFromIRType(const IRType &type);
     IR::BailOutKind         GetBailOutKindFromValueType(const ValueType &valueType);
     IR::Instr *             GetExtendedArg(IR::Instr *instr);
+    void                    UpdateBoundCheckHoistInfoForSimd(ArrayUpperBoundCheckHoistInfo &upperHoistInfo, ValueType arrValueType, const IR::Instr *instr);
+    int                     GetBoundCheckOffsetForSimd(ValueType arrValueType, const IR::Instr *instr, const int oldOffset = -1);
+    void                    Simd128SetIndirOpndType(IR::IndirOpnd *indirOpnd, Js::OpCode opcode);
 
 
     IR::Instr *             OptNewScObject(IR::Instr** instrPtr, Value* srcVal);

+ 9 - 1
lib/Backend/GlobOptIntBounds.h

@@ -99,7 +99,7 @@ public:
         Assert(loopCountMinusOneSym);
     }
 
-    LoopCount(StackSym *const loopCountMinusOneSym, StackSym *const loopCountSym) : 
+    LoopCount(StackSym *const loopCountMinusOneSym, StackSym *const loopCountSym) :
         loopCountMinusOneSym(loopCountMinusOneSym),
         loopCountSym(loopCountSym),
         hasBeenGenerated(true)
@@ -287,6 +287,12 @@ public:
         return offset;
     }
 
+    void UpdateOffset(int newOffset)
+    {
+        Assert(HasAnyInfo());   // only valid on an info object that already holds data
+        offset = newOffset;     // overwrite the stored offset with the caller-supplied value
+    }
+
     ValueNumber IndexValueNumber() const
     {
         Assert(HasAnyInfo());
@@ -324,6 +330,7 @@ public:
         return maxMagnitudeChange;
     }
 
+
 public:
     void SetCompatibleBoundCheck(BasicBlock *const compatibleBoundCheckBlock, StackSym *const indexSym, const int offset, const ValueNumber indexValueNumber);
     void SetLoop(::Loop *const loop, const int indexConstantValue, const bool isLoopCountBasedBound = false);
@@ -347,6 +354,7 @@ public:
     using Base::Loop;
     using Base::IndexSym;
     using Base::Offset;
+    using Base::UpdateOffset;
     using Base::IndexValueNumber;
     using Base::IndexValue;
     using Base::IndexConstantBounds;

+ 218 - 11
lib/Backend/GlobOptSimd128.cpp

@@ -18,7 +18,7 @@ Value **pSrc2Val,
 Value **pDstVal
 )
 {
-    if (func->m_workItem->GetFunctionBody()->GetIsAsmjsMode() || SIMD128_TYPE_SPEC_FLAG == false)
+    if (this->GetIsAsmJSFunc() || SIMD128_TYPE_SPEC_FLAG == false)
     {
         // no type-spec for ASMJS code or flag is off.
         return false;
@@ -49,18 +49,15 @@ Value **pDstVal
             if (IsSimd128F4TypeSpecialized(sym, this->currentBlock))
             {
                 type = TySimd128F4;
-                sym->GetSimd128F4EquivSym(func);
             }
             else if (IsSimd128I4TypeSpecialized(sym, this->currentBlock))
             {
                 type = TySimd128I4;
-                sym->GetSimd128I4EquivSym(func);
             }
             else
             {
                 return false;
             }
-
             ToTypeSpecUse(instr, instr->GetSrc1(), this->currentBlock, *pSrc1Val, nullptr, type, IR::BailOutSimd128F4Only /*not used for Ld_A*/);
             TypeSpecializeSimd128Dst(type, instr, *pSrc1Val, *pSrc1Val, pDstVal);
             return true;
@@ -96,6 +93,21 @@ Value **pDstVal
         instr->m_func->GetScriptContext()->GetThreadContext()->GetSimdFuncSignatureFromOpcode(instr->m_opcode, simdFuncSignature);
         // type-spec logic
 
+        // special handling for load/store
+        // OptArraySrc will type-spec the array and the index. We type-spec the value here.
+        if (Js::IsSimd128Load(instr->m_opcode))
+        {
+            TypeSpecializeSimd128Dst(GetIRTypeFromValueType(simdFuncSignature.returnType), instr, nullptr, *pSrc1Val, pDstVal);
+            Simd128SetIndirOpndType(instr->GetSrc1()->AsIndirOpnd(), instr->m_opcode);
+            return true;
+        }
+        if (Js::IsSimd128Store(instr->m_opcode))
+        {
+            ToTypeSpecUse(instr, instr->GetSrc1(), this->currentBlock, *pSrc1Val, nullptr, GetIRTypeFromValueType(simdFuncSignature.args[2]), GetBailOutKindFromValueType(simdFuncSignature.args[2]));
+            Simd128SetIndirOpndType(instr->GetDst()->AsIndirOpnd(), instr->m_opcode);
+            return true;
+        }
+
         // For op with ExtendArg. All sources are already type-specialized, just type-specialize dst
         if (simdFuncSignature.argCount <= 2)
         {
@@ -120,7 +132,7 @@ Value **pDstVal
         {
             // Emit bailout if not loop prepass.
             // The inliner inserts bytecodeUses of original args after the instruction. Bailout is safe.
-            IR::Instr * bailoutInstr = IR::BailOutInstr::New(Js::OpCode::BailOnNoSimdTypeSpec, IR::BailOutNoSimdTypeSpec, instr, this->func);
+            IR::Instr * bailoutInstr = IR::BailOutInstr::New(Js::OpCode::BailOnNoSimdTypeSpec, IR::BailOutNoSimdTypeSpec, instr, instr->m_func);
             bailoutInstr->SetByteCodeOffset(instr);
             instr->InsertAfter(bailoutInstr);
 
@@ -167,17 +179,22 @@ GlobOpt::Simd128DoTypeSpec(IR::Instr *instr, const Value *src1Val, const Value *
             // not implemented yet.
             return false;
         }
+        // special handling for Load/Store
+        if (Js::IsSimd128Load(instr->m_opcode) || Js::IsSimd128Store(instr->m_opcode))
+        {
+            return Simd128DoTypeSpecLoadStore(instr, src1Val, src2Val, dstVal, &simdFuncSignature);
+        }
 
         const uint argCount = simdFuncSignature.argCount;
         switch (argCount)
         {
         case 2:
             Assert(src2Val);
-            doTypeSpec = doTypeSpec && Simd128CanTypeSpecOpnd(src2Val->GetValueInfo()->Type(), simdFuncSignature.args[1]);
+            doTypeSpec = doTypeSpec && Simd128CanTypeSpecOpnd(src2Val->GetValueInfo()->Type(), simdFuncSignature.args[1]) && Simd128ValidateIfLaneIndex(instr, instr->GetSrc2(), 1);
             // fall-through
         case 1:
             Assert(src1Val);
-            doTypeSpec = doTypeSpec && Simd128CanTypeSpecOpnd(src1Val->GetValueInfo()->Type(), simdFuncSignature.args[0]);
+            doTypeSpec = doTypeSpec && Simd128CanTypeSpecOpnd(src1Val->GetValueInfo()->Type(), simdFuncSignature.args[0]) && Simd128ValidateIfLaneIndex(instr, instr->GetSrc1(), 0);
             break;
         default:
         {
@@ -231,13 +248,18 @@ GlobOpt::Simd128DoTypeSpec(IR::Instr *instr, const Value *src1Val, const Value *
                     {
                         return false;
                     }
+                    // Extra check if arg is a lane index
+                    if (!Simd128ValidateIfLaneIndex(instr, opnd, arg))
+                    {
+                        return false;
+                    }
                 }
                 else
                 {
                     Assert(UNREACHED);
                 }
 
-                eaInstr = GetExtendedArg(instr);
+                eaInstr = GetExtendedArg(eaInstr);
                 arg--;
             }
             // all args are type-spec'd
@@ -251,6 +273,70 @@ GlobOpt::Simd128DoTypeSpec(IR::Instr *instr, const Value *src1Val, const Value *
         // For ExtendArg, the expected type is encoded in the dst(link) operand.
         doTypeSpec = doTypeSpec && Simd128CanTypeSpecOpnd(src1Val->GetValueInfo()->Type(), instr->GetDst()->GetValueType());
     }
+
+    return doTypeSpec;
+}
+// Decides whether a SIMD load/store may be type-specialized. Requires a base whose value type satisfies IsObject() and IsTypedArray(), an index (when present) that is an int (likely int during loop prepass) and, for stores, a value that can be specialized to the signature's third argument type.
+bool
+GlobOpt::Simd128DoTypeSpecLoadStore(IR::Instr *instr, const Value *src1Val, const Value *src2Val, const Value *dstVal, const ThreadContext::SimdFuncSignature *simdFuncSignature)
+{
+    IR::Opnd *baseOpnd = nullptr, *indexOpnd = nullptr, *valueOpnd = nullptr;
+    IR::Opnd *src, *dst;
+    // NOTE: src1Val, src2Val and dstVal are not read below; value types are looked up through FindValue() instead.
+    bool doTypeSpec = true;
+
+    // Expected instruction shapes:
+    //   value = Ld [arr + index]   /   [arr + index] = St value
+    src = instr->GetSrc1();
+    dst = instr->GetDst();
+    Assert(dst && src && !instr->GetSrc2());
+
+    if (Js::IsSimd128Load(instr->m_opcode))
+    {
+        Assert(src->IsIndirOpnd());
+        baseOpnd = instr->GetSrc1()->AsIndirOpnd()->GetBaseOpnd();
+        indexOpnd = instr->GetSrc1()->AsIndirOpnd()->GetIndexOpnd();
+        valueOpnd = instr->GetDst();
+    }
+    else if (Js::IsSimd128Store(instr->m_opcode))
+    {
+        Assert(dst->IsIndirOpnd());
+        baseOpnd = instr->GetDst()->AsIndirOpnd()->GetBaseOpnd();
+        indexOpnd = instr->GetDst()->AsIndirOpnd()->GetIndexOpnd();
+        valueOpnd = instr->GetSrc1();
+
+        // St(arr, index, value). Make sure value can be Simd128 type-spec'ed
+        doTypeSpec = doTypeSpec && Simd128CanTypeSpecOpnd(FindValue(valueOpnd->AsRegOpnd()->m_sym)->GetValueInfo()->Type(), simdFuncSignature->args[2]);
+    }
+    else
+    {
+        Assert(UNREACHED);
+    }
+
+    // array and index operands should have been type-specialized in OptArraySrc: ValueTypes should be definite at this point. If not, don't type-spec.
+    // We can be in a loop prepass, where opnd ValueInfo is not set yet. Get the ValueInfo from the Value Table instead.
+    ValueType baseOpndType = FindValue(baseOpnd->AsRegOpnd()->m_sym)->GetValueInfo()->Type();
+    // NOTE(review): FindValue() results in this function are dereferenced without a null check - confirm a value always exists for these syms.
+    if (IsLoopPrePass())
+    {
+        doTypeSpec = doTypeSpec && (baseOpndType.IsObject() && baseOpndType.IsTypedArray());
+        // indexOpnd might be missing if loading from [0]
+        if (indexOpnd != nullptr)
+        {
+            ValueType indexOpndType = FindValue(indexOpnd->AsRegOpnd()->m_sym)->GetValueInfo()->Type();
+            doTypeSpec = doTypeSpec && indexOpndType.IsLikelyInt(); // prepass: a likely-int index is accepted
+        }
+    }
+    else
+    {
+        doTypeSpec = doTypeSpec && (baseOpndType.IsObject() && baseOpndType.IsTypedArray());
+        if (indexOpnd != nullptr)
+        {
+            ValueType indexOpndType = FindValue(indexOpnd->AsRegOpnd()->m_sym)->GetValueInfo()->Type();
+            doTypeSpec = doTypeSpec && indexOpndType.IsInt(); // outside prepass: the stricter IsInt() is required
+        }
+    }
+
+    return doTypeSpec;
 }
 
@@ -258,9 +344,7 @@ GlobOpt::Simd128DoTypeSpec(IR::Instr *instr, const Value *src1Val, const Value *
 // We can type spec an opnd if:
 // Both profiled/propagated and expected types are not Simd128. e.g. expected type is f64/f32/i32 where there is a conversion logic from the incoming type.
 // Opnd type is (Likely) SIMD128 and matches expected type.
-// Opnd type is Object. e.g. possibly result of merging different SIMD types.
-// Simd128 values merged with Undefined/Null are still specialized.
-// Opnd type is LikelyUndefined: we don't have profile info for the operands.
+// Opnd type is Object. e.g. possibly result of merging different SIMD types. We specialize because we don't know which pass is dynamically taken.
 
 bool GlobOpt::Simd128CanTypeSpecOpnd(const ValueType opndType, ValueType expectedType)
 {
@@ -286,6 +370,66 @@ bool GlobOpt::Simd128CanTypeSpecOpnd(const ValueType opndType, ValueType expecte
     return false;
 }
 
+/*
+Given an instr, an opnd and the opnd's argument position: return true if the opnd is not a lane index for this opcode, or if it is a lane index that is an int constant within the valid lane range. Return false otherwise.
+*/
+bool GlobOpt::Simd128ValidateIfLaneIndex(const IR::Instr * instr, IR::Opnd * opnd, uint argPos)
+{
+    Assert(instr);
+    Assert(opnd);
+
+    uint laneIndex;
+    uint argPosLo, argPosHi;
+    uint laneIndexLo, laneIndexHi;
+    // [argPosLo, argPosHi] = argument positions that hold lane indices; [laneIndexLo, laneIndexHi] = accepted lane values.
+    // operation takes a lane index ?
+    switch (instr->m_opcode)
+    {
+    case Js::OpCode::Simd128_Swizzle_F4:
+    case Js::OpCode::Simd128_Swizzle_I4:
+        argPosLo = 1; argPosHi = 4;
+        laneIndexLo = 0; laneIndexHi = 3;
+        break;
+    case Js::OpCode::Simd128_Shuffle_F4:
+    case Js::OpCode::Simd128_Shuffle_I4:
+        argPosLo = 2; argPosHi = 5;
+        laneIndexLo = 0; laneIndexHi = 7;
+        break;
+    case Js::OpCode::Simd128_ReplaceLane_F4:
+    case Js::OpCode::Simd128_ReplaceLane_I4:
+    case Js::OpCode::Simd128_ExtractLane_F4:
+    case Js::OpCode::Simd128_ExtractLane_I4:
+        argPosLo = argPosHi = 1;
+        laneIndexLo = 0;  laneIndexHi = 3;
+        break;
+    default:
+        return true; // not a lane index
+    }
+
+    // arg in lane index position of operation ?
+    if (argPos < argPosLo || argPos > argPosHi)
+    {
+        return true; // not a lane index
+    }
+
+    // It is a lane index ...
+
+    // Arg is Int constant (literal or const prop'ed) ?
+    if (!opnd->IsIntConstOpnd())
+    {
+        return false;
+    }
+    laneIndex = (uint) opnd->AsIntConstOpnd()->GetValue(); // presumably a negative constant wraps to a large uint and is rejected below - TODO confirm GetValue() is signed
+
+    // In range ?
+    if (laneIndex < laneIndexLo|| laneIndex > laneIndexHi)
+    {
+        return false;
+    }
+
+    return true;
+}
+
 IR::Instr * GlobOpt::GetExtendedArg(IR::Instr *instr)
 {
     IR::Opnd *src1, *src2;
@@ -379,3 +523,66 @@ IR::BailOutKind GlobOpt::GetBailOutKindFromValueType(const ValueType &valueType)
         return IR::BailOutSimd128I4Only;
     }
 }
+// Rewrites the offset stored in upperHoistInfo with the value computed by GetBoundCheckOffsetForSimd for this array type and instr. Does nothing when upperHoistInfo holds no info.
+void
+GlobOpt::UpdateBoundCheckHoistInfoForSimd(ArrayUpperBoundCheckHoistInfo &upperHoistInfo, ValueType arrValueType, const IR::Instr *instr)
+{
+    if (!upperHoistInfo.HasAnyInfo())
+    {
+        return;
+    }
+
+    int newOffset = GetBoundCheckOffsetForSimd(arrValueType, instr, upperHoistInfo.Offset()); // current offset is the starting point
+    upperHoistInfo.UpdateOffset(newOffset);
+}
+// Returns the upper bound check offset to use for instr: oldOffset unchanged when instr is not a SIMD load/store or the array is not a typed array, otherwise oldOffset lowered by (number of elements accessed - 1).
+int
+GlobOpt::GetBoundCheckOffsetForSimd(ValueType arrValueType, const IR::Instr *instr, const int oldOffset /* = -1 */)
+{
+    if (!(Js::IsSimd128LoadStore(instr->m_opcode)))
+    {
+        return oldOffset;
+    }
+
+    if (!arrValueType.IsTypedArray())
+    {
+        // no need to adjust for other array types, we will not type-spec (see Simd128DoTypeSpecLoadStore)
+        return oldOffset;
+    }
+
+    Assert(instr->dataWidth == 4 || instr->dataWidth == 8 || instr->dataWidth == 12 || instr->dataWidth == 16);
+
+    int numOfElems = Lowerer::SimdGetElementCountFromBytes(arrValueType, instr->dataWidth);
+
+    // We want to make bound checks more conservative, so we compute how many extra elements the bound check must leave room for.
+    // E.g. if the original bound check is value <= Length + offset, and dataWidth is 16 bytes on a Float32 array, then we need room for 4 elements. The bound check guarantees room for 1 element.
+    // Hence, we need to ensure 3 more: value <= Length + offset - 3
+    // The element count is rounded up since dataWidth may span a partial element (e.g. dataWidth = 12, bpe = 8 bytes)
+
+    int offsetBias = -(numOfElems - 1);
+    // we should always make an existing bound-check more conservative.
+    Assert(offsetBias <= 0);
+    return oldOffset + offsetBias;
+}
+// Sets the IR type and value type of a SIMD load/store indir operand according to the opcode: Float32x4 for the *_F4 opcodes, Int32x4 for the *_I4 opcodes.
+void
+GlobOpt::Simd128SetIndirOpndType(IR::IndirOpnd *indirOpnd, Js::OpCode opcode)
+{
+    switch (opcode)
+    {
+    case Js::OpCode::Simd128_LdArr_F4:
+    case Js::OpCode::Simd128_StArr_F4:
+        indirOpnd->SetType(TySimd128F4);
+        indirOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4));
+        break;
+    case Js::OpCode::Simd128_LdArr_I4:
+    case Js::OpCode::Simd128_StArr_I4:
+        indirOpnd->SetType(TySimd128I4);
+        indirOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Int32x4));
+        break;
+    default:
+        Assert(UNREACHED); // callers are expected to pass only the four SIMD load/store opcodes above
+    }
+
+}
+

+ 1 - 0
lib/Backend/IR.cpp

@@ -2918,6 +2918,7 @@ Instr::TransferTo(Instr * instr)
     instr->m_src2 = this->m_src2;
     instr->dstIsAlwaysConvertedToInt32 = this->dstIsAlwaysConvertedToInt32;
     instr->dstIsAlwaysConvertedToNumber = this->dstIsAlwaysConvertedToNumber;
+    instr->dataWidth = this->dataWidth;
     IR::Opnd * dst = this->m_dst;
 
     if (dst)

+ 108 - 0
lib/Backend/Inline.cpp

@@ -2044,6 +2044,9 @@ Inline::InlineBuiltInFunction(IR::Instr *callInstr, Js::FunctionInfo *funcInfo,
         {
             callInstr->m_func->GetScriptContext()->GetThreadContext()->GetSimdFuncSignatureFromOpcode(callInstr->m_opcode, simdFuncSignature);
             Assert(simdFuncSignature.valid);
+            // if we have decided to inline, then actual arg count == signature arg count == required arg count from inlinee list (LibraryFunction.h)
+            Assert(simdFuncSignature.argCount == (uint)inlineCallArgCount);
+            Assert(simdFuncSignature.argCount == (uint)requiredInlineCallArgCount);
         }
 //
         inlineBuiltInEndInstr->IterateArgInstrs([&](IR::Instr* argInstr) {
@@ -2133,6 +2136,10 @@ Inline::InlineBuiltInFunction(IR::Instr *callInstr, Js::FunctionInfo *funcInfo,
             argInsertInstr = argInstr;
             return false;
         });
+
+        //SIMD_JS
+        Simd128FixLoadStoreInstr(builtInId, callInstr);
+
         if(inlineCallOpCode == Js::OpCode::InlineMathImul || inlineCallOpCode == Js::OpCode::InlineMathClz32)
         {
             // Convert:
@@ -5247,3 +5254,104 @@ Inline::GetMethodLdOpndForCallInstr(IR::Instr* callInstr)
     }
     return nullptr;
 }
+
+// SIMD_JS
+/*
+Fixes the format of a SIMD load/store to match the format expected by GlobOpt. Namely:
+Load:
+    dst = Simd128LdArr arr, index
+    becomes
+    dst = Simd128LdArr [arr, index]
+
+Store:
+    t3 =    EA arr
+    t2 =    EA index, t3
+    t1 =    EA value, t2
+            Simd128StArr t1
+    becomes
+    [arr, index] = Simd128StArr value
+
+It also records in callInstr->dataWidth the width in bytes of the data accessed (16, 12, 8 or 4). Needed for bound check generation in GlobOpt.
+*/
+void
+Inline::Simd128FixLoadStoreInstr(Js::BuiltinFunction builtInId, IR::Instr * callInstr)
+{
+    bool isStore = false;
+    callInstr->dataWidth = 0; // reset for every builtin; stays 0 when builtInId is not a SIMD load/store
+    switch (builtInId)
+    {
+        case Js::BuiltinFunction::SIMD_Float32x4_Store:
+        case Js::BuiltinFunction::SIMD_Int32x4_Store:
+            isStore = true;
+            // fall through
+        case Js::BuiltinFunction::SIMD_Float32x4_Load:
+        case Js::BuiltinFunction::SIMD_Int32x4_Load:
+            callInstr->dataWidth = 16;
+            break;
+
+        case Js::BuiltinFunction::SIMD_Float32x4_Store3:
+        case Js::BuiltinFunction::SIMD_Int32x4_Store3:
+            isStore = true;
+            // fall through
+        case Js::BuiltinFunction::SIMD_Float32x4_Load3:
+        case Js::BuiltinFunction::SIMD_Int32x4_Load3:
+            callInstr->dataWidth = 12;
+            break;
+
+        case Js::BuiltinFunction::SIMD_Float32x4_Store2:
+        case Js::BuiltinFunction::SIMD_Int32x4_Store2:
+            isStore = true;
+            // fall through
+        case Js::BuiltinFunction::SIMD_Float32x4_Load2:
+        case Js::BuiltinFunction::SIMD_Int32x4_Load2:
+            callInstr->dataWidth = 8;
+            break;
+
+        case Js::BuiltinFunction::SIMD_Float32x4_Store1:
+        case Js::BuiltinFunction::SIMD_Int32x4_Store1:
+            isStore = true;
+            // fall through
+        case Js::BuiltinFunction::SIMD_Float32x4_Load1:
+        case Js::BuiltinFunction::SIMD_Int32x4_Load1:
+            callInstr->dataWidth = 4;
+            break;
+        default:
+            // nothing to do
+            return;
+    }
+    // Rewrite the operands into the indir form described in the header comment.
+    IR::IndirOpnd *indirOpnd;
+    if (!isStore)
+    {
+        // load: fold src1 (arr) and src2 (index) into a single indir source
+        indirOpnd = IR::IndirOpnd::New(callInstr->GetSrc1()->AsRegOpnd(), callInstr->GetSrc2()->AsRegOpnd(), TyVar, callInstr->m_func); // assumes both are register operands - TODO confirm
+        callInstr->ReplaceSrc1(indirOpnd);
+        callInstr->FreeSrc2();
+    }
+    else
+    {
+        IR::Opnd *linkOpnd = callInstr->GetSrc1();
+        IR::Instr *eaInstr1, *eaInstr2, *eaInstr3;
+        IR::Opnd *value, *index, *arr;
+        // store: walk the ExtendArg chain backwards from the last argument (value) to the first (arr)
+        eaInstr1 = linkOpnd->GetStackSym()->m_instrDef;
+        value = eaInstr1->GetSrc1();
+        linkOpnd = eaInstr1->GetSrc2();
+
+        eaInstr2 = linkOpnd->GetStackSym()->m_instrDef;
+        index = eaInstr2->GetSrc1();
+        linkOpnd = eaInstr2->GetSrc2();
+
+        eaInstr3 = linkOpnd->GetStackSym()->m_instrDef;
+        Assert(!eaInstr3->GetSrc2()); // end of args list
+        arr = eaInstr3->GetSrc1();
+
+        indirOpnd = IR::IndirOpnd::New(arr->AsRegOpnd(), index->AsRegOpnd(), TyVar, callInstr->m_func); // assumes arr and index are register operands - TODO confirm
+        callInstr->SetDst(indirOpnd);
+        callInstr->ReplaceSrc1(value);
+
+        // remove ea instructions
+        eaInstr1->Remove(); eaInstr2->Remove(); eaInstr3->Remove();
+
+    }
+}

+ 1 - 0
lib/Backend/Inline.h

@@ -91,6 +91,7 @@ private:
     bool        IsArgumentsOpnd(IR::Opnd* opnd,SymID argumentsSymId);
     void        Cleanup(IR::Instr *callInstr);
     IR::PropertySymOpnd* GetMethodLdOpndForCallInstr(IR::Instr* callInstr);
+    void        Simd128FixLoadStoreInstr(Js::BuiltinFunction builtInId, IR::Instr * callInstr);
     IR::Instr* InsertInlineeBuiltInStartEndTags(IR::Instr* callInstr, uint actualcount, IR::Instr** builtinStartInstr = nullptr);
     bool IsInliningOutSideLoops(){return  topFunc->GetJnFunction()->GetHasLoops() && isInLoop == 0; }
 

+ 11 - 1
lib/Backend/Lower.cpp

@@ -13028,7 +13028,7 @@ IRType Lowerer::GetArrayIndirType(const ValueType valueType)
     return IndirTypes[static_cast<ValueType::TSize>(valueType.GetObjectType())];
 }
 
-BYTE Lowerer::GetArrayIndirScale(const ValueType valueType) const
+BYTE Lowerer::GetArrayIndirScale(const ValueType valueType)
 {
     Assert(valueType.IsLikelyAnyOptimizedArray());
     if(valueType.IsLikelyArrayOrObjectWithArray())
@@ -13046,6 +13046,16 @@ BYTE Lowerer::GetArrayIndirScale(const ValueType valueType) const
     return IndirScales[static_cast<ValueType::TSize>(valueType.GetObjectType())];
 }
 
+// Converts a SIMD access width in bytes into the number of typed-array elements it spans.
+// arrValueType must be a typed array and dataWidth one of 4, 8, 12 or 16 bytes.
+// The result is rounded up, so a partially covered element counts as a whole element.
+int Lowerer::SimdGetElementCountFromBytes(ValueType arrValueType, uint8 dataWidth)
+{
+    Assert(dataWidth == 4 || dataWidth == 8 || dataWidth == 12 || dataWidth == 16);
+    Assert(arrValueType.IsTypedArray());
+    // bytes per element = 2^(indir scale)
+    const BYTE bpe = 1 << Lowerer::GetArrayIndirScale(arrValueType);
+
+    // Integer ceiling division: same result as ceil(dataWidth / bpe) without a float round trip.
+    return (dataWidth + bpe - 1) / bpe;
+}
+
 bool Lowerer::ShouldGenerateArrayFastPath(
     const IR::Opnd *const arrayOpnd,
     const bool supportsObjectsWithArrays,

+ 2 - 3
lib/Backend/Lower.h

@@ -269,10 +269,9 @@ public:
     static uint32   GetArrayOffsetOfHeadSegment(const ValueType valueType);
     static uint32   GetArrayOffsetOfLength(const ValueType valueType);
     static IRType   GetArrayIndirType(const ValueType valueType);
-
+    static BYTE     GetArrayIndirScale(const ValueType valueType);
+    static int      SimdGetElementCountFromBytes(ValueType arrValueType, uint8 dataWidth);
 private:
-    BYTE            GetArrayIndirScale(const ValueType valueType) const;
-
     bool            ShouldGenerateArrayFastPath(const IR::Opnd *const arrayOpnd, const bool supportsObjectsWithArrays, const bool supportsTypedArrays, const bool requiresSse2ForFloatArrays) const;
     IR::RegOpnd *   LoadObjectArray(IR::RegOpnd *const baseOpnd, IR::Instr *const insertBeforeInstr);
     IR::RegOpnd *   GenerateArrayTest(IR::RegOpnd *const baseOpnd, IR::LabelInstr *const isNotObjectLabel, IR::LabelInstr *const isNotArrayLabel, IR::Instr *const insertBeforeInstr, const bool forceFloat, const bool isStore = false, const bool allowDefiniteArray = false);

+ 12 - 1
lib/Backend/LowerMDShared.h

@@ -320,15 +320,26 @@ public:
     IR::Instr*          Simd128LowerSelect(IR::Instr *instr);
     IR::Instr*          Simd128LowerNegI4(IR::Instr *instr);
     IR::Instr*          Simd128LowerMulI4(IR::Instr *instr);
+    IR::Instr*          Simd128AsmJsLowerLoadElem(IR::Instr *instr);
     IR::Instr*          Simd128LowerLoadElem(IR::Instr *instr);
+    IR::Instr*          Simd128ConvertToLoad(IR::Opnd *dst, IR::Opnd *src1, uint8 dataWidth, IR::Instr* instr, byte scaleFactor = 0);
+    IR::Instr*          Simd128AsmJsLowerStoreElem(IR::Instr *instr);
     IR::Instr*          Simd128LowerStoreElem(IR::Instr *instr);
-    IR::Instr*          Simd128LowerShuffle(IR::Instr *instr);
+    IR::Instr*          Simd128ConvertToStore(IR::Opnd *dst, IR::Opnd *src1, uint8 dataWidth, IR::Instr* instr, byte scaleFactor = 0);
+    void                Simd128LoadHeadSegment(IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr);
+    void                Simd128GenerateUpperBoundCheck(IR::RegOpnd *indexOpnd, IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr);
+    IR::Instr*          Simd128LowerSwizzle4(IR::Instr *instr);
+    IR::Instr*          Simd128LowerShuffle4(IR::Instr *instr);
+    BYTE                Simd128GetTypedArrBytesPerElem(ValueType arrType);
     IR::Opnd *          EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd);
     SList<IR::Opnd*>  * Simd128GetExtendedArgs(IR::Instr *instr);
     void                GenerateCheckedSimdLoad(IR::Instr * instr);
     void                GenerateSimdStore(IR::Instr * instr);
+    void                CheckShuffleLanes4(uint8 lanes[], uint8 lanesSrc[], uint *fromSrc1, uint *fromSrc2);
+    void                InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::Opnd *src2, IR::Instr *insertBeforeInstr);
 
 private:
+
     void GenerateFlagInlineCacheCheckForGetterSetter(
         IR::Instr * insertBeforeInstr,
         IR::RegOpnd * opndInlineCache,

+ 463 - 213
lib/Backend/LowerMDSharedSimd128.cpp

@@ -152,7 +152,17 @@ IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr)
     case Js::OpCode::Simd128_LdArrConst_I4:
     case Js::OpCode::Simd128_LdArrConst_F4:
     case Js::OpCode::Simd128_LdArrConst_D2:
-        return Simd128LowerLoadElem(instr);
+        if (m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode())
+        {
+            // with bound checks
+            return Simd128AsmJsLowerLoadElem(instr);
+        }
+        else
+        {
+            // non-AsmJs, boundChecks are extracted from instr
+            return Simd128LowerLoadElem(instr);
+        }
+        
 
     case Js::OpCode::Simd128_StArr_I4:
     case Js::OpCode::Simd128_StArr_F4:
@@ -160,15 +170,24 @@ IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr)
     case Js::OpCode::Simd128_StArrConst_I4:
     case Js::OpCode::Simd128_StArrConst_F4:
     case Js::OpCode::Simd128_StArrConst_D2:
-        return Simd128LowerStoreElem(instr);
+        if (m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode())
+        {
+            return Simd128AsmJsLowerStoreElem(instr);
+        }
+        else
+        {
+            return Simd128LowerStoreElem(instr);
+        }
 
     case Js::OpCode::Simd128_Swizzle_I4:
     case Js::OpCode::Simd128_Swizzle_F4:
     case Js::OpCode::Simd128_Swizzle_D2:
+        return Simd128LowerSwizzle4(instr);
+
     case Js::OpCode::Simd128_Shuffle_I4:
     case Js::OpCode::Simd128_Shuffle_F4:
     case Js::OpCode::Simd128_Shuffle_D2:
-        return Simd128LowerShuffle(instr);
+        return Simd128LowerShuffle4(instr);
 
 
     default:
@@ -440,6 +459,14 @@ IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr)
         Assert(UNREACHED);
     }
 
+    if (instr->m_opcode == Js::OpCode::Simd128_Splat_F4 && instr->GetSrc1()->IsFloat64())
+    {
+        IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
+        // CVTSD2SS regOpnd32.f32, src.f64    -- Convert regOpnd from f64 to f32
+        instr->InsertBefore(IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func));
+        src1 = regOpnd32;
+    }
+
     instr->InsertBefore(IR::Instr::New(movOpCode, dst, src1, m_func));
     instr->InsertBefore(IR::Instr::New(shufOpCode, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
 
@@ -668,6 +695,7 @@ IR::Instr* LowererMD::Simd128LowerMulI4(IR::Instr *instr)
     return pInstr;
 }
 
+
 IR::Instr* LowererMD::SIMD128LowerReplaceLane(IR::Instr* instr)
 {
     SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
@@ -743,11 +771,13 @@ IR::Instr* LowererMD::SIMD128LowerReplaceLane(IR::Instr* instr)
     return prevInstr;
 }
 
-IR::Instr* LowererMD::Simd128LowerShuffle(IR::Instr* instr)
+/*
+4 and 2 lane Swizzle.
+*/
+IR::Instr* LowererMD::Simd128LowerSwizzle4(IR::Instr* instr)
 {
     Js::OpCode shufOpcode = Js::OpCode::SHUFPS;
     Js::OpCode irOpcode = instr->m_opcode;
-    bool isShuffle = false;
 
     SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
 
@@ -767,108 +797,191 @@ IR::Instr* LowererMD::Simd128LowerShuffle(IR::Instr* instr)
     Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128());
 
     // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction
-    if (irOpcode == Js::OpCode::Simd128_Swizzle_I4 ||
-        irOpcode == Js::OpCode::Simd128_Swizzle_F4 ||
-        irOpcode == Js::OpCode::Simd128_Swizzle_D2)
-    {
-        isShuffle = false;
-
-        AssertMsg(srcs[1] && srcs[1]->IsIntConstOpnd() &&
-            srcs[2] && srcs[2]->IsIntConstOpnd() &&
-            (irOpcode == Js::OpCode::Simd128_Swizzle_D2 || (srcs[3] && srcs[3]->IsIntConstOpnd())) &&
-            (irOpcode == Js::OpCode::Simd128_Swizzle_D2 || (srcs[4] && srcs[4]->IsIntConstOpnd())), "Type-specialized swizzle is supported only with constant lane indices");
+    Assert(irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_F4 || irOpcode == Js::OpCode::Simd128_Swizzle_D2);
+    AssertMsg(srcs[1] && srcs[1]->IsIntConstOpnd() &&
+              srcs[2] && srcs[2]->IsIntConstOpnd() &&
+              (irOpcode == Js::OpCode::Simd128_Swizzle_D2 || (srcs[3] && srcs[3]->IsIntConstOpnd())) &&
+              (irOpcode == Js::OpCode::Simd128_Swizzle_D2 || (srcs[4] && srcs[4]->IsIntConstOpnd())), "Type-specialized swizzle is supported only with constant lane indices");
 
-        if (irOpcode == Js::OpCode::Simd128_Swizzle_D2)
-        {
-            lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
-            lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
-            Assert(lane0 >= 0 && lane0 < 2);
-            Assert(lane1 >= 0 && lane1 < 2);
-            shufMask = (int8)((lane1 << 1) | lane0);
-        }
-        else
-        {
-            AnalysisAssert(srcs[3] != nullptr && srcs[4] != nullptr);
-            lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
-            lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
-            lane2 = srcs[3]->AsIntConstOpnd()->AsInt32();
-            lane3 = srcs[4]->AsIntConstOpnd()->AsInt32();
-            Assert(lane1 >= 0 && lane1 < 4);
-            Assert(lane2 >= 0 && lane2 < 4);
-            Assert(lane2 >= 0 && lane2 < 4);
-            Assert(lane3 >= 0 && lane3 < 4);
-            shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0);
-        }
-    }
-    else if (irOpcode == Js::OpCode::Simd128_Shuffle_I4 ||
-        irOpcode == Js::OpCode::Simd128_Shuffle_F4 ||
-        irOpcode == Js::OpCode::Simd128_Shuffle_D2)
+    if (irOpcode == Js::OpCode::Simd128_Swizzle_D2)
     {
-        isShuffle = true;
-        Assert(srcs[1] && srcs[1]->IsSimd128());
-
-        AssertMsg(srcs[2] && srcs[2]->IsIntConstOpnd() &&
-            srcs[3] && srcs[3]->IsIntConstOpnd() &&
-            (irOpcode == Js::OpCode::Simd128_Shuffle_D2 || (srcs[4] && srcs[4]->IsIntConstOpnd())) &&
-            (irOpcode == Js::OpCode::Simd128_Shuffle_D2 || (srcs[5] && srcs[5]->IsIntConstOpnd())), "Type-specialized shuffle is supported only with constant lane indices");
-
-        if (irOpcode == Js::OpCode::Simd128_Shuffle_D2)
-        {
-            Assert(srcs[2]->IsIntConstOpnd() && srcs[3]->IsIntConstOpnd());
-
-            lane0 = srcs[2]->AsIntConstOpnd()->AsInt32();
-            lane1 = srcs[3]->AsIntConstOpnd()->AsInt32() - 2;
-            Assert(lane0 >= 0 && lane0 < 2);
-            Assert(lane1 >= 0 && lane1 < 2);
-            shufMask = (int8)((lane1 << 1) | lane0);
-        }
-        else
-        {
-            AnalysisAssert(srcs[4] != nullptr && srcs[5] != nullptr);
-            lane0 = srcs[2]->AsIntConstOpnd()->AsInt32();
-            lane1 = srcs[3]->AsIntConstOpnd()->AsInt32();
-            lane2 = srcs[4]->AsIntConstOpnd()->AsInt32() - 4;
-            lane3 = srcs[5]->AsIntConstOpnd()->AsInt32() - 4;
-            Assert(lane0 >= 0 && lane0 < 4);
-            Assert(lane1 >= 0 && lane1 < 4);
-            Assert(lane2 >= 0 && lane2 < 4);
-            Assert(lane3 >= 0 && lane3 < 4);
-            shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0);
-        }
+        lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
+        lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
+        Assert(lane0 >= 0 && lane0 < 2);
+        Assert(lane1 >= 0 && lane1 < 2);
+        shufMask = (int8)((lane1 << 1) | lane0);
+        shufOpcode = Js::OpCode::SHUFPD;
     }
     else
     {
-        Assert(UNREACHED);
-    }
-
-    if (instr->m_opcode == Js::OpCode::Simd128_Swizzle_D2 || instr->m_opcode == Js::OpCode::Simd128_Shuffle_D2)
-    {
-        shufOpcode = Js::OpCode::SHUFPD;
+        if (irOpcode == Js::OpCode::Simd128_Swizzle_I4)
+        {
+            shufOpcode = Js::OpCode::PSHUFD;
+        }
+        AnalysisAssert(srcs[3] != nullptr && srcs[4] != nullptr);
+        lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
+        lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
+        lane2 = srcs[3]->AsIntConstOpnd()->AsInt32();
+        lane3 = srcs[4]->AsIntConstOpnd()->AsInt32();
+        // Every lane index feeds two bits of the shuffle mask, so all four must be in [0, 4).
+        Assert(lane0 >= 0 && lane0 < 4);
+        Assert(lane1 >= 0 && lane1 < 4);
+        Assert(lane2 >= 0 && lane2 < 4);
+        Assert(lane3 >= 0 && lane3 < 4);
+        shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0);
     }
 
-    // Lower shuffle/swizzle
-
     instr->m_opcode = shufOpcode;
     instr->SetDst(dst);
 
     // MOVAPS dst, src1
     instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, srcs[0], m_func));
-    if (isShuffle)
+    // SHUF dst, dst, imm8
+    instr->SetSrc1(dst);
+    instr->SetSrc2(IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true));
+    return pInstr;
+}
+
+/*
+4 lane shuffle. Handles arbitrary lane values.
+
+Lowers a type-specialized Simd128_Shuffle_I4 / Simd128_Shuffle_F4 whose four lane
+indices are constants in [0, 8). Indices 0-3 select a lane of the first source,
+indices 4-7 a lane of the second source (see CheckShuffleLanes4).
+Depending on how the lanes split between the two sources this emits:
+  - 4/0 split: one MOVAPS/SHUFPS pair on the single source used,
+  - 2/2 split: one pair when the low two result lanes come from the first source,
+    otherwise two pairs (gather, then re-arrange),
+  - 3/1 split: two pairs plus a lane-mask blend (MOVUPS/ANDPS/ANDNPS/ORPS).
+The original instruction is removed and the instruction that preceded it is returned.
+*/
+
+IR::Instr* LowererMD::Simd128LowerShuffle4(IR::Instr* instr)
+{
+    Js::OpCode irOpcode = instr->m_opcode;
+    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
+    IR::Opnd *dst = args->Pop();
+    IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
+
+    int i = 0;
+    while (!args->Empty() && i < 6)
     {
-        // SHUF dst, src2, imm8
-        instr->SetSrc1(srcs[1]);
+        srcs[i++] = args->Pop(); // srcs[0..1] are the SIMD sources, srcs[2..5] the lane indices
     }
-    else
+
+    uint8 lanes[4], lanesSrc[4];
+    uint fromSrc1, fromSrc2;
+    IR::Instr *pInstr = instr->m_prev;
+    
+    Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128() && srcs[1] && srcs[1]->IsSimd128());
+    Assert(irOpcode == Js::OpCode::Simd128_Shuffle_I4 || irOpcode == Js::OpCode::Simd128_Shuffle_F4);
+
+    // A type-specialized shuffle requires all four lane indices to be constants; any combination of lane values in [0, 8) is handled below.
+    AssertMsg(srcs[2] && srcs[2]->IsIntConstOpnd() &&
+              srcs[3] && srcs[3]->IsIntConstOpnd() &&
+              srcs[4] && srcs[4]->IsIntConstOpnd() &&
+              srcs[5] && srcs[5]->IsIntConstOpnd(), "Type-specialized shuffle is supported only with constant lane indices");
+
+    
+
+    lanes[0] = (uint8) srcs[2]->AsIntConstOpnd()->AsInt32();
+    lanes[1] = (uint8) srcs[3]->AsIntConstOpnd()->AsInt32();
+    lanes[2] = (uint8) srcs[4]->AsIntConstOpnd()->AsInt32();
+    lanes[3] = (uint8) srcs[5]->AsIntConstOpnd()->AsInt32();
+    Assert(lanes[0] >= 0 && lanes[0] < 8);
+    Assert(lanes[1] >= 0 && lanes[1] < 8);
+    Assert(lanes[2] >= 0 && lanes[2] < 8);
+    Assert(lanes[3] >= 0 && lanes[3] < 8);
+
+    CheckShuffleLanes4(lanes, lanesSrc, &fromSrc1, &fromSrc2); // lanesSrc[i] = 1 or 2 (source of lane i); fromSrcN = lanes taken from source N
+    Assert(fromSrc1 + fromSrc2 == 4);
+
+    if (fromSrc1 == 4 || fromSrc2 == 4)
     {
-        // SHUF dst, dst, imm8
-        instr->SetSrc1(dst);
+        // can be done with a swizzle
+        IR::Opnd *srcOpnd = fromSrc1 == 4 ? srcs[0] : srcs[1];
+        InsertShufps(lanes, dst, srcOpnd, srcOpnd, instr); // InsertShufps folds lane values 4-7 down to 0-3
+    }
+    else if (fromSrc1 == 2)
+    {
+        if (lanes[0] < 4 && lanes[1] < 4)
+        {
+            // x86 friendly shuffle: low two result lanes from the first source, high two from the second
+            Assert(lanes[2] >= 4 && lanes[3] >= 4);
+            InsertShufps(lanes, dst, srcs[0], srcs[1], instr);
+        }
+        else
+        {
+            // arbitrary shuffle with 2 lanes from each src
+            uint8 ordLanes[4], reArrLanes[4];
+
+            // order lanes based on which src they come from
+            // compute re-arrangement mask
+            for (uint8 i = 0, j1 = 0, j2 = 2; i < 4; i++)
+            {
+                if (lanesSrc[i] == 1)
+                {
+                    ordLanes[j1] = lanes[i];
+                    reArrLanes[i] = j1; // result lane i will be read from temp lane j1
+                    j1++;
+                }
+                else
+                {
+                    Assert(lanesSrc[i] == 2);
+                    ordLanes[j2] = lanes[i];
+                    reArrLanes[i] = j2; // result lane i will be read from temp lane j2
+                    j2++;
+                }
+            }
+            IR::RegOpnd *temp = IR::RegOpnd::New(dst->GetType(), m_func);
+            InsertShufps(ordLanes, temp, srcs[0], srcs[1], instr); // gather: first-source picks in temp lanes 0-1, second-source picks in lanes 2-3
+            InsertShufps(reArrLanes, dst, temp, temp, instr); // permute temp into the requested lane order
+        }
+    }
+    else if (fromSrc1 == 3 || fromSrc2 == 3)
+    {
+        // shuffle with 3 lanes from one src, one from another
+
+        IR::Instr *newInstr;
+        IR::Opnd * majSrc, *minSrc;
+        IR::RegOpnd *temp1 = IR::RegOpnd::New(dst->GetType(), m_func);
+        IR::RegOpnd *temp2 = IR::RegOpnd::New(dst->GetType(), m_func);
+        IR::RegOpnd *temp3 = IR::RegOpnd::New(dst->GetType(), m_func);
+        uint8 minorityLane = 0, maxLaneValue;
+        majSrc = fromSrc1 == 3 ? srcs[0] : srcs[1];
+        minSrc = fromSrc1 == 3 ? srcs[1] : srcs[0];
+        Assert(majSrc != minSrc);
+
+        // Algorithm:
+        // SHUFPS temp1, majSrc, lanes
+        // SHUFPS temp2, minSrc, lanes
+        // MOVUPS temp3, [minorityLane mask]
+        // ANDPS  temp2, temp3          // mask all lanes but minorityLane
+        // ANDNPS temp3, temp1          // zero minorityLane
+        // ORPS   dst, temp2, temp3
+
+        // find minorityLane to mask
+        maxLaneValue = minSrc == srcs[0] ? 4 : 8; // lane values of the minority source lie in [maxLaneValue - 4, maxLaneValue)
+        for (uint8 i = 0; i < 4; i++)
+        {
+            if (lanes[i] >= (maxLaneValue - 4) && lanes[i] < maxLaneValue)
+            {
+                minorityLane = i;
+                break;
+            }
+        }
+        IR::MemRefOpnd * laneMask = IR::MemRefOpnd::New((void*)&X86_4LANES_MASKS[minorityLane], dst->GetType(), m_func); // NOTE(review): X86_4LANES_MASKS is defined elsewhere; presumably entry i has only lane i set -- confirm
+
+        InsertShufps(lanes, temp1, majSrc, majSrc, instr);
+        InsertShufps(lanes, temp2, minSrc, minSrc, instr);
+        newInstr = IR::Instr::New(Js::OpCode::MOVUPS, temp3, laneMask, m_func);
+        instr->InsertBefore(newInstr);
+        Legalize(newInstr);
+        newInstr = IR::Instr::New(Js::OpCode::ANDPS, temp2, temp2, temp3, m_func);
+        instr->InsertBefore(newInstr);
+        Legalize(newInstr);
+        newInstr = IR::Instr::New(Js::OpCode::ANDNPS, temp3, temp3, temp1, m_func);
+        instr->InsertBefore(newInstr);
+        Legalize(newInstr);
+        newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, temp2, temp3, m_func);
+        instr->InsertBefore(newInstr);
+        Legalize(newInstr);
     }
-    instr->SetSrc2(IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true));
 
+    instr->Remove(); // the shuffle has been fully expanded into the instructions inserted above
     return pInstr;
 }
 
-IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
+IR::Instr* LowererMD::Simd128AsmJsLowerLoadElem(IR::Instr *instr)
 {
     Assert(instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
         instr->m_opcode == Js::OpCode::Simd128_LdArr_F4 ||
@@ -893,38 +1006,7 @@ IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
     IR::Instr * done;
     if (indexOpnd ||  (((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth) > 0x1000000 /* 16 MB */))
     {
-        // CMP indexOpnd, src2(arrSize)
-        // JA $helper
-        // JMP $load
-        // $helper:
-        // Throw RangeError
-        // JMP $done
-        // $load:
-        // MOVUPS dst, src1([arrayBuffer + indexOpnd]) // or other based on data width
-        // $done:
-
-        uint32 bpe = 1;
-        switch (arrType.GetObjectType())
-        {
-        case ObjectType::Int8Array:
-        case ObjectType::Uint8Array:
-            break;
-        case ObjectType::Int16Array:
-        case ObjectType::Uint16Array:
-            bpe = 2;
-            break;
-        case ObjectType::Int32Array:
-        case ObjectType::Uint32Array:
-        case ObjectType::Float32Array:
-            bpe = 4;
-            break;
-        case ObjectType::Float64Array:
-            bpe = 8;
-            break;
-        default:
-            Assert(UNREACHED);
-        }
-
+        uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
         // bound check and helper
         done = this->lowererMDArch.LowerAsmJsLdElemHelper(instr, true, bpe != dataWidth);
     }
@@ -935,66 +1017,96 @@ IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
         // (1) constant heap or (2) variable heap with constant index < 16MB.
         // Case (1) requires static bound check. Case (2) means we are always in bound.
 
-        instr->UnlinkDst();
-
         // this can happen in cases where globopt props a constant access which was not known at bytecodegen time or when heap is non-constant
 
         if (src2->IsIntConstOpnd() && ((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
         {
             m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
-            instr->FreeSrc1();
-            instr->FreeSrc2();
             instr->Remove();
             return instrPrev;
         }
-        instr->FreeSrc2();
         done = instr;
     }
 
+    return Simd128ConvertToLoad(dst, src1, dataWidth, instr);
+}
+
+// Lowers a type-specialized (non-asm.js) Simd128_LdArr_I4 / Simd128_LdArr_F4.
+// Emits the upper bound check if it is still needed, rebases the access on the
+// array's head segment, then converts the instruction into the machine load(s).
+IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
+{
+    Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
+    Assert(instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 || instr->m_opcode == Js::OpCode::Simd128_LdArr_F4);
+
+    IR::Opnd * loadDst = instr->GetDst();
+    IR::Opnd * loadSrc = instr->GetSrc1();
+    IR::IndirOpnd * arrayIndir = loadSrc->AsIndirOpnd();
+
+    // Read the array type before the base operand is swapped for the head segment.
+    ValueType arrayType = arrayIndir->GetBaseOpnd()->GetValueType();
+
+    // If we type-specialized, then array is a definite typed-array.
+    Assert(arrayType.IsObject() && arrayType.IsTypedArray());
+
+    Simd128GenerateUpperBoundCheck(arrayIndir->GetIndexOpnd(), arrayIndir, arrayType, instr);
+    Simd128LoadHeadSegment(arrayIndir, arrayType, instr);
+
+    const uint8 accessWidth = instr->dataWidth;
+    const byte elemScale = m_lowerer->GetArrayIndirScale(arrayType); // scale factor applied to the index
+    return Simd128ConvertToLoad(loadDst, loadSrc, accessWidth, instr, elemScale);
+}
+
+IR::Instr *
+LowererMD::Simd128ConvertToLoad(IR::Opnd *dst, IR::Opnd *src, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0*/)
+{
     IR::Instr *newInstr = nullptr;
+    IR::Instr * instrPrev = instr->m_prev;
+
+    // Type-specialized.
+    Assert(dst && dst->IsSimd128());
+    Assert(src->IsIndirOpnd());
+    if (scaleFactor > 0)
+    {
+        // needed only for non-Asmjs code
+        Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
+        src->AsIndirOpnd()->SetScale(scaleFactor);
+    }
+
     switch (dataWidth)
     {
     case 16:
         // MOVUPS dst, src1([arrayBuffer + indexOpnd])
-        newInstr = IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, m_func);
+        newInstr = IR::Instr::New(LowererMDArch::GetAssignOp(src->GetType()), dst, src, instr->m_func);
         instr->InsertBefore(newInstr);
         Legalize(newInstr);
         break;
     case 12:
     {
-       IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), m_func);
-
-       // MOVSD dst, src1([arrayBuffer + indexOpnd])
-       newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func);
-       instr->InsertBefore(newInstr);
-       Legalize(newInstr);
+        IR::RegOpnd *temp = IR::RegOpnd::New(src->GetType(), instr->m_func);
 
-       // MOVSS temp, src1([arrayBuffer + indexOpnd + 8])
-       newInstr = IR::Instr::New(Js::OpCode::MOVSS, temp, src1, m_func);
-       instr->InsertBefore(newInstr);
-       newInstr->GetSrc1()->AsIndirOpnd()->SetOffset(src1->AsIndirOpnd()->GetOffset() + 8, true);
-       Legalize(newInstr);
+        // MOVSD dst, src1([arrayBuffer + indexOpnd])
+        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
+        instr->InsertBefore(newInstr);
+        Legalize(newInstr);
 
-       // PSLLDQ temp, 0x08
-       instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, m_func, true), m_func));
+        // MOVSS temp, src1([arrayBuffer + indexOpnd + 8])
+        newInstr = IR::Instr::New(Js::OpCode::MOVSS, temp, src, instr->m_func);
+        instr->InsertBefore(newInstr);
+        newInstr->GetSrc1()->AsIndirOpnd()->SetOffset(src->AsIndirOpnd()->GetOffset() + 8, true);
+        Legalize(newInstr);
 
-       // ORPS dst, temp
-       newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, temp, m_func);
-       instr->InsertBefore(newInstr);
-       Legalize(newInstr);
+        // PSLLDQ temp, 0x08
+        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, instr->m_func, true), instr->m_func));
 
-       break;
+        // ORPS dst, temp
+        newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, temp, instr->m_func);
+        instr->InsertBefore(newInstr);
+        Legalize(newInstr);
+        break;
     }
-
     case 8:
         // MOVSD dst, src1([arrayBuffer + indexOpnd])
-        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func);
+        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
         instr->InsertBefore(newInstr);
         Legalize(newInstr);
         break;
     case 4:
         // MOVSS dst, src1([arrayBuffer + indexOpnd])
-        newInstr = IR::Instr::New(Js::OpCode::MOVSS, dst, src1, m_func);
+        newInstr = IR::Instr::New(Js::OpCode::MOVSS, dst, src, instr->m_func);
         instr->InsertBefore(newInstr);
         Legalize(newInstr);
         break;
@@ -1006,7 +1118,7 @@ IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
     return instrPrev;
 }
 
-IR::Instr* LowererMD::Simd128LowerStoreElem(IR::Instr *instr)
+IR::Instr* LowererMD::Simd128AsmJsLowerStoreElem(IR::Instr *instr)
 {
 
     Assert(instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
@@ -1030,7 +1142,7 @@ IR::Instr* LowererMD::Simd128LowerStoreElem(IR::Instr *instr)
     Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
 
     IR::Instr * done;
-    bool doStore = true;
+    
     if (indexOpnd || ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > 0x1000000))
     {
         // CMP indexOpnd, src2(arrSize)
@@ -1042,88 +1154,178 @@ IR::Instr* LowererMD::Simd128LowerStoreElem(IR::Instr *instr)
         // $store:
         // MOV dst([arrayBuffer + indexOpnd]), src1
         // $done:
-
-        uint32 bpe = 1;
-        switch (arrType.GetObjectType())
-        {
-        case ObjectType::Int8Array:
-        case ObjectType::Uint8Array:
-            break;
-        case ObjectType::Int16Array:
-        case ObjectType::Uint16Array:
-            bpe = 2;
-            break;
-        case ObjectType::Int32Array:
-        case ObjectType::Uint32Array:
-        case ObjectType::Float32Array:
-            bpe = 4;
-            break;
-        case ObjectType::Float64Array:
-            bpe = 8;
-            break;
-        default:
-            Assert(UNREACHED);
-        }
+        uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
         done = this->lowererMDArch.LowerAsmJsStElemHelper(instr, true, bpe != dataWidth);
     }
     else
     {
-        instr->UnlinkDst();
-        instr->UnlinkSrc1();
-
         // we might have a constant index if globopt propped a constant store. we can ahead of time check if it is in-bounds
         if (src2->IsIntConstOpnd() && ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
         {
             m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
-
-            doStore = false;
-
-            src1->Free(m_func);
-            dst->Free(m_func);
+            instr->Remove();
+            return instrPrev;
         }
         done = instr;
-        instr->FreeSrc2();
     }
-    if (doStore)
+
+    return Simd128ConvertToStore(dst, src1, dataWidth, instr);
+}
+
+// Lowers a type-specialized (non-asm.js) Simd128_StArr_I4 / Simd128_StArr_F4.
+// Emits the upper bound check if it is still needed, rebases the access on the
+// array's head segment, then converts the instruction into the machine store(s).
+IR::Instr* LowererMD::Simd128LowerStoreElem(IR::Instr *instr)
+{
+    Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
+    Assert(instr->m_opcode == Js::OpCode::Simd128_StArr_I4 || instr->m_opcode == Js::OpCode::Simd128_StArr_F4);
+
+    IR::Opnd * storeDst = instr->GetDst();
+    IR::Opnd * storeValue = instr->GetSrc1();
+    IR::IndirOpnd * arrayIndir = storeDst->AsIndirOpnd();
+    const uint8 accessWidth = instr->dataWidth;
+
+    // Read the array type before the base operand is swapped for the head segment.
+    ValueType arrayType = arrayIndir->GetBaseOpnd()->GetValueType();
+
+    // If we type-specialized, then array is a definite typed-array.
+    Assert(arrayType.IsObject() && arrayType.IsTypedArray());
+
+    Simd128GenerateUpperBoundCheck(arrayIndir->GetIndexOpnd(), arrayIndir, arrayType, instr);
+    Simd128LoadHeadSegment(arrayIndir, arrayType, instr);
+
+    const byte elemScale = m_lowerer->GetArrayIndirScale(arrayType); // scale factor applied to the index
+    return Simd128ConvertToStore(storeDst, storeValue, accessWidth, instr, elemScale);
+}
+
+// Replaces a SIMD store instruction with the machine store(s) for the given width.
+//   dst         - indir operand describing the destination memory
+//   src1        - SIMD value to store
+//   dataWidth   - number of bytes to write: 16, 12, 8 or 4
+//   instr       - instruction being lowered; new instructions go before it and it is then removed
+//   scaleFactor - when non-zero, set as the index scale on dst (non-asm.js code only)
+// Returns the instruction that preceded instr on entry.
+IR::Instr *
+LowererMD::Simd128ConvertToStore(IR::Opnd *dst, IR::Opnd *src1, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0 */)
+{
+    // Captured up front: instr is removed before returning.
+    IR::Instr * instrPrev = instr->m_prev;
+
+    Assert(src1 && src1->IsSimd128());
+    Assert(dst->IsIndirOpnd());
+
+    if (scaleFactor > 0)
     {
-        switch (dataWidth)
-        {
-        case 16:
-            // MOVUPS dst([arrayBuffer + indexOpnd]), src1
-            instr->InsertBefore(IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, m_func));
-            break;
-        case 12:
-        {
-                   IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), m_func);
-                   IR::Instr *movss;
-                   // MOVAPS temp, src
-                   instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp, src1, m_func));
-                   // MOVSD dst([arrayBuffer + indexOpnd]), temp
-                   instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, temp, m_func));
-                   // PSRLDQ temp, 0x08
-                   instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, m_func, true), m_func));
-                   // MOVSS dst([arrayBuffer + indexOpnd + 8]), temp
-                   movss = IR::Instr::New(Js::OpCode::MOVSS, dst, temp, m_func);
-                   instr->InsertBefore(movss);
-                   movss->GetDst()->AsIndirOpnd()->SetOffset(dst->AsIndirOpnd()->GetOffset() + 8, true);
-                   break;
-        }
-        case 8:
-            // MOVSD dst([arrayBuffer + indexOpnd]), src1
-            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func));
-            break;
-        case 4:
-            // MOVSS dst([arrayBuffer + indexOpnd]), src1
-            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, src1, m_func));
-            break;
-        default:;
-            Assume(UNREACHED);
-        }
+        // needed only for non-Asmjs code
+        Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
+        dst->AsIndirOpnd()->SetScale(scaleFactor);
+    }
+
+    switch (dataWidth)
+    {
+    case 16:
+        // MOVUPS dst([arrayBuffer + indexOpnd]), src1
+        instr->InsertBefore(IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, instr->m_func));
+        break;
+    case 12:
+    {
+        // 12 bytes are written as the low 8 bytes followed by the next 4 bytes.
+        IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), instr->m_func);
+        IR::Instr *movss;
+        // MOVAPS temp, src
+        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp, src1, instr->m_func));
+        // MOVSD dst([arrayBuffer + indexOpnd]), temp
+        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, temp, instr->m_func));
+        // PSRLDQ temp, 0x08
+        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, instr->m_func, true), instr->m_func));
+        // MOVSS dst([arrayBuffer + indexOpnd + 8]), temp
+        movss = IR::Instr::New(Js::OpCode::MOVSS, dst, temp, instr->m_func);
+        instr->InsertBefore(movss);
+        movss->GetDst()->AsIndirOpnd()->SetOffset(dst->AsIndirOpnd()->GetOffset() + 8, true);
+        break;
+    }
+    case 8:
+        // MOVSD dst([arrayBuffer + indexOpnd]), src1
+        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, instr->m_func));
+        break;
+    case 4:
+        // MOVSS dst([arrayBuffer + indexOpnd]), src1
+        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, src1, instr->m_func));
+        break;
+    default:;
+        Assume(UNREACHED);
     }
     instr->Remove();
     return instrPrev;
 }
 
+/*
+Emits the upper bound check for a type-specialized (non-asm.js) SIMD typed-array access.
+
+  indexOpnd - index register of the access, or null when the index is the constant offset of indirOpnd
+  indirOpnd - the array access; its base operand must be an ArrayRegOpnd
+  arrType   - value type of the array, used to locate the length field when no length sym exists
+  instr     - the load/store being lowered; code is inserted before it, instr->dataWidth is the access width
+
+Nothing is emitted when the ArrayRegOpnd reports the upper bound check as eliminated.
+Otherwise a range error is raised unless (index + elemCount) <= length, where elemCount is
+the number of array elements covered by the access.
+*/
+void
+LowererMD::Simd128GenerateUpperBoundCheck(IR::RegOpnd *indexOpnd, IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
+{
+    Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
+
+    IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
+    IR::Opnd* headSegmentLengthOpnd;
+
+    if (arrayRegOpnd->EliminatedUpperBoundCheck())
+    {
+        // already eliminated or extracted by globOpt (OptArraySrc). Nothing to do.
+        return;
+    }
+
+    if (arrayRegOpnd->HeadSegmentLengthSym())
+    {
+        headSegmentLengthOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentLengthSym(), TyUint32, m_func);
+    }
+    else
+    {
+        // (headSegmentLength = [base + offset(length)])
+        int lengthOffset;
+        lengthOffset = m_lowerer->GetArrayOffsetOfLength(arrType);
+        headSegmentLengthOpnd = IR::IndirOpnd::New(arrayRegOpnd, lengthOffset, TyUint32, m_func);
+    }
+
+    // Everything below is inserted before skipLabel, giving: compare, branch to skipLabel, throw, skipLabel.
+    IR::LabelInstr * skipLabel = Lowerer::InsertLabel(false, instr);
+    int32 elemCount = Lowerer::SimdGetElementCountFromBytes(arrayRegOpnd->GetValueType(), instr->dataWidth);
+    if (indexOpnd)
+    {
+        //  MOV tmp, elemCount
+        //  ADD tmp, index
+        //  CMP tmp, Length  -- upper bound check
+        //  JBE $skipLabel
+        //  Throw RuntimeError
+        //  skipLabel:
+        IR::RegOpnd *tmp = IR::RegOpnd::New(indexOpnd->GetType(), m_func);
+        IR::IntConstOpnd *elemCountOpnd = IR::IntConstOpnd::New(elemCount, TyInt8, m_func, true);
+        m_lowerer->InsertMove(tmp, elemCountOpnd, skipLabel);
+        Lowerer::InsertAdd(false, tmp, tmp, indexOpnd, skipLabel);
+        m_lowerer->InsertCompareBranch(tmp, headSegmentLengthOpnd, Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
+    }
+    else
+    {
+        //  CMP Length, (offset + elemCount)
+        //  JAE $skipLabel  -- in bounds when Length >= offset + elemCount
+        //  Throw RuntimeError
+        //  skipLabel:
+        // The operands are in the opposite order from the register-index path, so the
+        // branch condition must be >= here to express the same in-bounds test.
+        int32 offset = indirOpnd->GetOffset();
+        int32 index = offset + elemCount;
+        m_lowerer->InsertCompareBranch(headSegmentLengthOpnd, IR::IntConstOpnd::New(index, TyInt32, m_func, true), Js::OpCode::BrGe_A, true, skipLabel, skipLabel);
+    }
+    m_lowerer->GenerateRuntimeError(skipLabel, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
+}
+
+// Rebases indirOpnd on the array's head segment instead of the array object.
+// Uses the head segment sym attached to the ArrayRegOpnd when there is one; otherwise
+// loads the head segment pointer from the array object before instr.
+//   indirOpnd - the array access to rewrite; its base operand must be an ArrayRegOpnd
+//   arrType   - value type of the array, used to locate the head segment field
+//   instr     - insertion point for the load of the head segment
+void
+LowererMD::Simd128LoadHeadSegment(IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
+{
+
+    // For non-asm.js we check if headSeg symbol exists, else load it.
+    IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
+    IR::RegOpnd *headSegmentOpnd;
+
+    if (arrayRegOpnd->HeadSegmentSym())
+    {
+        headSegmentOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentSym(), TyMachPtr, m_func);
+    }
+    else
+    {
+        // REVIEW: Is this needed ? Shouldn't globOpt make sure headSegSym is set and alive ?
+        //  MOV headSegment, [base + offset(head)]
+        int32 headOffset = m_lowerer->GetArrayOffsetOfHeadSegment(arrType);
+        // Named distinctly so it does not shadow the indirOpnd parameter that is rewritten below.
+        IR::IndirOpnd * headSegmentIndir = IR::IndirOpnd::New(arrayRegOpnd, headOffset, TyMachPtr, this->m_func);
+        headSegmentOpnd = IR::RegOpnd::New(TyMachPtr, this->m_func);
+        m_lowerer->InsertMove(headSegmentOpnd, headSegmentIndir, instr);
+    }
+
+    // change base to be the head segment instead of the array object
+    indirOpnd->SetBaseOpnd(headSegmentOpnd);
+}
+
+
+
 // Builds args list <dst, src1, src2, src3 ..>
 SList<IR::Opnd*> * LowererMD::Simd128GetExtendedArgs(IR::Instr *instr)
 {
@@ -1333,3 +1535,51 @@ void LowererMD::GenerateSimdStore(IR::Instr * instr)
     instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVUPS, valDst, src, this->m_func));
     instr->Remove();
 }
+
+// Classifies each of the 4 shuffle lane indices by the source vector it selects from.
+//   lanes     - 4 lane indices; 0..3 select from the first source, 4..7 from the second.
+//   lanesSrc  - out: lanesSrc[i] is set to 1 (first source) or 2 (second source).
+//   fromSrc1  - out: number of lanes taken from the first source.
+//   fromSrc2  - out: number of lanes taken from the second source.
+// An index outside 0..7 hits Assert(UNREACHED) and leaves lanesSrc[i] unwritten.
+void LowererMD::CheckShuffleLanes4(uint8 lanes[], uint8 lanesSrc[], uint *fromSrc1, uint *fromSrc2)
+{
+    Assert(lanes);
+    Assert(lanesSrc);
+    Assert(fromSrc1 && fromSrc2);
+    *fromSrc1 = 0;
+    *fromSrc2 = 0;
+    for (uint i = 0; i < 4; i++)
+    {
+        // lanes[] is unsigned, so a lower-bound check against 0 would always be true.
+        if (lanes[i] < 4)
+        {
+            (*fromSrc1)++;
+            lanesSrc[i] = 1;
+        }
+        else if (lanes[i] < 8)
+        {
+            // Reaching here already implies lanes[i] >= 4.
+            (*fromSrc2)++;
+            lanesSrc[i] = 2;
+        }
+        else
+        {
+            Assert(UNREACHED);
+        }
+    }
+}
+
+// Emits, before instr:
+//     MOVAPS dst, src1
+//     SHUFPS dst, src2, imm8
+// where imm8 packs the four entries of lanes[] two bits apiece, lanes[0] in the lowest bits.
+// Entries >= 4 are reduced by 4 before packing.
+void LowererMD::InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::Opnd *src2, IR::Instr *instr)
+{
+    uint packedLanes = 0;
+    for (uint laneIdx = 0; laneIdx < 4; laneIdx++)
+    {
+        uint8 selector = lanes[laneIdx];
+        if (selector >= 4)
+        {
+            selector = (uint8)(selector - 4);
+        }
+        packedLanes |= ((uint)selector) << (laneIdx * 2);
+    }
+    // Keep only the low 8 bits, as a signed byte, before widening to IntConstType.
+    const int8 shufMask = (int8)packedLanes;
+
+    // MOVAPS dst, src1
+    IR::Instr *movInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
+    instr->InsertBefore(movInstr);
+
+    // SHUF dst, src2, imm8
+    IR::IntConstOpnd *maskOpnd = IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true);
+    instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, maskOpnd, m_func));
+}
+
+// Returns 2 raised to the indir scale that Lowerer reports for arrType,
+// narrowed to BYTE.
+BYTE LowererMD::Simd128GetTypedArrBytesPerElem(ValueType arrType)
+{
+    return static_cast<BYTE>(1 << Lowerer::GetArrayIndirScale(arrType));
+}
+

+ 1 - 1
lib/Backend/Sym.cpp

@@ -795,7 +795,7 @@ StackSym::GetTypeEquivSym(IRType type, Func *func)
     int i = 1;
     while (sym != this)
     {
-        Assert(i <= 5); // circular of at most 6 syms : var, f64, i32, simd128I4, simd128F4, simd12D2
+        Assert(i <= 5); // circular of at most 6 syms : var, f64, i32, simd128I4, simd128F4, simd128D2
         if (sym->m_type == type)
         {
             return sym;

+ 31 - 1
lib/Runtime/Base/ThreadContext.cpp

@@ -584,7 +584,7 @@ void ThreadContext::AddSimdFuncToMaps(Js::OpCode op, ...)
         return;
     }
     Js::FunctionInfo *funcInfo = va_arg(arguments, Js::FunctionInfo*);
-    simdFuncInfoToOpcodeMap->AddNew(funcInfo, op);
+    AddSimdFuncInfo(op, funcInfo);
 
     SimdFuncSignature simdFuncSignature;
     simdFuncSignature.valid = true;
@@ -601,6 +601,36 @@ void ThreadContext::AddSimdFuncToMaps(Js::OpCode op, ...)
     va_end(arguments);
 }
 
+// Registers funcInfo -> op in simdFuncInfoToOpcodeMap.
+// For the I4/F4 array load and store opcodes it also registers the Load1/2/3 or
+// Store1/2/3 entry points of the matching SIMD library against the same opcode.
+//   op       - SIMD opcode being registered.
+//   funcInfo - primary entry point for op.
+void ThreadContext::AddSimdFuncInfo(Js::OpCode op, Js::FunctionInfo *funcInfo)
+{
+    // primary funcInfo
+    simdFuncInfoToOpcodeMap->AddNew(funcInfo, op);
+    // Entry points of SIMD loads/stores of non-full width all map to the same opcode. This is not captured in the opcode table, so add additional entry points here.
+    switch (op)
+    {
+    case Js::OpCode::Simd128_LdArr_F4:
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDFloat32x4Lib::EntryInfo::Load1, op);
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDFloat32x4Lib::EntryInfo::Load2, op);
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDFloat32x4Lib::EntryInfo::Load3, op);
+        break;
+    case Js::OpCode::Simd128_StArr_F4:
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDFloat32x4Lib::EntryInfo::Store1, op);
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDFloat32x4Lib::EntryInfo::Store2, op);
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDFloat32x4Lib::EntryInfo::Store3, op);
+        break;
+    case Js::OpCode::Simd128_LdArr_I4:
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDInt32x4Lib::EntryInfo::Load1, op);
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDInt32x4Lib::EntryInfo::Load2, op);
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDInt32x4Lib::EntryInfo::Load3, op);
+        break;
+    case Js::OpCode::Simd128_StArr_I4:
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDInt32x4Lib::EntryInfo::Store1, op);
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDInt32x4Lib::EntryInfo::Store2, op);
+        simdFuncInfoToOpcodeMap->AddNew(&Js::SIMDInt32x4Lib::EntryInfo::Store3, op);
+        break;
+    default:
+        // Every other opcode has only its primary entry point.
+        break;
+    }
+}
+
 Js::OpCode ThreadContext::GetSimdOpcodeFromFuncInfo(Js::FunctionInfo * funcInfo)
 {
     Assert(simdFuncInfoToOpcodeMap != nullptr);

+ 2 - 1
lib/Runtime/Base/ThreadContext.h

@@ -434,6 +434,7 @@ public:
     SimdFuncSignature *simdOpcodeToSignatureMap;
 
     void AddSimdFuncToMaps(Js::OpCode op, ...);
+    void AddSimdFuncInfo(Js::OpCode op, Js::FunctionInfo *funcInfo);
     Js::OpCode GetSimdOpcodeFromFuncInfo(Js::FunctionInfo * funcInfo);
     void GetSimdFuncSignatureFromOpcode(Js::OpCode op, SimdFuncSignature &funcSignature);
 #endif
@@ -1627,4 +1628,4 @@ public:
     {
         threadContext->SetIsProfilingUserCode(oldIsProfilingUserCode);
     }
-};
+};

+ 51 - 38
lib/Runtime/ByteCode/OpCodesSimd.h

@@ -47,35 +47,43 @@ SIMD.js opcodes
 #define MACRO_SIMD_BACKEND_ONLY_EXTEND(opcode, asmjsLayout, opCodeAttrAsmJs, OpCodeAttr)
 #endif
 
-//                              OpCode                             , LayoutAsmJs                , OpCodeAttrAsmJs,          OpCodeAttr              Addition macro args                      FuncInfo               Ret and Args ValueTypes
-//                                |                                    |                                |                       |                       |                                        |                           |
-//                                v                                    v                                v                       v                       v                                        v                           v
+
+// helper macros
+#define T_F4    ValueType::GetSimd128(ObjectType::Simd128Float32x4)
+#define T_I4    ValueType::GetSimd128(ObjectType::Simd128Int32x4)
+#define T_INT   ValueType::GetInt(false)
+#define T_FLT   ValueType::Float
+
+
+//                              OpCode                             , LayoutAsmJs                , OpCodeAttrAsmJs,          OpCodeAttr              Additional macro args   FuncInfo        Ret and Args ValueTypes
+//                                |                                    |                                |                       |                       |                      |                  |
+//                                v                                    v                                v                       v                       v                      v                  v
 MACRO_SIMD                  ( Simd128_Start                     , Empty                              , None           ,        None                          ,        0)               // Just a marker to indicate SIMD opcodes region
 
 // Int32x4
-MACRO_SIMD_WMS              ( Simd128_IntsToI4                  , Int32x4_1Int4                     , None           ,        OpCanCSE         ,       6,  &Js::SIMDInt32x4Lib::EntryInfo::Int32x4, ValueType::GetSimd128(ObjectType::Simd128Int32x4), ValueType::GetInt(false), ValueType::GetInt(false), ValueType::GetInt(false), ValueType::GetInt(false))
-MACRO_SIMD_WMS              ( Simd128_Splat_I4                  , Int32x4_1Int1                     , None           ,        OpCanCSE         ,       0)
+MACRO_SIMD_WMS              ( Simd128_IntsToI4                  , Int32x4_1Int4                     , None           ,        OpCanCSE         ,       6,  &Js::SIMDInt32x4Lib::EntryInfo::Int32x4, T_I4, T_INT, T_INT, T_INT, T_INT)
+MACRO_SIMD_WMS              ( Simd128_Splat_I4                  , Int32x4_1Int1                     , None           ,        OpCanCSE         ,       3,  &Js::SIMDInt32x4Lib::EntryInfo::Splat  , T_I4, T_INT)
 MACRO_SIMD_WMS              ( Simd128_FromFloat64x2_I4          , Int32x4_1Float64x2_1              , None           ,        OpCanCSE         ,       0)
 MACRO_SIMD_WMS              ( Simd128_FromFloat64x2Bits_I4      , Int32x4_1Float64x2_1              , None           ,        OpCanCSE         ,       0)
-MACRO_SIMD_WMS              ( Simd128_FromFloat32x4_I4          , Int32x4_1Float32x4_1              , None           ,        OpCanCSE         ,       0)
-MACRO_SIMD_WMS              ( Simd128_FromFloat32x4Bits_I4      , Int32x4_1Float32x4_1              , None           ,        OpCanCSE         ,       0)
-MACRO_SIMD_WMS              ( Simd128_Neg_I4                    , Int32x4_2                         , None           ,        OpCanCSE         ,       0)
-MACRO_SIMD_WMS              ( Simd128_Add_I4                    , Int32x4_3                         , None           ,        OpCanCSE         ,       4,  &Js::SIMDInt32x4Lib::EntryInfo::Add,    ValueType::GetSimd128(ObjectType::Simd128Int32x4), ValueType::GetSimd128(ObjectType::Simd128Int32x4), ValueType::GetSimd128(ObjectType::Simd128Int32x4))
-MACRO_SIMD_WMS              ( Simd128_Sub_I4                    , Int32x4_3                         , None           ,        OpCanCSE         ,       0)
-MACRO_SIMD_WMS              ( Simd128_Mul_I4                    , Int32x4_3                         , None           ,        OpCanCSE         ,       0)
+MACRO_SIMD_WMS              ( Simd128_FromFloat32x4_I4          , Int32x4_1Float32x4_1              , None           ,        OpCanCSE         ,       3,  &Js::SIMDInt32x4Lib::EntryInfo::FromFloat32x4     , T_I4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_FromFloat32x4Bits_I4      , Int32x4_1Float32x4_1              , None           ,        OpCanCSE         ,       3,  &Js::SIMDInt32x4Lib::EntryInfo::FromFloat32x4Bits , T_I4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_Neg_I4                    , Int32x4_2                         , None           ,        OpCanCSE         ,       3,  &Js::SIMDInt32x4Lib::EntryInfo::Neg,    T_I4, T_I4)
+MACRO_SIMD_WMS              ( Simd128_Add_I4                    , Int32x4_3                         , None           ,        OpCanCSE         ,       4,  &Js::SIMDInt32x4Lib::EntryInfo::Add,    T_I4, T_I4, T_I4)
+MACRO_SIMD_WMS              ( Simd128_Sub_I4                    , Int32x4_3                         , None           ,        OpCanCSE         ,       4,  &Js::SIMDInt32x4Lib::EntryInfo::Sub,    T_I4, T_I4, T_I4)
+MACRO_SIMD_WMS              ( Simd128_Mul_I4                    , Int32x4_3                         , None           ,        OpCanCSE         ,       4,  &Js::SIMDInt32x4Lib::EntryInfo::Mul,    T_I4, T_I4, T_I4)
 MACRO_SIMD_WMS              ( Simd128_Lt_I4                     , Int32x4_3                         , None           ,        OpCanCSE         ,       0)
 MACRO_SIMD_WMS              ( Simd128_Gt_I4                     , Int32x4_3                         , None           ,        OpCanCSE         ,       0)
 MACRO_SIMD_WMS              ( Simd128_Eq_I4                     , Int32x4_3                         , None           ,        OpCanCSE         ,       0)
 MACRO_SIMD_WMS              ( Simd128_Select_I4                 , Int32x4_4                         , None           ,        OpCanCSE         ,       0)
-MACRO_SIMD_WMS              ( Simd128_And_I4                    , Int32x4_3                         , None           ,        OpCanCSE         ,       0)
-MACRO_SIMD_WMS              ( Simd128_Or_I4                     , Int32x4_3                         , None           ,        OpCanCSE         ,       0)
-MACRO_SIMD_WMS              ( Simd128_Xor_I4                    , Int32x4_3                         , None           ,        OpCanCSE         ,       0)
-MACRO_SIMD_WMS              ( Simd128_Not_I4                    , Int32x4_2                         , None           ,        OpCanCSE         ,       0)
+MACRO_SIMD_WMS              ( Simd128_And_I4                    , Int32x4_3                         , None           ,        OpCanCSE         ,       4,  &Js::SIMDInt32x4Lib::EntryInfo::And,    T_I4, T_I4, T_I4)
+MACRO_SIMD_WMS              ( Simd128_Or_I4                     , Int32x4_3                         , None           ,        OpCanCSE         ,       4,  &Js::SIMDInt32x4Lib::EntryInfo::Or,     T_I4, T_I4, T_I4)
+MACRO_SIMD_WMS              ( Simd128_Xor_I4                    , Int32x4_3                         , None           ,        OpCanCSE         ,       4,  &Js::SIMDInt32x4Lib::EntryInfo::Xor,    T_I4, T_I4, T_I4)
+MACRO_SIMD_WMS              ( Simd128_Not_I4                    , Int32x4_2                         , None           ,        OpCanCSE         ,       3,  &Js::SIMDInt32x4Lib::EntryInfo::Not,    T_I4, T_I4)
 MACRO_SIMD_WMS              ( Simd128_Shr_I4                    , Int32x4_2Int1                     , None           ,        OpCanCSE         ,       0)
 MACRO_SIMD_WMS              ( Simd128_ShrA_I4                   , Int32x4_2Int1                     , None           ,        OpCanCSE         ,       0)
 MACRO_SIMD_WMS              ( Simd128_Shl_I4                    , Int32x4_2Int1                     , None           ,        OpCanCSE         ,       0)
-MACRO_SIMD_WMS              ( Simd128_Swizzle_I4                , Int32x4_2Int4                     , None           ,        OpCanCSE         ,       0)
-MACRO_SIMD_WMS              ( Simd128_Shuffle_I4                , Int32x4_3Int4                     , None           ,        OpCanCSE         ,       0)
+MACRO_SIMD_WMS              ( Simd128_Swizzle_I4                , Int32x4_2Int4                     , None           ,        OpCanCSE         ,       7,   &Js::SIMDInt32x4Lib::EntryInfo::Swizzle,   T_I4, T_I4, T_INT, T_INT, T_INT, T_INT)
+MACRO_SIMD_WMS              ( Simd128_Shuffle_I4                , Int32x4_3Int4                     , None           ,        OpCanCSE         ,       8,   &Js::SIMDInt32x4Lib::EntryInfo::Shuffle,   T_I4, T_I4, T_I4, T_INT, T_INT, T_INT, T_INT)
 MACRO_SIMD_WMS              ( Simd128_LdSignMask_I4             , Int1Int32x4_1                     , None           ,        OpCanCSE         ,       0)
 MACRO_SIMD_ASMJS_ONLY_WMS   ( Simd128_Ld_I4                     , Int32x4_2                         , None           ,        None                      )
 MACRO_SIMD_ASMJS_ONLY_WMS   ( Simd128_LdSlot_I4                 , ElementSlot                       , None           ,        None                      )
@@ -85,26 +93,26 @@ MACRO_SIMD_ASMJS_ONLY_WMS   ( Simd128_I_ArgOut_I4               , Reg1Int32x4_1
 MACRO_SIMD_ASMJS_ONLY_WMS   ( Simd128_I_Conv_VTI4               , Int32x4_2                         , None           ,        None                      )
 
 // Float32x4
-MACRO_SIMD_WMS              ( Simd128_FloatsToF4                , Float32x4_1Float4                 , None           ,        OpCanCSE          ,      6,   &Js::SIMDFloat32x4Lib::EntryInfo::Float32x4, ValueType::GetSimd128(ObjectType::Simd128Float32x4), ValueType::Float, ValueType::Float, ValueType::Float, ValueType::Float)
-MACRO_SIMD_WMS              ( Simd128_Splat_F4                  , Float32x4_1Float1                 , None           ,        OpCanCSE          ,      0)
+MACRO_SIMD_WMS              ( Simd128_FloatsToF4                , Float32x4_1Float4                 , None           ,        OpCanCSE          ,      6,   &Js::SIMDFloat32x4Lib::EntryInfo::Float32x4, T_F4, T_FLT, T_FLT, T_FLT, T_FLT)
+MACRO_SIMD_WMS              ( Simd128_Splat_F4                  , Float32x4_1Float1                 , None           ,        OpCanCSE          ,      3,   &Js::SIMDFloat32x4Lib::EntryInfo::Splat    , T_F4, T_FLT)
 MACRO_SIMD_WMS              ( Simd128_FromFloat64x2_F4          , Float32x4_1Float64x2_1            , None           ,        OpCanCSE          ,      0)
 MACRO_SIMD_WMS              ( Simd128_FromFloat64x2Bits_F4      , Float32x4_1Float64x2_1            , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_FromInt32x4_F4            , Float32x4_1Int32x4_1              , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_FromInt32x4Bits_F4        , Float32x4_1Int32x4_1              , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_Abs_F4                    , Float32x4_2                       , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_Neg_F4                    , Float32x4_2                       , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_Add_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      4,   &Js::SIMDFloat32x4Lib::EntryInfo::Add,   ValueType::GetSimd128(ObjectType::Simd128Float32x4), ValueType::GetSimd128(ObjectType::Simd128Float32x4), ValueType::GetSimd128(ObjectType::Simd128Float32x4))
-MACRO_SIMD_WMS              ( Simd128_Sub_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_Mul_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_Div_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      0)
+MACRO_SIMD_WMS              ( Simd128_FromInt32x4_F4            , Float32x4_1Int32x4_1              , None           ,        OpCanCSE          ,      3,   &Js::SIMDFloat32x4Lib::EntryInfo::FromInt32x4    , T_F4, T_I4)
+MACRO_SIMD_WMS              ( Simd128_FromInt32x4Bits_F4        , Float32x4_1Int32x4_1              , None           ,        OpCanCSE          ,      3,   &Js::SIMDFloat32x4Lib::EntryInfo::FromInt32x4Bits, T_F4, T_I4)
+MACRO_SIMD_WMS              ( Simd128_Abs_F4                    , Float32x4_2                       , None           ,        OpCanCSE          ,      3,   &Js::SIMDFloat32x4Lib::EntryInfo::Abs,   T_F4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_Neg_F4                    , Float32x4_2                       , None           ,        OpCanCSE          ,      3,   &Js::SIMDFloat32x4Lib::EntryInfo::Neg,   T_F4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_Add_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      4,   &Js::SIMDFloat32x4Lib::EntryInfo::Add,   T_F4, T_F4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_Sub_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      4,   &Js::SIMDFloat32x4Lib::EntryInfo::Sub,   T_F4, T_F4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_Mul_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      4,   &Js::SIMDFloat32x4Lib::EntryInfo::Mul,   T_F4, T_F4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_Div_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      4,   &Js::SIMDFloat32x4Lib::EntryInfo::Div,   T_F4, T_F4, T_F4)
 MACRO_SIMD_WMS              ( Simd128_Clamp_F4                  , Float32x4_4                       , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_Min_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_Max_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_Rcp_F4                    , Float32x4_2                       , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_RcpSqrt_F4                , Float32x4_2                       , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_Sqrt_F4                   , Float32x4_2                       , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_Swizzle_F4                , Float32x4_2Int4                   , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_WMS              ( Simd128_Shuffle_F4                , Float32x4_3Int4                   , None           ,        OpCanCSE          ,      0)
+MACRO_SIMD_WMS              ( Simd128_Min_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      4,   &Js::SIMDFloat32x4Lib::EntryInfo::Min,   T_F4, T_F4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_Max_F4                    , Float32x4_3                       , None           ,        OpCanCSE          ,      4,   &Js::SIMDFloat32x4Lib::EntryInfo::Max,   T_F4, T_F4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_Rcp_F4                    , Float32x4_2                       , None           ,        OpCanCSE          ,      3,   &Js::SIMDFloat32x4Lib::EntryInfo::Reciprocal,     T_F4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_RcpSqrt_F4                , Float32x4_2                       , None           ,        OpCanCSE          ,      3,   &Js::SIMDFloat32x4Lib::EntryInfo::ReciprocalSqrt, T_F4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_Sqrt_F4                   , Float32x4_2                       , None           ,        OpCanCSE          ,      3,   &Js::SIMDFloat32x4Lib::EntryInfo::Sqrt,   T_F4, T_F4)
+MACRO_SIMD_WMS              ( Simd128_Swizzle_F4                , Float32x4_2Int4                   , None           ,        OpCanCSE          ,      7,   &Js::SIMDFloat32x4Lib::EntryInfo::Swizzle,   T_F4, T_F4, T_INT, T_INT, T_INT, T_INT)
+MACRO_SIMD_WMS              ( Simd128_Shuffle_F4                , Float32x4_3Int4                   , None           ,        OpCanCSE          ,      8,   &Js::SIMDFloat32x4Lib::EntryInfo::Shuffle,   T_F4, T_F4, T_F4, T_INT, T_INT, T_INT, T_INT)
 MACRO_SIMD_WMS              ( Simd128_Lt_F4                     , Float32x4_3                       , None           ,        OpCanCSE          ,      0)
 MACRO_SIMD_WMS              ( Simd128_LtEq_F4                   , Float32x4_3                       , None           ,        OpCanCSE          ,      0)
 MACRO_SIMD_WMS              ( Simd128_Eq_F4                     , Float32x4_3                       , None           ,        OpCanCSE          ,      0)
@@ -171,16 +179,16 @@ MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS   ( Simd128_StSlot_D2          , ElementSlot
 MACRO_SIMD_EXTEND_WMS     ( Simd128_Swizzle_D2                  , Float64x2_2Int2                   , None           ,        OpCanCSE          ,      0)
 MACRO_SIMD_EXTEND_WMS     ( Simd128_Shuffle_D2                  , Float64x2_3Int2                   , None           ,        OpCanCSE          ,      0)
 
-MACRO_SIMD_EXTEND_WMS     ( Simd128_LdArr_I4                    , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
+MACRO_SIMD_EXTEND_WMS     ( Simd128_LdArr_I4                    , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      4, &Js::SIMDInt32x4Lib::EntryInfo::Load, T_I4, ValueType::GetObject(ObjectType::Int8Array) /*dummy place-holder for any typed array*/, T_INT)
 MACRO_SIMD_EXTEND_WMS     ( Simd128_LdArrConst_I4               , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_EXTEND_WMS     ( Simd128_LdArr_F4                    , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
+MACRO_SIMD_EXTEND_WMS     ( Simd128_LdArr_F4                    , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      4, &Js::SIMDFloat32x4Lib::EntryInfo::Load, T_F4, ValueType::GetObject(ObjectType::Int8Array) /*dummy place-holder for any typed array*/, T_INT)
 MACRO_SIMD_EXTEND_WMS     ( Simd128_LdArrConst_F4               , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
 MACRO_SIMD_EXTEND_WMS     ( Simd128_LdArr_D2                    , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
 MACRO_SIMD_EXTEND_WMS     ( Simd128_LdArrConst_D2               , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
 
-MACRO_SIMD_EXTEND_WMS     ( Simd128_StArr_I4                    , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
+MACRO_SIMD_EXTEND_WMS     ( Simd128_StArr_I4                    , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      5, &Js::SIMDInt32x4Lib::EntryInfo::Store, ValueType::Undefined, ValueType::GetObject(ObjectType::Int8Array) /*dummy place-holder for any typed array*/, T_INT, T_I4)
 MACRO_SIMD_EXTEND_WMS     ( Simd128_StArrConst_I4               , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
-MACRO_SIMD_EXTEND_WMS     ( Simd128_StArr_F4                    , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
+MACRO_SIMD_EXTEND_WMS     ( Simd128_StArr_F4                    , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      5, &Js::SIMDFloat32x4Lib::EntryInfo::Store, ValueType::Undefined, ValueType::GetObject(ObjectType::Int8Array) /*dummy place-holder for any typed array*/, T_INT, T_F4)
 MACRO_SIMD_EXTEND_WMS     ( Simd128_StArrConst_F4               , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
 MACRO_SIMD_EXTEND_WMS     ( Simd128_StArr_D2                    , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
 MACRO_SIMD_EXTEND_WMS     ( Simd128_StArrConst_D2               , AsmSimdTypedArr                   , None           ,        OpCanCSE          ,      0)
@@ -191,6 +199,11 @@ MACRO_SIMD_EXTEND_WMS     ( Simd128_ExtractLane_F4              , Float1Float32x
 MACRO_SIMD_EXTEND_WMS     ( Simd128_ReplaceLane_F4              , Float32x4_2Int1Float1             , None           ,        OpCanCSE          ,      0)
 MACRO_SIMD_EXTEND         ( Simd128_End_Extend                  , Empty                             , None           ,        None              ,      0)   // Just a marker to indicate SIMD opcodes region
 
+#undef T_F4
+#undef T_I4
+#undef T_INT
+#undef T_FLT
+
 #undef MACRO_SIMD
 #undef MACRO_SIMD_WMS
 #undef MACRO_SIMD_ASMJS_ONLY_WMS

+ 3 - 0
lib/Runtime/ByteCode/OpLayouts.h

@@ -53,6 +53,9 @@ namespace Js {
 #if ENABLE_NATIVE_CODEGEN
     inline bool IsSimd128Opcode(OpCode o) { return (o > Js::OpCode::Simd128_Start && o < Js::OpCode::Simd128_End) || (o > Js::OpCode::Simd128_Start_Extend && o < Js::OpCode::Simd128_End_Extend); }
     inline uint Simd128OpcodeCount() { return (uint)(Js::OpCode::Simd128_End - Js::OpCode::Simd128_Start) + 1 + (uint)(Js::OpCode::Simd128_End_Extend - Js::OpCode::Simd128_Start_Extend) + 1; }
+    // True only for the Int32x4 and Float32x4 SIMD array-load opcodes.
+    inline bool IsSimd128Load(OpCode o)
+    {
+        switch (o)
+        {
+        case Js::OpCode::Simd128_LdArr_I4:
+        case Js::OpCode::Simd128_LdArr_F4:
+            return true;
+        default:
+            return false;
+        }
+    }
+    // True only for the Int32x4 and Float32x4 SIMD array-store opcodes.
+    inline bool IsSimd128Store(OpCode o)
+    {
+        switch (o)
+        {
+        case Js::OpCode::Simd128_StArr_I4:
+        case Js::OpCode::Simd128_StArr_F4:
+            return true;
+        default:
+            return false;
+        }
+    }
+    // True when o is accepted by either IsSimd128Load or IsSimd128Store.
+    inline bool IsSimd128LoadStore(OpCode o)
+    {
+        if (IsSimd128Load(o))
+        {
+            return true;
+        }
+        return IsSimd128Store(o);
+    }
 #endif
 
     ///----------------------------------------------------------------------------

+ 0 - 1
lib/Runtime/Language/SIMDUtils.cpp

@@ -6,7 +6,6 @@
 
 namespace Js
 {
-
     int32 SIMDCheckTypedArrayIndex(ScriptContext* scriptContext, Var index)
     {
         int32 int32Value;

+ 6 - 3
lib/Runtime/Language/SIMDUtils.h

@@ -4,8 +4,6 @@
 //-------------------------------------------------------------------------------------------------------
 #pragma once
 
-
-
 #define SIMD128_TYPE_SPEC_FLAG Js::Configuration::Global.flags.Simd128TypeSpec
 
 // The representations below assume little-endian.
@@ -113,6 +111,11 @@ const _x86_SIMDValue X86_ALL_NEG_ONES = { 0xffffffff, 0xffffffff, 0xffffffff, 0x
 const _x86_SIMDValue X86_ALL_ZEROS    = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 };
 const _x86_SIMDValue X86_LANE_W_ZEROS = { 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000 };
 
+// Lane masks: entry i holds 0xffffffff in its i-th 32-bit slot and zeros in the other three.
+const _x86_SIMDValue X86_4LANES_MASKS[] =
+{
+    { 0xffffffff, 0x00000000, 0x00000000, 0x00000000 },  // slot 0
+    { 0x00000000, 0xffffffff, 0x00000000, 0x00000000 },  // slot 1
+    { 0x00000000, 0x00000000, 0xffffffff, 0x00000000 },  // slot 2
+    { 0x00000000, 0x00000000, 0x00000000, 0xffffffff }   // slot 3
+};
+
 typedef _x86_SIMDValue X86SIMDValue;
 CompileAssert(sizeof(X86SIMDValue) == 16);
 #endif
@@ -120,6 +123,7 @@ CompileAssert(sizeof(X86SIMDValue) == 16);
 typedef SIMDValue     AsmJsSIMDValue; // alias for asmjs
 CompileAssert(sizeof(SIMDValue) == 16);
 
+class ValueType;
 
 namespace Js {
     int32 SIMDCheckTypedArrayIndex(ScriptContext* scriptContext, Var index);
@@ -167,5 +171,4 @@ namespace Js {
     enum class OpCode : ushort;
     uint32 SimdOpcodeAsIndex(Js::OpCode op);
 
-
 }

+ 60 - 58
lib/Runtime/Library/JavascriptLibrary.cpp

@@ -2687,63 +2687,64 @@ namespace Js
 
         // Float32x4
         JavascriptFunction* float32x4Function = library->AddFunctionToLibraryObject(simdObject, PropertyIds::Float32x4, &SIMDFloat32x4Lib::EntryInfo::Float32x4, 5, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::check, &SIMDFloat32x4Lib::EntryInfo::Check, 2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Float32x4] = float32x4Function;
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Check] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::check, &SIMDFloat32x4Lib::EntryInfo::Check, 2, PropertyNone);
         library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::zero, &SIMDFloat32x4Lib::EntryInfo::Zero, 1, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::splat, &SIMDFloat32x4Lib::EntryInfo::Splat, 2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Splat] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::splat, &SIMDFloat32x4Lib::EntryInfo::Splat, 2, PropertyNone);
 
         // Lane Access
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::extractLane, &SIMDFloat32x4Lib::EntryInfo::ExtractLane, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::replaceLane, &SIMDFloat32x4Lib::EntryInfo::ReplaceLane, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_ExtractLane] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::extractLane, &SIMDFloat32x4Lib::EntryInfo::ExtractLane, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_ReplaceLane] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::replaceLane, &SIMDFloat32x4Lib::EntryInfo::ReplaceLane, 4, PropertyNone);
 
 
         // type conversions
         library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::fromFloat64x2,     &SIMDFloat32x4Lib::EntryInfo::FromFloat64x2,     2, PropertyNone);
         library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::fromFloat64x2Bits, &SIMDFloat32x4Lib::EntryInfo::FromFloat64x2Bits, 2, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::fromInt32x4,       &SIMDFloat32x4Lib::EntryInfo::FromInt32x4,       2, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::fromInt32x4Bits,   &SIMDFloat32x4Lib::EntryInfo::FromInt32x4Bits,   2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_FromInt32x4] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::fromInt32x4, &SIMDFloat32x4Lib::EntryInfo::FromInt32x4, 2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_FromInt32x4Bits] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::fromInt32x4Bits, &SIMDFloat32x4Lib::EntryInfo::FromInt32x4Bits, 2, PropertyNone);
 
         // binary ops
         builtinFuncs[BuiltinFunction::SIMD_Float32x4_Add] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::add, &SIMDFloat32x4Lib::EntryInfo::Add, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::sub,    &SIMDFloat32x4Lib::EntryInfo::Sub,   3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::mul,    &SIMDFloat32x4Lib::EntryInfo::Mul,   3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::div,    &SIMDFloat32x4Lib::EntryInfo::Div,   3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::and,    &SIMDFloat32x4Lib::EntryInfo::And,   3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::or,     &SIMDFloat32x4Lib::EntryInfo::Or,    3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::xor,    &SIMDFloat32x4Lib::EntryInfo::Xor,   3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::min,    &SIMDFloat32x4Lib::EntryInfo::Min,   3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::max,    &SIMDFloat32x4Lib::EntryInfo::Max,   3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Sub] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::sub, &SIMDFloat32x4Lib::EntryInfo::Sub, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Mul] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::mul, &SIMDFloat32x4Lib::EntryInfo::Mul, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Div] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::div, &SIMDFloat32x4Lib::EntryInfo::Div, 3, PropertyNone);
+        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::and, &SIMDFloat32x4Lib::EntryInfo::And, 3, PropertyNone);
+        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::or, &SIMDFloat32x4Lib::EntryInfo::Or, 3, PropertyNone);
+        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::xor, &SIMDFloat32x4Lib::EntryInfo::Xor, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Min] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::min, &SIMDFloat32x4Lib::EntryInfo::Min, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Max] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::max, &SIMDFloat32x4Lib::EntryInfo::Max, 3, PropertyNone);
         library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::scale,  &SIMDFloat32x4Lib::EntryInfo::Scale, 3, PropertyNone);
 
         // unary ops
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::abs,            &SIMDFloat32x4Lib::EntryInfo::Abs,            2, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::neg,            &SIMDFloat32x4Lib::EntryInfo::Neg,            2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Abs] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::abs, &SIMDFloat32x4Lib::EntryInfo::Abs, 2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Neg] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::neg, &SIMDFloat32x4Lib::EntryInfo::Neg, 2, PropertyNone);
         library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::not,            &SIMDFloat32x4Lib::EntryInfo::Not,            2, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::sqrt,           &SIMDFloat32x4Lib::EntryInfo::Sqrt,           2, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::reciprocal,     &SIMDFloat32x4Lib::EntryInfo::Reciprocal,     2, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::reciprocalSqrt, &SIMDFloat32x4Lib::EntryInfo::ReciprocalSqrt, 2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Sqrt] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::sqrt, &SIMDFloat32x4Lib::EntryInfo::Sqrt, 2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Reciprocal] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::reciprocal, &SIMDFloat32x4Lib::EntryInfo::Reciprocal, 2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_ReciprocalSqrt] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::reciprocalSqrt, &SIMDFloat32x4Lib::EntryInfo::ReciprocalSqrt, 2, PropertyNone);
         // compare ops
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::lessThan,           &SIMDFloat32x4Lib::EntryInfo::LessThan,          3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::lessThanOrEqual,    &SIMDFloat32x4Lib::EntryInfo::LessThanOrEqual,   3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::equal,              &SIMDFloat32x4Lib::EntryInfo::Equal,             3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::notEqual,           &SIMDFloat32x4Lib::EntryInfo::NotEqual,          3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::greaterThan,        &SIMDFloat32x4Lib::EntryInfo::GreaterThan,       3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::greaterThanOrEqual, &SIMDFloat32x4Lib::EntryInfo::GreaterThanOrEqual,3, PropertyNone);
+        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::lessThan, &SIMDFloat32x4Lib::EntryInfo::LessThan, 3, PropertyNone);
+        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::lessThanOrEqual, &SIMDFloat32x4Lib::EntryInfo::LessThanOrEqual, 3, PropertyNone);
+        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::equal, &SIMDFloat32x4Lib::EntryInfo::Equal, 3, PropertyNone);
+        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::notEqual, &SIMDFloat32x4Lib::EntryInfo::NotEqual, 3, PropertyNone);
+        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::greaterThan, &SIMDFloat32x4Lib::EntryInfo::GreaterThan, 3, PropertyNone);
+        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::greaterThanOrEqual, &SIMDFloat32x4Lib::EntryInfo::GreaterThanOrEqual, 3, PropertyNone);
 
         // others
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::swizzle,            &SIMDFloat32x4Lib::EntryInfo::Swizzle, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::shuffle,            &SIMDFloat32x4Lib::EntryInfo::Shuffle, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Shuffle] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::shuffle, &SIMDFloat32x4Lib::EntryInfo::Shuffle, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Swizzle] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::swizzle, &SIMDFloat32x4Lib::EntryInfo::Swizzle, 3, PropertyNone);
         library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::clamp,              &SIMDFloat32x4Lib::EntryInfo::Clamp,   4, PropertyNone);
         library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::select,             &SIMDFloat32x4Lib::EntryInfo::Select,  4, PropertyNone);
 
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::load,  &SIMDFloat32x4Lib::EntryInfo::Load,  3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::load1, &SIMDFloat32x4Lib::EntryInfo::Load1, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::load2, &SIMDFloat32x4Lib::EntryInfo::Load2, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::load3, &SIMDFloat32x4Lib::EntryInfo::Load3, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Load] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::load, &SIMDFloat32x4Lib::EntryInfo::Load, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Load1] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::load1, &SIMDFloat32x4Lib::EntryInfo::Load1, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Load2] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::load2, &SIMDFloat32x4Lib::EntryInfo::Load2, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Load3] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::load3, &SIMDFloat32x4Lib::EntryInfo::Load3, 3, PropertyNone);
 
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::store,  &SIMDFloat32x4Lib::EntryInfo::Store,  4, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::store1, &SIMDFloat32x4Lib::EntryInfo::Store1, 4, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::store2, &SIMDFloat32x4Lib::EntryInfo::Store2, 4, PropertyNone);
-        library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::store3, &SIMDFloat32x4Lib::EntryInfo::Store3, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Store] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::store, &SIMDFloat32x4Lib::EntryInfo::Store, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Store1] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::store1, &SIMDFloat32x4Lib::EntryInfo::Store1, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Store2] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::store2, &SIMDFloat32x4Lib::EntryInfo::Store2, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Float32x4_Store3] = library->AddFunctionToLibraryObject(float32x4Function, PropertyIds::store3, &SIMDFloat32x4Lib::EntryInfo::Store3, 4, PropertyNone);
         // end Float32x4
 
         // Float64x2
@@ -2799,9 +2800,10 @@ namespace Js
 
         // Int32x4
         JavascriptFunction* int32x4Function = library->AddFunctionToLibraryObject(simdObject, PropertyIds::Int32x4, &SIMDInt32x4Lib::EntryInfo::Int32x4, 5, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::check,        &SIMDInt32x4Lib::EntryInfo::Check,      2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Int32x4] = int32x4Function;
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Check] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::check, &SIMDInt32x4Lib::EntryInfo::Check, 2, PropertyNone);
         library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::zero,         &SIMDInt32x4Lib::EntryInfo::Zero,       1, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::splat,        &SIMDInt32x4Lib::EntryInfo::Splat,      2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Splat] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::splat, &SIMDInt32x4Lib::EntryInfo::Splat, 2, PropertyNone);
         library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::bool_,        &SIMDInt32x4Lib::EntryInfo::Bool,       5, PropertyNone);
         library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::withFlagX,    &SIMDInt32x4Lib::EntryInfo::WithFlagX,  3, PropertyNone);
         library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::withFlagY,    &SIMDInt32x4Lib::EntryInfo::WithFlagY,  3, PropertyNone);
@@ -2809,25 +2811,25 @@ namespace Js
         library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::withFlagW,    &SIMDInt32x4Lib::EntryInfo::WithFlagW,  3, PropertyNone);
 
         // Lane Access
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::extractLane, &SIMDInt32x4Lib::EntryInfo::ExtractLane, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::replaceLane, &SIMDInt32x4Lib::EntryInfo::ReplaceLane, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_ExtractLane] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::extractLane, &SIMDInt32x4Lib::EntryInfo::ExtractLane, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_ReplaceLane] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::replaceLane, &SIMDInt32x4Lib::EntryInfo::ReplaceLane, 4, PropertyNone);
 
         // type conversions
         library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::fromFloat64x2, &SIMDInt32x4Lib::EntryInfo::FromFloat64x2,         2, PropertyNone);
         library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::fromFloat64x2Bits, &SIMDInt32x4Lib::EntryInfo::FromFloat64x2Bits, 2, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::fromFloat32x4, &SIMDInt32x4Lib::EntryInfo::FromFloat32x4,         2, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::fromFloat32x4Bits, &SIMDInt32x4Lib::EntryInfo::FromFloat32x4Bits, 2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_FromFloat32x4] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::fromFloat32x4, &SIMDInt32x4Lib::EntryInfo::FromFloat32x4, 2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_FromFloat32x4Bits] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::fromFloat32x4Bits, &SIMDInt32x4Lib::EntryInfo::FromFloat32x4Bits, 2, PropertyNone);
 
         // binary ops
         builtinFuncs[BuiltinFunction::SIMD_Int32x4_Add] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::add, &SIMDInt32x4Lib::EntryInfo::Add, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::sub, &SIMDInt32x4Lib::EntryInfo::Sub, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::mul, &SIMDInt32x4Lib::EntryInfo::Mul, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::and, &SIMDInt32x4Lib::EntryInfo::And, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::or,  &SIMDInt32x4Lib::EntryInfo::Or,  3, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::xor, &SIMDInt32x4Lib::EntryInfo::Xor, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Sub] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::sub, &SIMDInt32x4Lib::EntryInfo::Sub, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Mul] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::mul, &SIMDInt32x4Lib::EntryInfo::Mul, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_And] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::and, &SIMDInt32x4Lib::EntryInfo::And, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Or] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::or, &SIMDInt32x4Lib::EntryInfo::Or, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Xor] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::xor, &SIMDInt32x4Lib::EntryInfo::Xor, 3, PropertyNone);
 
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::neg, &SIMDInt32x4Lib::EntryInfo::Neg, 2, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::not, &SIMDInt32x4Lib::EntryInfo::Not, 2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Neg] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::neg, &SIMDInt32x4Lib::EntryInfo::Neg, 2, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Not] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::not, &SIMDInt32x4Lib::EntryInfo::Not, 2, PropertyNone);
 
         // compare ops
         library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::lessThan,     &SIMDInt32x4Lib::EntryInfo::LessThan,    3, PropertyNone);
@@ -2835,8 +2837,8 @@ namespace Js
         library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::greaterThan,  &SIMDInt32x4Lib::EntryInfo::GreaterThan, 3, PropertyNone);
 
         // others
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::swizzle,      &SIMDInt32x4Lib::EntryInfo::Swizzle,     3, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::shuffle,      &SIMDInt32x4Lib::EntryInfo::Shuffle,     4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Swizzle] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::swizzle, &SIMDInt32x4Lib::EntryInfo::Swizzle, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Shuffle] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::shuffle, &SIMDInt32x4Lib::EntryInfo::Shuffle, 4, PropertyNone);
 
         // shift
         library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::shiftLeft,            &SIMDInt32x4Lib::EntryInfo::ShiftLeft,              3, PropertyNone);
@@ -2846,15 +2848,15 @@ namespace Js
         // select
         library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::select, &SIMDInt32x4Lib::EntryInfo::Select, 4, PropertyNone);
 
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::load,  &SIMDInt32x4Lib::EntryInfo::Load, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::load1, &SIMDInt32x4Lib::EntryInfo::Load1, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::load2, &SIMDInt32x4Lib::EntryInfo::Load2, 3, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::load3, &SIMDInt32x4Lib::EntryInfo::Load3, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Load] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::load, &SIMDInt32x4Lib::EntryInfo::Load, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Load1] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::load1, &SIMDInt32x4Lib::EntryInfo::Load1, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Load2] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::load2, &SIMDInt32x4Lib::EntryInfo::Load2, 3, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Load3] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::load3, &SIMDInt32x4Lib::EntryInfo::Load3, 3, PropertyNone);
 
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::store,  &SIMDInt32x4Lib::EntryInfo::Store, 4, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::store1, &SIMDInt32x4Lib::EntryInfo::Store1, 4, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::store2, &SIMDInt32x4Lib::EntryInfo::Store2, 4, PropertyNone);
-        library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::store3, &SIMDInt32x4Lib::EntryInfo::Store3, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Store] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::store, &SIMDInt32x4Lib::EntryInfo::Store, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Store1] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::store1, &SIMDInt32x4Lib::EntryInfo::Store1, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Store2] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::store2, &SIMDInt32x4Lib::EntryInfo::Store2, 4, PropertyNone);
+        builtinFuncs[BuiltinFunction::SIMD_Int32x4_Store3] = library->AddFunctionToLibraryObject(int32x4Function, PropertyIds::store3, &SIMDInt32x4Lib::EntryInfo::Store3, 4, PropertyNone);
 
         // end Int32x4
 

+ 65 - 0
lib/Runtime/LibraryFunction.h

@@ -83,8 +83,73 @@ LIBRARY_FUNCTION(String,        PadEnd,             2,    BIF_UseSrc0 | BIF_Vari
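 // SIMD_JS: built-in table entries for the SIMD.js Float32x4 and Int32x4 library functions.
 // NOTE(review): the numeric column looks like the argument count excluding `this`
 // (e.g. 2 for Add, 6 for Shuffle) - confirm against the LIBRARY_FUNCTION macro definition.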
 #if ENABLE_NATIVE_CODEGEN
 LIBRARY_FUNCTION(SIMD_Float32x4,    Float32x4,         4, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Float32x4)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Check,             1, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Check)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Splat,             1, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Splat)
+
+LIBRARY_FUNCTION(SIMD_Float32x4,    ExtractLane,       2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::ExtractLane)
+LIBRARY_FUNCTION(SIMD_Float32x4,    ReplaceLane,       3, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::ReplaceLane)
+
+LIBRARY_FUNCTION(SIMD_Float32x4,    FromInt32x4,       1, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::FromInt32x4)
+LIBRARY_FUNCTION(SIMD_Float32x4,    FromInt32x4Bits,   1, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::FromInt32x4Bits)
+
 LIBRARY_FUNCTION(SIMD_Float32x4,    Add,               2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Add)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Sub,               2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Sub)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Mul,               2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Mul)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Div,               2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Div)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Min,               2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Min)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Max,               2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Max)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Abs,               1, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Abs)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Neg,               1, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Neg)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Sqrt,              1, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Sqrt)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Reciprocal,        1, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Reciprocal)
+LIBRARY_FUNCTION(SIMD_Float32x4,    ReciprocalSqrt,    1, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::ReciprocalSqrt)
+
+LIBRARY_FUNCTION(SIMD_Float32x4,    LessThan,          2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::LessThan)
+LIBRARY_FUNCTION(SIMD_Float32x4,    LessThanOrEqual,   2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::LessThanOrEqual)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Equal,             2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Equal)
+LIBRARY_FUNCTION(SIMD_Float32x4,    NotEqual,          2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::NotEqual)
+LIBRARY_FUNCTION(SIMD_Float32x4,    GreaterThan,       2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::GreaterThan)
+LIBRARY_FUNCTION(SIMD_Float32x4,    GreaterThanOrEqual,2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::GreaterThanOrEqual)
+
+LIBRARY_FUNCTION(SIMD_Float32x4,    Shuffle,           6, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Shuffle)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Swizzle,           5, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Swizzle)
+
+LIBRARY_FUNCTION(SIMD_Float32x4,    Load,              2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Load)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Load1,             2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Load1)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Load2,             2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Load2)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Load3,             2, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Load3)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Store,             3, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Store)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Store1,            3, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Store1)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Store2,            3, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Store2)
+LIBRARY_FUNCTION(SIMD_Float32x4,    Store3,            3, BIF_IgnoreDst                                                 , SIMDFloat32x4Lib::EntryInfo::Store3)
 
 LIBRARY_FUNCTION(SIMD_Int32x4,      Int32x4,           4, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Int32x4)
+LIBRARY_FUNCTION(SIMD_Int32x4,      Check,             1, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Check)
+LIBRARY_FUNCTION(SIMD_Int32x4,      Splat,             1, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Splat)
+LIBRARY_FUNCTION(SIMD_Int32x4,      ExtractLane,       2, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::ExtractLane)
+LIBRARY_FUNCTION(SIMD_Int32x4,      ReplaceLane,       3, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::ReplaceLane)
+
+LIBRARY_FUNCTION(SIMD_Int32x4,      FromFloat32x4,       1, BIF_IgnoreDst                                               , SIMDInt32x4Lib::EntryInfo::FromFloat32x4)
+LIBRARY_FUNCTION(SIMD_Int32x4,      FromFloat32x4Bits,   1, BIF_IgnoreDst                                               , SIMDInt32x4Lib::EntryInfo::FromFloat32x4Bits)
+
 LIBRARY_FUNCTION(SIMD_Int32x4,      Add,               2, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Add)
+LIBRARY_FUNCTION(SIMD_Int32x4,      Sub,               2, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Sub)
+LIBRARY_FUNCTION(SIMD_Int32x4,      Mul,               2, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Mul)
+LIBRARY_FUNCTION(SIMD_Int32x4,      And,               2, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::And)
+LIBRARY_FUNCTION(SIMD_Int32x4,      Or,                2, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Or)
+LIBRARY_FUNCTION(SIMD_Int32x4,      Xor,               2, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Xor)
+LIBRARY_FUNCTION(SIMD_Int32x4,      Neg,               1, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Neg)
+LIBRARY_FUNCTION(SIMD_Int32x4,      Not,               1, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Not)
+
+LIBRARY_FUNCTION(SIMD_Int32x4,    Shuffle,             6, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Shuffle)
+LIBRARY_FUNCTION(SIMD_Int32x4,    Swizzle,             5, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Swizzle)
+
+LIBRARY_FUNCTION(SIMD_Int32x4,    Load,              2, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Load)
+LIBRARY_FUNCTION(SIMD_Int32x4,    Load1,             2, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Load1)
+LIBRARY_FUNCTION(SIMD_Int32x4,    Load2,             2, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Load2)
+LIBRARY_FUNCTION(SIMD_Int32x4,    Load3,             2, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Load3)
+LIBRARY_FUNCTION(SIMD_Int32x4,    Store,             3, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Store)
+LIBRARY_FUNCTION(SIMD_Int32x4,    Store1,            3, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Store1)
+LIBRARY_FUNCTION(SIMD_Int32x4,    Store2,            3, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Store2)
+LIBRARY_FUNCTION(SIMD_Int32x4,    Store3,            3, BIF_IgnoreDst                                                 , SIMDInt32x4Lib::EntryInfo::Store3)
 #endif

+ 78 - 0
test/SIMD.TypeSpec/excessive-bailouts.js

@@ -0,0 +1,78 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+
+/*
+Perf BUG:
+Scenario:
+Loop-body JITting happens on the second call. We only have partial profiling info: either the X value or the Y value. The other value remained Undefined because the code that assigns it didn't execute.
+The FromVars for x and y are hoisted outside the compiled loop because x and y are used before they are defined.
+Once we enter the jitted loop body, one of the FromVars before the loop starts will fail because X or Y is undefined, so we bail out every time we try to enter the loop.
+
+Remedy: 
+    1. Don't hoist FromVars outside loops if Value can be Undefined/Null.
+    2. Always keep Var on merges, if we are in JIT loop body. (This is important for correctness). 
+    3. Always keep Var on merges, if value can be undefined/Null.
+*/
+
+WScript.LoadScriptFile("..\\UnitTestFramework\\SimdJsHelpers.js");
+var globTotal;
+function func1(c, d, e) // `e` is never read in this body
+{
+    var x, y;
+    var i =0;
+    var j = 0;
+    // Seed globTotal with Float32x4 ones when c == true, or Int32x4 twos when that fails and d == false.
+    if (c == true)
+    {
+        x = SIMD.Float32x4(1, 1, 1, 1);
+        globTotal = x;
+    }
+    else if (d == false)
+    {
+        y = SIMD.Int32x4(2, 2, 2, 2);
+        globTotal = y;
+    }
+    // x / y are read inside the nested loops before they are re-assigned there; at most one of them was set above.
+    for (i = 0; i < 10; i++)
+    {
+        for (j = 0; j < 10; j++)
+        {
+            if (c == true)
+            {
+                globTotal = SIMD.Float32x4.add(globTotal, x);
+                x = SIMD.Float32x4(1, 1, 1, 1);
+            }
+            else if (d == false)
+            {
+                globTotal = SIMD.Int32x4.add(globTotal, y);
+                y = SIMD.Int32x4(2, 2, 2, 2);
+            }
+        }
+        // 10 x 10 iterations in total: globTotal accumulates 100 adds of x (or y).
+    }
+    // x is only assigned on the c == true path, so this returns undefined otherwise.
+    return x;
+}
+
+var c = false; // c and d start false and are flipped together after every call below
+var d = false;
+var z;
+for (i = 0; i < 100; i++) // i is not declared with var in this script
+{
+z = func1(c, d);
+if (i % 2 == 0) // even i: c and d are both false, so func1 took the Int32x4 path (2 + 100 * 2 per lane)
+{
+    equalSimd([202, 202, 202, 202], globTotal, SIMD.Int32x4, "func1");
+}
+else // odd i: c is true, so func1 took the Float32x4 path (1 + 100 * 1 per lane)
+{
+    equalSimd([101.0,101.0,101.0,101.0], globTotal, SIMD.Float32x4, "func1");
+}
+c = !c;
+d = !d;
+}
+
+print("PASS");
+

+ 46 - 0
test/SIMD.TypeSpec/if-test.js

@@ -0,0 +1,46 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+
+WScript.LoadScriptFile("..\\UnitTestFramework\\SimdJsHelpers.js");
+function func1(c, d, e) // `e` is never read in this body
+{
+    var x;
+    var i =0;
+    // x is an Int32x4 when c == true and a Float32x4 otherwise.
+    if (c == true)
+    {
+        x = SIMD.Int32x4(1, 2.0, 3, 4);
+    }
+    else
+    {
+        x = SIMD.Float32x4(5, 6.5, 7, 8.5);
+    }
+    // Both branches below return, so the loop body runs at most once; d selects which add is applied to x.
+    for (i = 0; i < 10; i++)
+    {
+        if (d == true)
+        {
+            return SIMD.Int32x4.add(x, x);
+        }
+        else
+        {
+            return SIMD.Float32x4.add(x, x);
+        }
+    }
+    // Not reached: the loop above always returns on its first iteration.
+    
+}
+
+var c = true; // c and d always hold the same value: both start true and both flip after each call
+var d = true;
+var z;
+for (i = 0; i < 100; i++) // i is not declared with var in this script
+{
+    z = func1(c, d);
+    c = !c;
+    d = !d;
+}
+equalSimd([10.0,13.0,14.0,17.0], z, SIMD.Float32x4, "func1"); // last call (i == 99) ran with c and d false: Float32x4(5, 6.5, 7, 8.5) added to itself
+print("PASS");

+ 69 - 0
test/SIMD.TypeSpec/loop-test-1.js

@@ -0,0 +1,69 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+
+WScript.LoadScriptFile("..\\UnitTestFramework\\SimdJsHelpers.js");
+
+var globTotal;
+function increment(a, b, lib)
+{
+    // Returns lib.add(a, b); callers in this file pass SIMD.Float32x4 or SIMD.Int32x4 as lib.
+    return lib.add(a, b);
+}
+function func1(c, d, e) // `e` is never read in this body
+{
+    var x, y;
+    var i =0;
+    var j = 0;
+    // Seed globTotal with Float32x4 ones when c == true, or Int32x4 twos when that fails and d == false.
+    if (c == true)
+    {
+        x = SIMD.Float32x4(1, 1, 1, 1);
+        globTotal = x;
+    }
+    else if (d == false)
+    {
+        y = SIMD.Int32x4(2, 2, 2, 2);
+        globTotal = y;
+    }
+    // 10 x 5 iterations; each one adds x (or y) into globTotal through increment() and then re-creates x (or y).
+    for (i = 0; i < 10; i++)
+    {
+        for (j = 0; j < 5; j++)
+        {
+            if (c == true)
+            {
+                globTotal = increment(globTotal, x, SIMD.Float32x4);
+                x = SIMD.Float32x4(1, 1, 1, 1);
+            }
+            else if (d == false)
+            {
+                globTotal = increment(globTotal, y, SIMD.Int32x4);
+                y = SIMD.Int32x4(2, 2, 2, 2);
+            }
+        }
+    }
+    return x; // x is only assigned on the c == true path
+}
+
+var c = false;
+var d = false;
+var z;
+func1(c, d);
+// c and d are false here: func1 takes the Int32x4 path, 2 + (10 * 5) * 2 = 102 per lane.
+for (i = 0; i < 10; i++)
+{
+    z = func1(c, d);
+    equalSimd([102,102,102,102], globTotal, SIMD.Int32x4, "func1");
+}
+c = !c;
+d = !d;
+// c and d are now true: func1 takes the Float32x4 path (1 + 50 * 1 = 51 per lane). Bail on No profile on first call.
+for (i = 0; i < 10; i++)
+{
+    z = func1(c, d);
+    equalSimd([51.0, 51.0, 51.0, 51.0], globTotal, SIMD.Float32x4, "func1");
+}
+
+print("PASS");

+ 57 - 0
test/SIMD.TypeSpec/loop-test-2.js

@@ -0,0 +1,57 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+
+WScript.LoadScriptFile("..\\UnitTestFramework\\SimdJsHelpers.js");
+function func1()
+{
+    var x, y;
+    var i =0;
+    var j = 0;
+    // y is assigned only inside the nested loops (3 x 3 iterations); x is never used.
+    for (i = 0; i < 3; i++)
+    {
+        for (j = 0; j < 3; j++)
+        {
+            y = SIMD.Int32x4(2, 2, 2, 2);
+            // y is overwritten with the same constant on every iteration.
+        }
+    }
+    return y;
+}
+var z; // declared but not used below; the checked result is stored in x instead
+func1();
+func1();
+func1();
+// func1 is called 23 times with its result discarded before the final, checked call.
+func1();
+func1();
+func1();
+
+func1();
+func1();
+func1();
+
+func1();
+func1();
+func1();
+
+func1();
+func1();
+func1();
+
+func1();
+func1();
+func1();
+
+func1();
+func1();
+func1();
+
+func1();
+func1();
+x = func1(); // x is not declared with var in this script
+equalSimd([2,2,2,2], x, SIMD.Int32x4, "func1");
+
+print("PASS");

+ 80 - 0
test/SIMD.TypeSpec/loop-test-3.js

@@ -0,0 +1,80 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+
+WScript.LoadScriptFile("..\\UnitTestFramework\\SimdJsHelpers.js");
+
+var globTotal;
+var globVar;
+function increment(a, b, lib)
+{
+    // Returns lib.add(a, b); when lib is SIMD.Float32x4 the result is reworked below and also stored in globVar.
+    var c = lib.add(a, b);
+    // Float32x4 only: c becomes abs(neg(a - (a + b + b))), written through globVar twice along the way.
+    if (lib === SIMD.Float32x4)
+    {
+        c = lib.add(c, b);    
+        globVar = lib.neg(lib.sub(a,c));
+        c = globVar;
+        c = lib.abs(c);
+        globVar = c;
+    }
+    return c;
+}
+function func1(c, d, e) // `e` is never read in this body
+{
+    var x, y;
+    var i =0;
+    var j = 0;
+    // Seed globTotal with Float32x4 ones when c == true, or Int32x4 twos when that fails and d == false.
+    if (c == true)
+    {
+        x = SIMD.Float32x4(1, 1, 1, 1);
+        globTotal = x;
+    }
+    else if (d == false)
+    {
+        y = SIMD.Int32x4(2, 2, 2, 2);
+        globTotal = y;
+    }
+    // 10 x 5 iterations; each one replaces globTotal with increment(globTotal, x or y, lib) and then re-creates x (or y).
+    for (i = 0; i < 10; i++)
+    {
+        for (j = 0; j < 5; j++)
+        {
+            if (c == true)
+            {
+                globTotal = increment(globTotal, x, SIMD.Float32x4);
+                x = SIMD.Float32x4(1, 1, 1, 1);
+            }
+            else if (d == false)
+            {
+                globTotal = increment(globTotal, y, SIMD.Int32x4);
+                y = SIMD.Int32x4(2, 2, 2, 2);
+            }
+        }
+    }
+    return x; // x is only assigned on the c == true path
+}
+
+var c = false;
+var d = false;
+var z;
+func1(c, d);
+// c and d are false here: func1 takes the Int32x4 path, where increment is a plain add: 2 + (10 * 5) * 2 = 102 per lane.
+for (i = 0; i < 10; i++)
+{
+    z = func1(c, d);
+    equalSimd([102,102,102,102], globTotal, SIMD.Int32x4, "func1");
+}
+c = !c;
+d = !d;
+// c and d are now true: on the Float32x4 path increment returns 2 per lane when x is all ones. Bail on No profile on first call.
+for (i = 0; i < 10; i++)
+{
+    z = func1(c, d);
+    equalSimd([2.0, 2.0, 2.0, 2.0], globTotal, SIMD.Float32x4, "func2");
+}
+
+print("PASS");

+ 153 - 0
test/SIMD.TypeSpec/rlexe.xml

@@ -0,0 +1,153 @@
+<?xml version="1.0" encoding="utf-8"?>
+<regress-exe>
+  <test>
+    <default>
+      <files>excessive-bailouts.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>excessive-bailouts.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>excessive-bailouts.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>excessive-bailouts.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+
+  <test>
+    <default>
+      <files>if-test.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>if-test.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>if-test.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>if-test.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+
+  <test>
+    <default>
+      <files>loop-test-1.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>loop-test-1.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>loop-test-1.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>loop-test-1.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+
+  <test>
+    <default>
+      <files>loop-test-2.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>loop-test-2.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>loop-test-2.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>loop-test-2.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+
+  <test>
+    <default>
+      <files>loop-test-3.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>loop-test-3.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>loop-test-3.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>loop-test-3.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+
+  <test>
+    <default>
+      <files>shuffle.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>shuffle.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>shuffle.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>shuffle.js</files>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
+    </default>
+  </test>
+
+</regress-exe>

+ 130 - 0
test/SIMD.TypeSpec/shuffle.js

@@ -0,0 +1,130 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+WScript.LoadScriptFile("..\\UnitTestFramework\\SimdJsHelpers.js");
+var globTotal;
+function increment(a, b, lib) // returns lib.add(a, b); nothing in this file calls it
+{
+    return lib.add(a, b);
+}
+function F4Test(b, lib) // b picks which group of shuffle checks runs; the driver passes SIMD.Float32x4 as lib
+{
+    var x, y;
+    var i =0; // i and j are not used in this function
+    var j = 0;
+    var z;
+    x = lib(100,101,102,103);
+    y = lib(0, 1,2,3);
+    // In the expected values below, shuffle indices 0-3 pick lanes of x and 4-7 pick lanes of y.
+    if (b == true)
+    {
+        // swizzle
+        z = lib.shuffle(x, y, 0, 0, 0, 0);
+        equalSimd([100,100,100,100], z, lib, "Float32x4-1");
+        // All four lanes above are taken from x.
+        
+    
+        // 2 and 2
+        z = lib.shuffle(x, y, 5, 6, 1, 2);
+        equalSimd([1.0,2.0,101.0,102.0], z, lib, "Float32x4-2");
+        z = lib.shuffle(x, y, 5, 1, 6, 2);
+        equalSimd([1.0,101.0,2.0,102.0], z, lib, "Float32x4-3");
+        z = lib.shuffle(x, y, 1, 5, 2, 6);
+        equalSimd([101.0,1.0,102.0,2.0], z, lib, "Float32x4-4");
+    }
+    else
+    {
+        // 3 and 1
+        z = lib.shuffle(x, y, 1, 5, 6, 7);
+        equalSimd([101.0,1.0,2.0,3.0], z, lib, "Float32x4-5");
+        z = lib.shuffle(x, y, 5, 1, 6, 7);
+        equalSimd([1.0,101.0,2.0,3.0], z, lib, "Float32x4-6");
+        z = lib.shuffle(x, y, 5, 1, 3, 2);
+        equalSimd([1.0,101.0,103.0,102.0], z, lib, "Float32x4-7");
+        z = lib.shuffle(x, y, 1, 5, 0, 1);
+        equalSimd([101.0,1.0,100.0,101.0], z, lib, "Float32x4-8");
+    }
+    if (lib === SIMD.Float32x4) // z here is the result of the last shuffle in whichever branch ran
+    {
+        return lib.swizzle(lib.abs(lib.sub(y, z)), 1, 2, 0, 3);
+    }
+    else
+    {
+        return lib.swizzle(lib.add(z, y), 3, 2, 0, 1);
+    }
+}
+
+function I4Test(b, lib)
+{
+    var x, y;
+    var i =0;
+    var j = 0;
+    var z;
+    x = lib(100,101,102,103);
+    y = lib(0, 1,2,3);
+   
+    if (b == true)
+    {
+        // swizzle
+        z = lib.shuffle(x, y, 0, 0, 0, 0);
+        equalSimd([100,100,100,100], z, lib, "Float32x4-1");
+    
+        // 2 and 2
+        z = lib.shuffle(x, y, 5, 6, 1, 2);
+        equalSimd([1.0,2.0,101.0,102.0], z, lib, "Float32x4-2");
+        z = lib.shuffle(x, y, 5, 1, 6, 2);
+        equalSimd([1.0,101.0,2.0,102.0], z, lib, "Float32x4-3");
+        z = lib.shuffle(x, y, 1, 5, 2, 6);
+        equalSimd([101.0,1.0,102.0,2.0], z, lib, "Float32x4-4");
+    }
+    else
+    {
+        // 3 and 1
+        z = lib.shuffle(x, y, 1, 5, 6, 7);
+        equalSimd([101.0,1.0,2.0,3.0], z, lib, "Float32x4-5");
+        z = lib.shuffle(x, y, 5, 1, 6, 7);
+        equalSimd([1.0,101.0,2.0,3.0], z, lib, "Float32x4-6");
+        z = lib.shuffle(x, y, 5, 1, 3, 2);
+        equalSimd([1.0,101.0,103.0,102.0], z, lib, "Float32x4-7");
+        z = lib.shuffle(x, y, 1, 5, 0, 1);
+        equalSimd([101.0,1.0,100.0,101.0], z, lib, "Float32x4-8");
+    }
+    if (lib === SIMD.Float32x4)
+    {
+        return lib.swizzle(lib.abs(lib.sub(y, z)), 1, 2, 0, 3);
+    }
+    else
+    {
+        return lib.swizzle(lib.add(z, y), 3, 2, 0, 1);
+    }
+}
+
+var c = false;
+var d = false;
+var z;
+var lib;
+
+lib = SIMD.Float32x4;
+z = F4Test(true, lib);
+equalSimd([0, 100, 101, 1], z, lib, "F4Test-true");
+z = F4Test(false, lib);
+equalSimd([0, 98, 101, 98], z, lib, "F4Test-true");
+z = F4Test(true, lib);
+equalSimd([0, 100, 101, 1], z, lib, "F4Test-true");
+z = F4Test(false, lib);
+equalSimd([0, 98, 101, 98], z, lib, "F4Test-true");
+
+// This will make the calls polymorphic, we don't inline and call helpers instead
+lib = SIMD.Int32x4;
+z = I4Test(true, lib);
+equalSimd([5, 104, 101, 2], z, lib, "I4Test-true");
+z = I4Test(false, lib);
+equalSimd([104, 102, 101, 2], z, lib, "I4Test-true");
+z = I4Test(true, lib);
+equalSimd([5, 104, 101, 2], z, lib, "I4Test-true");
+z = I4Test(false, lib);
+equalSimd([104, 102, 101, 2], z, lib, "I4Test-true");
+
+
+print("PASS");

+ 40 - 40
test/SIMD.float32x4.asmjs/rlexe.xml

@@ -28,14 +28,14 @@
     <default>
       <files>testAbsNeg.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testAbsNeg.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -67,14 +67,14 @@
     <default>
       <files>testAddSub.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
     <test>
     <default>
       <files>testAddSub.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -106,14 +106,14 @@
     <default>
       <files>testBitwise.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
     <test>
     <default>
       <files>testBitwise.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -145,14 +145,14 @@
     <default>
       <files>testCalls.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
     <test>
     <default>
       <files>testCalls.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -185,7 +185,7 @@
       <files>testComparison.js</files>
       <baseline>testComparison-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
     <test>
@@ -193,7 +193,7 @@
       <files>testComparison.js</files>
       <baseline>testComparison-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -225,14 +225,14 @@
     <default>
       <files>testConversion.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
     <test>
     <default>
       <files>testConversion.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -265,7 +265,7 @@
       <files>testFields.js</files>
       <baseline>testFields-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
     <test>
@@ -273,7 +273,7 @@
       <files>testFields.js</files>
       <baseline>testFields-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -305,14 +305,14 @@
     <default>
       <files>testInit.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testInit.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -345,7 +345,7 @@
       <files>testMinMax.js</files>
       <baseline>testMinMax-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -353,7 +353,7 @@
       <files>testMinMax.js</files>
       <baseline>testMinMax-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -386,14 +386,14 @@
     <default>
       <files>testMulDiv.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testMulDiv.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -426,7 +426,7 @@
       <files>testRcpSqrt.js</files>
       <baseline>testRcpSqrt-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -434,7 +434,7 @@
       <files>testRcpSqrt.js</files>
       <baseline>testRcpSqrt-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -467,7 +467,7 @@
       <files>testSelect.js</files>
       <baseline>testSelect-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -475,7 +475,7 @@
       <files>testSelect.js</files>
       <baseline>testSelect-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -508,7 +508,7 @@
       <files>testSplat.js</files>
       <baseline>testSplat-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -516,7 +516,7 @@
       <files>testSplat.js</files>
       <baseline>testSplat-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -549,7 +549,7 @@
       <files>testSqrt.js</files>
       <baseline>testSqrt-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -557,7 +557,7 @@
       <files>testSqrt.js</files>
       <baseline>testSqrt-noAsmJs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -590,7 +590,7 @@
     <files>testWithLane.js</files>
     <baseline>testWithLane-noAsmJs.baseline</baseline>
     <tags>exclude_dynapogo,exclude_ship</tags>
-    <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+    <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
   </default>
 </test>
 <test>
@@ -598,7 +598,7 @@
     <files>testWithLane.js</files>
     <baseline>testWithLane-noAsmJs.baseline</baseline>
     <tags>exclude_dynapogo,exclude_ship</tags>
-    <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+    <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
   </default>
 </test>
 
@@ -678,7 +678,7 @@
         <files>testLoadStore.js</files>
         <baseline>testLoadStore-noAsmJs.baseline</baseline>
         <tags>exclude_dynapogo,exclude_ship</tags>
-        <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+        <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
 </test>
 <test>
@@ -686,7 +686,7 @@
         <files>testLoadStore.js</files>
         <baseline>testLoadStore-noAsmJs.baseline</baseline>
         <tags>exclude_dynapogo,exclude_ship</tags>
-        <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+        <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
 </test>
 
@@ -724,7 +724,7 @@
             <files>testResizeLoadStore-2.js</files>
             <baseline>testResizeLoadStore-2-noAsmJs.baseline</baseline>
             <tags>exclude_dynapogo,exclude_ship</tags>
-            <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+            <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
         </default>
 </test>
 <test>
@@ -732,7 +732,7 @@
             <files>testResizeLoadStore-2.js</files>
             <baseline>testResizeLoadStore-2-noAsmJs.baseline</baseline>
             <tags>exclude_dynapogo,exclude_ship</tags>
-            <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+            <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
         </default>
 </test>
 
@@ -765,7 +765,7 @@
         <files>testResizeLoadStore.js</files>
         <baseline>testResizeLoadStore-noAsmJs.baseline</baseline>
         <tags>exclude_dynapogo,exclude_ship</tags>
-        <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+        <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
 </test>
 <test>
@@ -773,7 +773,7 @@
         <files>testResizeLoadStore.js</files>
         <baseline>testResizeLoadStore-noAsmJs.baseline</baseline>
         <tags>exclude_dynapogo,exclude_ship</tags>
-        <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+        <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
 </test>
 
@@ -806,7 +806,7 @@
         <files>testShuffle.js</files>
         <baseline>testShuffle-noAsmJs.baseline</baseline>
         <tags>exclude_dynapogo,exclude_ship</tags>
-        <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+        <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
 </test>
 <test>
@@ -814,7 +814,7 @@
         <files>testShuffle.js</files>
         <baseline>testShuffle-noAsmJs.baseline</baseline>
         <tags>exclude_dynapogo,exclude_ship</tags>
-        <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+        <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
 </test>
 
@@ -847,7 +847,7 @@
         <files>testSwizzle.js</files>
         <baseline>testSwizzle-noAsmJs.baseline</baseline>
         <tags>exclude_dynapogo,exclude_ship</tags>
-        <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+        <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
 </test>
 <test>
@@ -855,7 +855,7 @@
         <files>testSwizzle.js</files>
         <baseline>testSwizzle-noAsmJs.baseline</baseline>
         <tags>exclude_dynapogo,exclude_ship</tags>
-        <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+        <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
 </test>
 

+ 32 - 32
test/SIMD.float32x4/rlexe.xml

@@ -11,14 +11,14 @@
     <default>
       <files>testAbsNeg.js</files>
       <baseline>testAbsNeg.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testAbsNeg.js</files>
       <baseline>testAbsNeg.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -34,14 +34,14 @@
     <default>
       <files>testAddSub.js</files>
       <baseline>testAddSub.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testAddSub.js</files>
       <baseline>testAddSub.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -56,14 +56,14 @@
     <default>
       <files>testBitwise.js</files>
       <baseline>testBitwise.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testBitwise.js</files>
       <baseline>testBitwise.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -78,14 +78,14 @@
     <default>
       <files>testClamp.js</files>
       <baseline>testClamp.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testClamp.js</files>
       <baseline>testClamp.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -100,14 +100,14 @@
     <default>
       <files>testComparisons.js</files>
       <baseline>testComparisons.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testComparisons.js</files>
       <baseline>testComparisons.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -122,14 +122,14 @@
     <default>
       <files>testConstructor.js</files>
       <baseline>testConstructor.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testConstructor.js</files>
       <baseline>testConstructor.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -144,14 +144,14 @@
     <default>
       <files>testConversions.js</files>
       <baseline>testConversions.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testConversions.js</files>
       <baseline>testConversions.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -166,14 +166,14 @@
     <default>
       <files>testFields.js</files>
       <baseline>testFields.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testFields.js</files>
       <baseline>testFields.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -188,14 +188,14 @@
     <default>
       <files>testMinMax.js</files>
       <baseline>testMinMax.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testMinMax.js</files>
       <baseline>testMinMax.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -210,14 +210,14 @@
     <default>
       <files>testMulDiv.js</files>
       <baseline>testMulDiv.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testMulDiv.js</files>
       <baseline>testMulDiv.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -232,14 +232,14 @@
     <default>
       <files>testReciprocal.js</files>
       <baseline>testReciprocal.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testReciprocal.js</files>
       <baseline>testReciprocal.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -254,14 +254,14 @@
     <default>
       <files>testScale.js</files>
       <baseline>testScale.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testScale.js</files>
       <baseline>testScale.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -276,14 +276,14 @@
     <default>
       <files>testSelect.js</files>
       <baseline>testSelect.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testSelect.js</files>
       <baseline>testSelect.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -298,14 +298,14 @@
     <default>
       <files>testShuffle.js</files>
       <baseline>testShuffle.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testShuffle.js</files>
       <baseline>testShuffle.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -320,14 +320,14 @@
     <default>
       <files>testSqrt.js</files>
       <baseline>testSqrt.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testSqrt.js</files>
       <baseline>testSqrt.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -342,14 +342,14 @@
     <default>
       <files>testWithLane.js</files>
       <baseline>testWithLane.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testWithLane.js</files>
       <baseline>testWithLane.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
  </regress-exe>

+ 34 - 34
test/SIMD.int32x4.asmjs/rlexe.xml

@@ -29,7 +29,7 @@
       <files>testAddSub.js</files>
       <baseline>testAddSub-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -37,7 +37,7 @@
       <files>testAddSub.js</files>
       <baseline>testAddSub-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -70,7 +70,7 @@
       <files>testBitwise.js</files>
       <baseline>testBitwise-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -78,7 +78,7 @@
       <files>testBitwise.js</files>
       <baseline>testBitwise-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -111,7 +111,7 @@
       <files>testCalls.js</files>
       <baseline>testCalls-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -119,7 +119,7 @@
       <files>testCalls.js</files>
       <baseline>testCalls-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -152,7 +152,7 @@
       <files>testComparison.js</files>
       <baseline>testComparison-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -160,7 +160,7 @@
       <files>testComparison.js</files>
       <baseline>testComparison-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -193,7 +193,7 @@
       <files>testConversion.js</files>
       <baseline>testConversion-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -201,7 +201,7 @@
       <files>testConversion.js</files>
       <baseline>testConversion-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -234,7 +234,7 @@
       <files>testFields.js</files>
       <baseline>testFields-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -242,7 +242,7 @@
       <files>testFields.js</files>
       <baseline>testFields-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -275,7 +275,7 @@
       <files>testMul.js</files>
       <baseline>testMul-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -283,7 +283,7 @@
       <files>testMul.js</files>
       <baseline>testMul-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -316,7 +316,7 @@
       <files>testNeg.js</files>
       <baseline>testNeg-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -324,7 +324,7 @@
       <files>testNeg.js</files>
       <baseline>testNeg-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -357,7 +357,7 @@
       <files>testSelect.js</files>
       <baseline>testSelect-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -365,7 +365,7 @@
       <files>testSelect.js</files>
       <baseline>testSelect-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -398,7 +398,7 @@
       <files>testSplat.js</files>
       <baseline>testSplat-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -406,7 +406,7 @@
       <files>testSplat.js</files>
       <baseline>testSplat-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -439,7 +439,7 @@
       <files>testWithLane.js</files>
       <baseline>testWithLane-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
   </default>
   </test>
   <test>
@@ -447,7 +447,7 @@
       <files>testWithLane.js</files>
       <baseline>testWithLane-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
   </default>
   </test>
 
@@ -472,7 +472,7 @@
           <files>testLoadStore-2.js</files>
           <baseline>testLoadStore-2-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
     <test>
@@ -480,7 +480,7 @@
           <files>testLoadStore-2.js</files>
           <baseline>testLoadStore-2-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
 
@@ -513,7 +513,7 @@
           <files>testLoadStore.js</files>
           <baseline>testLoadStore-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
   <test>
@@ -521,7 +521,7 @@
           <files>testLoadStore.js</files>
           <baseline>testLoadStore-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
 
@@ -546,7 +546,7 @@
           <files>testResizeLoadStore-2.js</files>
           <baseline>testResizeLoadStore-2-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
   <test>
@@ -554,7 +554,7 @@
           <files>testResizeLoadStore-2.js</files>
           <baseline>testResizeLoadStore-2-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
 
@@ -587,7 +587,7 @@
           <files>testResizeLoadStore.js</files>
           <baseline>testResizeLoadStore-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
   <test>
@@ -595,7 +595,7 @@
           <files>testResizeLoadStore.js</files>
           <baseline>testResizeLoadStore-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
 
@@ -628,7 +628,7 @@
           <files>testShuffle.js</files>
           <baseline>testShuffle-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
   <test>
@@ -636,7 +636,7 @@
           <files>testShuffle.js</files>
           <baseline>testShuffle-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
 
@@ -669,7 +669,7 @@
       <files>testSwizzle.js</files>
       <baseline>testSwizzle-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
   <test>
@@ -677,7 +677,7 @@
       <files>testSwizzle.js</files>
       <baseline>testSwizzle-noAsmjs.baseline</baseline>
           <tags>exclude_dynapogo,exclude_ship</tags>
-          <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+          <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
       </default>
   </test>
 </regress-exe>

+ 32 - 32
test/SIMD.int32x4/rlexe.xml

@@ -11,14 +11,14 @@
     <default>
       <files>testAddSub.js</files>
       <baseline>testAddSub.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testAddSub.js</files>
       <baseline>testAddSub.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -34,14 +34,14 @@
     <default>
       <files>testAndOrXor.js</files>
       <baseline>testAndOrXor.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testAndOrXor.js</files>
       <baseline>testAndOrXor.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -56,14 +56,14 @@
     <default>
       <files>testComparisons.js</files>
       <baseline>testComparisons.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testComparisons.js</files>
       <baseline>testComparisons.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -78,14 +78,14 @@
     <default>
       <files>testConstructor.js</files>
       <baseline>testConstructor.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testConstructor.js</files>
       <baseline>testConstructor.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -100,14 +100,14 @@
     <default>
       <files>testConversions.js</files>
       <baseline>testConversions.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testConversions.js</files>
       <baseline>testConversions.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -122,14 +122,14 @@
     <default>
       <files>testFields.js</files>
       <baseline>testFields.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testFields.js</files>
       <baseline>testFields.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -144,14 +144,14 @@
     <default>
       <files>testMul.js</files>
       <baseline>testMul.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testMul.js</files>
       <baseline>testMul.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -166,14 +166,14 @@
     <default>
       <files>testNeg.js</files>
       <baseline>testNeg.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testNeg.js</files>
       <baseline>testNeg.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -188,14 +188,14 @@
     <default>
       <files>testNot.js</files>
       <baseline>testNot.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testNot.js</files>
       <baseline>testNot.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -210,14 +210,14 @@
     <default>
       <files>testSelect.js</files>
       <baseline>testSelect.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testSelect.js</files>
       <baseline>testSelect.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -232,14 +232,14 @@
     <default>
       <files>testShift.js</files>
       <baseline>testShift.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testShift.js</files>
       <baseline>testShift.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -254,14 +254,14 @@
     <default>
       <files>testShuffle.js</files>
       <baseline>testShuffle.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testShuffle.js</files>
       <baseline>testShuffle.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -276,14 +276,14 @@
     <default>
       <files>testSignMask.js</files>
       <baseline>testSignMask.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testSignMask.js</files>
       <baseline>testSignMask.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -298,14 +298,14 @@
     <default>
       <files>testWithFlag.js</files>
       <baseline>testWithFlag.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testWithFlag.js</files>
       <baseline>testWithFlag.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -320,14 +320,14 @@
     <default>
       <files>testWithLane.js</files>
       <baseline>testWithLane.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testWithLane.js</files>
       <baseline>testWithLane.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -342,14 +342,14 @@
     <default>
       <files>testZeroSplatBool.js</files>
       <baseline>testZeroSplatBool.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testZeroSplatBool.js</files>
       <baseline>testZeroSplatBool.baseline</baseline>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 

+ 14 - 14
test/SIMD.workloads.asmjs/rlexe.xml

@@ -29,7 +29,7 @@
       <files>testReverseHeap.js</files>
       <baseline>testReverseHeap-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -37,7 +37,7 @@
       <files>testReverseHeap.js</files>
       <baseline>testReverseHeap-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -69,14 +69,14 @@
     <default>
       <files>testMerge.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testMerge.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -108,14 +108,14 @@
     <default>
       <files>testScale.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>testScale.js</files>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -148,7 +148,7 @@
       <files>testLinearSearch.js</files>
       <baseline>testLinearSearch-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -156,7 +156,7 @@
       <files>testLinearSearch.js</files>
       <baseline>testLinearSearch-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -189,7 +189,7 @@
       <files>test2DMatrixAddition.js</files>
       <baseline>test2DMatrixAddition-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -197,7 +197,7 @@
       <files>test2DMatrixAddition.js</files>
       <baseline>test2DMatrixAddition-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -230,7 +230,7 @@
       <files>test2DMatrixSubtraction.js</files>
       <baseline>test2DMatrixSubtraction-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -238,7 +238,7 @@
       <files>test2DMatrixSubtraction.js</files>
       <baseline>test2DMatrixSubtraction-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 
@@ -271,7 +271,7 @@
       <files>test2DMatrixMultiplication.js</files>
       <baseline>test2DMatrixMultiplication-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -on:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -on:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
   <test>
@@ -279,7 +279,7 @@
       <files>test2DMatrixMultiplication.js</files>
       <baseline>test2DMatrixMultiplication-noAsmjs.baseline</baseline>
       <tags>exclude_dynapogo,exclude_ship</tags>
-      <compile-flags>-bgjit- -simdjs -simd128typespec -off:simplejit -mic:1</compile-flags>
+      <compile-flags>-bgjit- -simdjs -asmjs- -simd128typespec  -off:simplejit -mic:1 -lic:1</compile-flags>
     </default>
   </test>
 

+ 1 - 1
test/UnitTestFramework/SimdJsHelpers.js

@@ -17,7 +17,7 @@ function equal(v, ev) {
         return true;
     else if ((ev == 0.0 || v == 0.0) && Math.abs(v - ev) <= eps) // -0.0 covered here
         return true;
-    else if (Math.abs(v - ev) / ev <= eps)
+    else if (Math.abs(v - ev) / Math.abs(ev) <= eps)
         return true;
     else
         return false;

+ 6 - 0
test/rlexedirs.xml

@@ -311,4 +311,10 @@
     <tags>exclude_serialized,exclude_arm,exclude_arm64,require_backend</tags>
   </default>
 </dir>
+<dir>
+  <default>
+    <files>SIMD.TypeSpec</files>
+    <tags>exclude_serialized,exclude_arm,exclude_arm64,require_backend</tags>
+  </default>
+</dir>
 </regress-exe>