Răsfoiți Sursa

working loads and stores needs clean up

loads & stores cleaned up
Nikolay Korovaiko 8 ani în urmă
părinte
comite
3d31f2316b

+ 3 - 2
lib/Backend/IRBuilderAsmJs.cpp

@@ -6452,11 +6452,11 @@ void IRBuilderAsmJs::BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offs
 {
     Assert(OpCodeAttrAsmJs::HasMultiSizeLayout(newOpcode));
     auto layout = m_jnReader.GetLayout<Js::OpLayoutT_AsmSimdTypedArr<SizePolicy>>();
-    BuildAsmSimdTypedArr(newOpcode, offset, layout->SlotIndex, layout->Value, layout->ViewType, layout->DataWidth);
+    BuildAsmSimdTypedArr(newOpcode, offset, layout->SlotIndex, layout->Value, layout->ViewType, layout->DataWidth, layout->Offset);
 }
 
 void
-IRBuilderAsmJs::BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, Js::ArrayBufferView::ViewType viewType, uint8 dataWidth)
+IRBuilderAsmJs::BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, Js::ArrayBufferView::ViewType viewType, uint8 dataWidth, uint32 simdOffset)
 {
     IRType type = TySimd128F4;
     Js::RegSlot valueRegSlot = GetRegSlotFromSimd128Reg(value);
@@ -6784,6 +6784,7 @@ IRBuilderAsmJs::BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, u
     // REVIEW: Store dataWidth in the instruction itself instead of an argument to avoid using ExtendedArgs or excessive opcodes.
     Assert(dataWidth >= 4 && dataWidth <= 16);
     instr->dataWidth = dataWidth;
+    indirOpnd->SetOffset(simdOffset);
     if (maskInstr)
     {
         AddInstr(maskInstr, offset);

+ 1 - 1
lib/Backend/IRBuilderAsmJs.h

@@ -141,7 +141,7 @@ private:
     void                    BuildAsmUnsigned1(Js::OpCodeAsmJs newOpcode, uint value);
     void                    BuildWasmMemAccess(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, uint32 constOffset, Js::ArrayBufferView::ViewType viewType);
     void                    BuildAsmTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, Js::ArrayBufferView::ViewType viewType);
-    void                    BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, Js::ArrayBufferView::ViewType viewType, uint8 DataWidth);
+    void                    BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, Js::ArrayBufferView::ViewType viewType, uint8 DataWidth, uint32 simdOffset);
     void                    BuildAsmCall(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::ArgSlot argCount, Js::RegSlot ret, Js::RegSlot function, int8 returnType);
     void                    BuildAsmReg1(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstReg);
     void                    BuildBrInt1(Js::OpCodeAsmJs newOpcode, uint32 offset, int32 relativeOffset, Js::RegSlot src);

+ 0 - 1
lib/Backend/LowerMDSharedSimd128.cpp

@@ -2643,7 +2643,6 @@ IR::Instr* LowererMD::Simd128AsmJsLowerLoadElem(IR::Instr *instr)
         // Case (1) requires static bound check. Case (2) means we are always in bound.
 
         // this can happen in cases where globopt props a constant access which was not known at bytecodegen time or when heap is non-constant
-
         if (src2->IsIntConstOpnd() && ((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
         {
             m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);

+ 59 - 3
lib/Backend/amd64/LowererMDArch.cpp

@@ -1117,9 +1117,15 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal
     IRType type = src1->GetType();
     IR::RegOpnd * indexOpnd = src1->AsIndirOpnd()->GetIndexOpnd();
     const uint8 dataWidth = instr->dataWidth;
-
     Assert(isSimdLoad == false || dataWidth == 4 || dataWidth == 8 || dataWidth == 12 || dataWidth == 16);
 
+#if ENABLE_FAST_ARRAYBUFFER
+    if (CONFIG_FLAG(WasmFastArray) && m_func->GetJITFunctionBody()->IsWasmFunction())
+    {
+        return instr;
+    }
+#endif
+
 #ifdef _WIN32
     // For x64, bound checks are required only for SIMD loads.
     if (isSimdLoad)
@@ -1147,6 +1153,15 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal
             IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
             // MOV tmp, cmpOnd
             Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
+#ifdef ENABLE_WASM
+            if (m_func->GetJITFunctionBody()->IsWasmFunction() && src1->AsIndirOpnd()->GetOffset())
+            {
+                // ADD tmp, offset
+                Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+                // JB helper
+                Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
+            }
+#endif
             // ADD tmp, dataWidth
             Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dataWidth, tmp->GetType(), m_func, true), helperLabel);
             // JB helper
@@ -1157,7 +1172,23 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal
         }
         else
         {
-            lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+#ifdef ENABLE_WASM
+            if (m_func->GetJITFunctionBody()->IsWasmFunction() && src1->AsIndirOpnd()->GetOffset()) //WASM
+            {
+                IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
+                // MOV tmp, cmpOnd
+                Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
+                // ADD tmp, offset
+                Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+                // JB helper
+                Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
+                lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+            }
+            else
+#endif
+            {
+                lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+            }
         }
         Lowerer::InsertBranch(Js::OpCode::Br, loadLabel, helperLabel);
 
@@ -1225,6 +1256,15 @@ LowererMDArch::LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore /*= fa
             IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
             // MOV tmp, cmpOnd
             Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
+#ifdef ENABLE_WASM
+            if (m_func->GetJITFunctionBody()->IsWasmFunction() && dst->AsIndirOpnd()->GetOffset())
+            {
+                // ADD tmp, offset
+                Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+                // JB helper
+                Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
+            }
+#endif
             // ADD tmp, dataWidth
             Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dataWidth, tmp->GetType(), m_func, true), helperLabel);
             // JB helper
@@ -1235,7 +1275,23 @@ LowererMDArch::LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore /*= fa
         }
         else
         {
-            lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+#ifdef ENABLE_WASM
+            if (m_func->GetJITFunctionBody()->IsWasmFunction() && dst->AsIndirOpnd()->GetOffset()) //WASM
+            {
+                IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
+                // MOV tmp, cmpOnd
+                Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
+                // ADD tmp, offset
+                Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+                // JB helper
+                Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
+                lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+            }
+            else
+#endif
+            {
+                lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+            }
         }
         Lowerer::InsertBranch(Js::OpCode::Br, storeLabel, helperLabel);
 

+ 51 - 2
lib/Backend/i386/LowererMDArch.cpp

@@ -976,6 +976,15 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal
         IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
         // MOV tmp, cmpOnd
         Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
+#ifdef ENABLE_WASM
+        if (m_func->GetJITFunctionBody()->IsWasmFunction() && src1->AsIndirOpnd()->GetOffset()) //WASM.SIMD
+        {
+            // ADD tmp, offset
+            Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+            // JB helper
+            Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
+        }
+#endif
         // ADD tmp, dataWidth
         Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dataWidth, tmp->GetType(), m_func, true), helperLabel);
         // JB helper
@@ -986,7 +995,22 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal
     }
     else
     {
-        lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+#ifdef ENABLE_WASM
+        if (m_func->GetJITFunctionBody()->IsWasmFunction() && src1->AsIndirOpnd()->GetOffset()) //WASM.SIMD
+        {
+            IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
+            Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
+            // ADD tmp, offset
+            Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+            // JB helper
+            Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
+            lowererMD->m_lowerer->InsertCompareBranch(tmp, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+        }
+        else
+#endif
+        {
+            lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+        }
     }
 
     Lowerer::InsertBranch(Js::OpCode::Br, loadLabel, helperLabel);
@@ -1038,6 +1062,16 @@ LowererMDArch::LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore /*= fa
         IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
         // MOV tmp, cmpOnd
         Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
+
+#ifdef ENABLE_WASM
+        if (m_func->GetJITFunctionBody()->IsWasmFunction() && dst->AsIndirOpnd()->GetOffset()) //WASM.SIMD
+        {
+            // ADD tmp, offset
+            Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+            // JB helper
+            Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
+        }
+#endif
         // ADD tmp, dataWidth
         Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dataWidth, tmp->GetType(), m_func, true), helperLabel);
         // JB helper
@@ -1048,7 +1082,22 @@ LowererMDArch::LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore /*= fa
     }
     else
     {
-        lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+#ifdef ENABLE_WASM
+        if (m_func->GetJITFunctionBody()->IsWasmFunction() && dst->AsIndirOpnd()->GetOffset()) //WASM.SIMD
+        {
+            IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
+            Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
+            // ADD tmp, offset
+            Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+            // JB helper
+            Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
+            lowererMD->m_lowerer->InsertCompareBranch(tmp, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+        }
+        else
+#endif
+        {
+            lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+        }
     }
 
     if (isSimdStore)

+ 4 - 4
lib/Runtime/ByteCode/AsmJsByteCodeWriter.cpp

@@ -386,11 +386,11 @@ namespace Js
     }
 
     template <typename SizePolicy>
-    bool AsmJsByteCodeWriter::TryWriteAsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType)
+    bool AsmJsByteCodeWriter::TryWriteAsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset)
     {
         OpLayoutT_AsmSimdTypedArr<SizePolicy> layout;
         if (SizePolicy::Assign(layout.Value, value) && SizePolicy::template Assign<ArrayBufferView::ViewType>(layout.ViewType, viewType)
-            && SizePolicy::Assign(layout.SlotIndex, slotIndex) && SizePolicy::template Assign<int8>(layout.DataWidth, dataWidth))
+            && SizePolicy::Assign(layout.SlotIndex, slotIndex) && SizePolicy::template Assign<int8>(layout.DataWidth, dataWidth) && SizePolicy::Assign(layout.Offset, offset))
         {
             m_byteCodeData.EncodeT<SizePolicy::LayoutEnum>(op, &layout, sizeof(layout), this);
             return true;
@@ -561,10 +561,10 @@ namespace Js
         MULTISIZE_LAYOUT_WRITE(WasmMemAccess, op, value, slotIndex, offset, viewType);
     }
 
-    void AsmJsByteCodeWriter::AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType)
+    void AsmJsByteCodeWriter::AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset)
     {
         Assert(dataWidth >= 4 && dataWidth <= 16);
-        MULTISIZE_LAYOUT_WRITE(AsmSimdTypedArr, op, value, slotIndex, dataWidth, viewType);
+        MULTISIZE_LAYOUT_WRITE(AsmSimdTypedArr, op, value, slotIndex, dataWidth, viewType, offset);
     }
 
     void AsmJsByteCodeWriter::AsmSlot(OpCodeAsmJs op, RegSlot value, RegSlot instance, int32 slotId)

+ 2 - 2
lib/Runtime/ByteCode/AsmJsByteCodeWriter.h

@@ -50,6 +50,7 @@ namespace Js
         IMP_IWASM void AsmReg9(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8);
         IMP_IWASM void AsmReg17(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8,
             RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16);
+        IMP_IWASM void AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset = 0);
 
 #ifdef WASM_BYTECODE_WRITER
         // We don't want to expose api not in IWasmByteCodeWriter, but it's easier to compile them anyway
@@ -66,7 +67,6 @@ namespace Js
                       RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16, RegSlot R17, RegSlot R18);
         void AsmBrReg2(OpCodeAsmJs op, ByteCodeLabel labelID, RegSlot R1, RegSlot R2);
         void AsmTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, ArrayBufferView::ViewType viewType);
-        void AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType);
     private:
         void AsmJsUnsigned1(OpCodeAsmJs op, uint C1);
         template <typename SizePolicy> bool TryWriteAsmReg1(OpCodeAsmJs op, RegSlot R0);
@@ -99,7 +99,7 @@ namespace Js
         template <typename SizePolicy> bool TryWriteAsmSlot(OpCodeAsmJs op, RegSlot value, RegSlot instance, int32 slotId);
         template <typename SizePolicy> bool TryWriteWasmMemAccess(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint32 offset, ArrayBufferView::ViewType viewType);
         template <typename SizePolicy> bool TryWriteAsmTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, ArrayBufferView::ViewType viewType);
-        template <typename SizePolicy> bool TryWriteAsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType);
+        template <typename SizePolicy> bool TryWriteAsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset);
         template <typename SizePolicy> bool TryWriteAsmJsUnsigned1(OpCodeAsmJs op, uint C1);
 
         void AddJumpOffset(Js::OpCodeAsmJs op, ByteCodeLabel labelId, uint fieldByteOffset);

+ 1 - 0
lib/Runtime/ByteCode/IWasmByteCodeWriter.h

@@ -34,6 +34,7 @@ namespace Js
         virtual void AsmReg9(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8) = 0;
         virtual void AsmReg17(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8,
             RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16) = 0;
+        virtual void AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset = 0) = 0;
 
         virtual void AsmSlot(OpCodeAsmJs op, RegSlot value, RegSlot instance, int32 slotId) = 0;
         virtual void AsmBr(ByteCodeLabel labelID, OpCodeAsmJs op = OpCodeAsmJs::AsmBr) = 0;

+ 0 - 1
lib/Runtime/ByteCode/OpCodesAsmJs.h

@@ -309,7 +309,6 @@ MACRO_EXTEND_WMS( Conv_Check_DTUL            , Long1Double1    , None
 
 MACRO_EXTEND(AsmJsEntryTracing, Empty, None)
 
-
 // help the caller to undefine all the macros
 #undef MACRO
 #undef MACRO_WMS

+ 1 - 0
lib/Runtime/ByteCode/OpLayoutsAsmJs.h

@@ -520,6 +520,7 @@ namespace Js
         typename SizePolicy::RegSlotType     Value;
         ArrayBufferView::ViewType            ViewType;
         int8                                 DataWidth; // # of bytes to load/store
+        uint32                               Offset; //WASM.SIMD
     };
 
     // Generate the multi size layout type defs

+ 6 - 4
lib/Runtime/Language/InterpreterStackFrame.cpp

@@ -7851,8 +7851,9 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
     template <class T>
     void InterpreterStackFrame::OP_SimdLdArrGeneric(const unaligned T* playout)
     {
+        //Output::Print(_u("accessing offset %d and dataWidth %d\n"), playout->Offset, playout->DataWidth);
         Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT);
-        const uint32 index = (uint32)GetRegRawInt(playout->SlotIndex) & ArrayBufferView::ViewMask[playout->ViewType];
+        const uint64 index = ((uint32) GetRegRawInt(playout->SlotIndex) + playout->Offset /* WASM only */) & ArrayBufferView::ViewMask[playout->ViewType];
         JavascriptArrayBuffer* arr = *(JavascriptArrayBuffer**)GetNonVarReg(AsmJsFunctionMemory::ArrayBufferRegister);
         BYTE* buffer = arr->GetBuffer();
         uint8 dataWidth = playout->DataWidth;
@@ -7862,7 +7863,7 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
         {
             JavascriptError::ThrowRangeError(scriptContext, JSERR_ArgumentOutOfRange, _u("Simd typed array access"));
         }
-        AsmJsSIMDValue *data = (AsmJsSIMDValue*)(buffer + index);
+        AsmJsSIMDValue *data = (AsmJsSIMDValue*)(buffer + (uint32)index);
         AsmJsSIMDValue value;
 
         value = SIMDUtils::SIMDLdData(data, dataWidth);
@@ -7894,7 +7895,8 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
     void InterpreterStackFrame::OP_SimdStArrGeneric(const unaligned T* playout)
     {
         Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT);
-        const uint32 index = (uint32)GetRegRawInt(playout->SlotIndex) & ArrayBufferView::ViewMask[playout->ViewType];
+        //const uint32 index = (uint32)GetRegRawInt(playout->SlotIndex) & ArrayBufferView::ViewMask[playout->ViewType];
+        const uint64 index = ((uint32)GetRegRawInt(playout->SlotIndex) + playout->Offset /* WASM only */) & ArrayBufferView::ViewMask[playout->ViewType];
         JavascriptArrayBuffer* arr = *(JavascriptArrayBuffer**)GetNonVarReg(AsmJsFunctionMemory::ArrayBufferRegister);
         BYTE* buffer = arr->GetBuffer();
         uint8 dataWidth = playout->DataWidth;
@@ -7904,7 +7906,7 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
         {
             JavascriptError::ThrowRangeError(scriptContext, JSERR_ArgumentOutOfRange, _u("Simd typed array access"));
         }
-        AsmJsSIMDValue *data = (AsmJsSIMDValue*)(buffer + index);
+        AsmJsSIMDValue *data = (AsmJsSIMDValue*)(buffer + (uint32)index);
         AsmJsSIMDValue value = GetRegRawSimd(srcReg);
         SIMDUtils::SIMDStData(data, value, dataWidth);
     }

+ 1 - 0
lib/Runtime/Language/InterpreterStackFrame.h

@@ -217,6 +217,7 @@ namespace Js
 
         template <typename RegSlotType> AsmJsSIMDValue GetRegRawSimd(RegSlotType localRegisterID) const;
         template <typename RegSlotType> void           SetRegRawSimd(RegSlotType localRegisterID, AsmJsSIMDValue bValue);
+
         template <class T> void OP_SimdLdArrGeneric(const unaligned T* playout);
         template <class T> void OP_SimdLdArrConstIndex(const unaligned T* playout);
         template <class T> void OP_SimdStArrGeneric(const unaligned T* playout);

+ 1 - 0
lib/WasmReader/EmptyWasmByteCodeWriter.h

@@ -31,6 +31,7 @@ namespace Js
         virtual void AsmReg9(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8) override {}
         virtual void AsmReg17(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8,
             RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16) override {}
+        virtual void AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset = 0) override {};
 
         virtual void AsmSlot(OpCodeAsmJs op, RegSlot value, RegSlot instance, int32 slotId) override {}
         virtual void AsmBr(ByteCodeLabel labelID, OpCodeAsmJs op = OpCodeAsmJs::AsmBr) override {}

+ 13 - 3
lib/WasmReader/WasmBinaryOpcodesSimd.h

@@ -29,6 +29,14 @@
 #define WASM_SIMD_BUILD_OPCODE(opname, opcode, sig, asmjop, lanes, nyi) WASM_OPCODE(opname, opcode, sig, nyi)
 #endif
 
+#ifndef WASM_SIMD_MEMREAD_OPCODE
+#define WASM_SIMD_MEMREAD_OPCODE(opname, opcode, sig, asmjsop, viewtype, dataWidth, nyi) WASM_MEM_OPCODE(opname, opcode, sig, nyi)
+#endif
+
+#ifndef WASM_SIMD_MEMSTORE_OPCODE
+#define WASM_SIMD_MEMSTORE_OPCODE(opname, opcode, sig, asmjsop, viewtype, dataWidth, nyi) WASM_MEM_OPCODE(opname, opcode, sig, nyi)
+#endif
+
 //SIMD Signatures
 #define SIMD_EXTRACT(TYPE, BASE) WASM_SIGNATURE(BASE##_##TYPE, 2, WasmTypes::##BASE##32, WasmTypes::##TYPE)
 #define SIMD_BUILD(TYPE, BASE) WASM_SIGNATURE(TYPE##_##BASE, 2, WasmTypes::##TYPE, WasmTypes::##BASE##32)
@@ -47,8 +55,8 @@ WASM_SIGNATURE(M128_I_M128, 3, WasmTypes::M128, WasmTypes::M128, WasmTypes::I32)
 WASM_SIGNATURE(M128_M128, 2, WasmTypes::M128, WasmTypes::M128)
 
 WASM_MISC_OPCODE(M128Const, 0x100, Limit, true)
-WASM_MISC_OPCODE(M128Load, 0x101, Limit, true)
-WASM_MISC_OPCODE(M128Store, 0x102, Limit, true)
+WASM_SIMD_MEMREAD_OPCODE(M128Load, 0x101, M128_I, Simd128_LdArr_F4, Js::ArrayBufferView::TYPE_FLOAT32, 16, false)
+WASM_SIMD_MEMSTORE_OPCODE(M128Store, 0x102, M128_I, Simd128_StArr_F4, Js::ArrayBufferView::TYPE_FLOAT32, 16, false)
 WASM_MISC_OPCODE(I16Splat, 0x103, Limit, true)
 WASM_MISC_OPCODE(I8Splat, 0x104, Limit, true)
 WASM_MISC_OPCODE(I4Splat, 0x105, Limit, true)
@@ -189,4 +197,6 @@ WASM_MISC_OPCODE(I2TruncU, 0x18b, Limit, true)
 
 #undef WASM_SIMD_BUILD_OPCODE
 #undef WASM_LANE_OPCODE
-#undef WASM_EXTRACTLANE_OPCODE
+#undef WASM_EXTRACTLANE_OPCODE
+#undef WASM_SIMD_MEMREAD_OPCODE
+#undef WASM_SIMD_MEMSTORE_OPCODE

+ 1 - 4
lib/WasmReader/WasmBinaryReader.cpp

@@ -428,7 +428,7 @@ WasmOp WasmBinaryReader::ReadOpCode()
     WasmOp op = (WasmOp)*m_pc++;
     ++m_funcState.count;
 
-    if (op == wbSimdStart || op == wbExtended2)
+    if (op == wbSimdStart)
     {
         if (!CONFIG_FLAG(WasmSimd))
         {
@@ -437,11 +437,8 @@ WasmOp WasmBinaryReader::ReadOpCode()
 
         UINT len;
         UINT32 extOpCode = LEB128(len);
-        extOpCode += wbSimdStart;
         Assert((WasmOp)(extOpCode) == extOpCode);
         op = (WasmOp)extOpCode;
-
-        m_pc += len;
         m_funcState.count += len;
     }
 

+ 51 - 0
lib/WasmReader/WasmByteCodeGenerator.cpp

@@ -51,6 +51,10 @@ PrintTypeStack(const JsUtil::Stack<EmitInfo>& stack)
         case WasmTypes::I64: Output::Print(_u("i64")); break;
         case WasmTypes::F32: Output::Print(_u("f32")); break;
         case WasmTypes::F64: Output::Print(_u("f64")); break;
+#define SIMD_CASE(TYPE, BASE) case WasmTypes::##TYPE: Output::Print(_u(#TYPE)); break;
+
+            FOREACH_SIMD_TYPE(SIMD_CASE)
+#undef SIMD_CASE
         default: Output::Print(_u("any")); break;
         }
     }
@@ -645,6 +649,16 @@ WasmBytecodeGenerator::EmitExpr(WasmOp op)
         Assert(WasmOpCodeSignatures::n##sig > 0);\
         info = EmitMemAccess(wb##opname, WasmOpCodeSignatures::sig, viewtype, true); \
         break;
+#define WASM_SIMD_MEMREAD_OPCODE(opname, opcode, sig, asmjsop, viewtype, dataWidth, nyi) \
+    case wb##opname: \
+        Assert(WasmOpCodeSignatures::n##sig > 0);\
+        info = EmitSimdMemAccess(Js::OpCodeAsmJs::##asmjsop, WasmOpCodeSignatures::sig, viewtype, dataWidth, false); \
+        break;
+#define WASM_SIMD_MEMSTORE_OPCODE(opname, opcode, sig, asmjsop, viewtype, dataWidth, nyi) \
+    case wb##opname: \
+        Assert(WasmOpCodeSignatures::n##sig > 0);\
+        info = EmitSimdMemAccess(Js::OpCodeAsmJs::##asmjsop, WasmOpCodeSignatures::sig, viewtype, dataWidth, true); \
+        break;
 #define WASM_BINARY_OPCODE(opname, opcode, sig, asmjsop, nyi) \
     case wb##opname: \
         Assert(WasmOpCodeSignatures::n##sig == 3);\
@@ -1303,6 +1317,43 @@ WasmBytecodeGenerator::EmitExtractLane(Js::OpCodeAsmJs op, const WasmTypes::Wasm
     return resultInfo;
 }
 
+// Emits the bytecode for a SIMD (m128) memory load or store.
+// signature[0] is the type of the value being loaded or stored; dataWidth is the
+// number of bytes accessed. A store pops the value and then the i32 index and
+// yields an empty EmitInfo. A load pops the i32 index and yields a temporary
+// register of type signature[0] that receives the result. The opcode's constant
+// offset is forwarded to the bytecode writer together with the index register.
+EmitInfo
+WasmBytecodeGenerator::EmitSimdMemAccess(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType, uint8 dataWidth, bool isStore)
+{
+    const WasmTypes::WasmType type = signature[0];
+    SetUsesMemory(0);
+    const uint32 offset = GetReader()->m_currentNode.mem.offset;
+
+    // For a store, the value operand is popped before the index operand.
+    EmitInfo rhsInfo;
+    if (isStore)
+    {
+        rhsInfo = PopEvalStack(type, _u("Invalid type for store op"));
+    }
+    EmitInfo exprInfo = PopEvalStack(WasmTypes::I32, _u("Index expression must be of type i32"));
+
+    if (isStore)
+    {
+        m_writer->AsmSimdTypedArr(op, rhsInfo.location, exprInfo.location, dataWidth, viewType, offset);
+        ReleaseLocation(&rhsInfo);
+        ReleaseLocation(&exprInfo);
+        return EmitInfo(); // a store produces no value
+    }
+
+    Js::RegSlot resultReg = GetRegisterSpace(type)->AcquireTmpRegister();
+    m_writer->AsmSimdTypedArr(op, resultReg, exprInfo.location, dataWidth, viewType, offset);
+    EmitInfo yieldInfo = EmitInfo(resultReg, type);
+    ReleaseLocation(&exprInfo);
+    return yieldInfo;
+}
+
+
 EmitInfo
 WasmBytecodeGenerator::EmitMemAccess(WasmOp wasmOp, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType, bool isStore)
 {

+ 1 - 0
lib/WasmReader/WasmByteCodeGenerator.h

@@ -146,6 +146,7 @@ namespace Wasm
         EmitInfo EmitBrIf();
 
         EmitInfo EmitMemAccess(WasmOp wasmOp, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType, bool isStore);
+        EmitInfo EmitSimdMemAccess(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType, uint8 dataWidth, bool isStore);
         EmitInfo EmitBinExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature);
         EmitInfo EmitUnaryExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature);
         EmitInfo EmitExtractLane(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature);

+ 80 - 0
test/wasm.simd/loadTests.js

@@ -0,0 +1,80 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+let passed = true;
+
+function assertEquals(expected, actual) {
+    if (expected != actual) {
+        passed = false;
+        throw `Expected ${expected}, received ${actual}`;
+    }
+}
+
+function testLoadOpsForType(funcname, module, laneValues, expectedResults, startPositions) {
+
+    let memObj = new WebAssembly.Memory({initial:INITIAL_SIZE});
+    const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports;
+
+    let intArray = new Int32Array (memObj.buffer);
+
+    let forEachTestPosition =  (action)  => {
+
+            for (const pos of startPositions) {
+            for (let i = 0; i < 4; i++) {
+                action(pos, i); 
+            }
+        }
+    };
+    
+    forEachTestPosition ((pos, i) => {intArray[pos + i]  = laneValues[i];});
+    instance[funcname](0);
+    forEachTestPosition((pos, i) => {assertEquals(intArray[pos + i], expectedResults[i]);});
+
+    const MEM_SIZE_IN_BYTES = 1024 * 64;
+
+    let check = function(expected, funName, ...args) {
+        let fun = eval(funName);
+        var result;
+        try {
+        result = fun(...args);
+        
+        }
+        catch (e) {
+            if (e.message === "Access index is out of range" || 
+                e.message === "Simd typed array access: argument out of range" ||
+                e.message === "argument out of range"
+            ) {
+                result = "Access index is out of range";
+            }
+            else {
+                result = e.message;
+            }
+        }
+
+        if(result != expected)
+        {
+            passed = false;
+            print(`${funName}(${[...args]}) produced ${result}, expected ${expected}`);
+        }
+    }
+    
+    check(0, "instance.m128_load4", MEM_SIZE_IN_BYTES - 32);
+    check(0, "instance.m128_load4", MEM_SIZE_IN_BYTES - 16);
+    check("Access index is out of range", "instance.m128_load4", MEM_SIZE_IN_BYTES - 8);
+    check("Access index is out of range", "instance.m128_load4", MEM_SIZE_IN_BYTES - 4);
+
+}
+
+const INITIAL_SIZE = 1; // initial size passed to WebAssembly.Memory, in 64 KiB pages
+const module = new WebAssembly.Module(readbuffer('loads.wasm'));
+// Input lanes written to memory, and the per-lane values expected in their place after the wasm test function runs.
+const laneValues = [0xAAAAAAAA, 0xFFFFFFFF, 0X80000000, 0x90A762A6];
+const expectedResults = [16, 32, 1, 14]; // i32.popcnt of the corresponding entry in laneValues
+const startPositions = [0, 5, 11, 17]; // Int32Array indices where each 4-lane group starts
+
+testLoadOpsForType("m128_load_test", module, laneValues, expectedResults,startPositions);
+// Print the success marker only when no check cleared the `passed` flag.
+if (passed) {
+    print("Passed");
+}

BIN
test/wasm.simd/loads.wasm


+ 45 - 0
test/wasm.simd/loads.wast

@@ -0,0 +1,45 @@
+;; Test module for m128.load. The memory is supplied by the embedder as
+;; import "dummy"."memory".
+(module
+    (import "dummy" "memory" (memory 1))
+
+    ;; Loads a 128-bit vector from address $x and returns its lane 0 as i32.
+    (func (export "m128_load4") (param $x i32) (result i32)
+        (i32x4.extract_lane lane=0 (m128.load offset=0 align=4 (get_local $x)))
+    )
+
+    ;; For each of four vectors (at $x + 0, 20, 44 and 68) replaces every i32
+    ;; lane in memory with its population count. The vector is reloaded before
+    ;; each lane is processed; lane k is stored back to the vector's own
+    ;; offset + 4*k, so a store only overwrites the lane that was just read.
+    (func (export "m128_load_test") (param $x i32) (local m128)       
+        ;; vector at offset 0
+        (set_local 1 (m128.load offset=0 align=4 (get_local $x)))
+        (i32.store offset=0 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) 
+        (set_local 1 (m128.load offset=0 align=4 (get_local $x)))
+        (i32.store offset=4 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) 
+        (set_local 1 (m128.load offset=0 align=4 (get_local $x)))
+        (i32.store offset=8 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) 
+        (set_local 1 (m128.load offset=0 align=4 (get_local $x)))
+        (i32.store offset=12 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) 
+        ;; vector at offset 20
+        (set_local 1 (m128.load offset=20 align=4 (get_local $x)))
+        (i32.store offset=20 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) 
+        (set_local 1 (m128.load offset=20 align=4 (get_local $x)))
+        (i32.store offset=24 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) 
+        (set_local 1 (m128.load offset=20 align=4 (get_local $x)))
+        (i32.store offset=28 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) 
+        (set_local 1 (m128.load offset=20 align=4 (get_local $x)))
+        (i32.store offset=32 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) 
+        ;; vector at offset 44
+        (set_local 1 (m128.load offset=44 align=4 (get_local $x)))
+        (i32.store offset=44 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) 
+        (set_local 1 (m128.load offset=44 align=4 (get_local $x)))
+        (i32.store offset=48 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) 
+        (set_local 1 (m128.load offset=44 align=4 (get_local $x)))
+        (i32.store offset=52 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) 
+        (set_local 1 (m128.load offset=44 align=4 (get_local $x)))
+        (i32.store offset=56 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) 
+        ;; vector at offset 68
+        (set_local 1 (m128.load offset=68 align=4 (get_local $x)))
+        (i32.store offset=68 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) 
+        (set_local 1 (m128.load offset=68 align=4 (get_local $x)))
+        (i32.store offset=72 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) 
+        (set_local 1 (m128.load offset=68 align=4 (get_local $x)))
+        (i32.store offset=76 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) 
+        (set_local 1 (m128.load offset=68 align=4 (get_local $x)))
+        (i32.store offset=80 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) 
+    )
+)

+ 16 - 4
test/wasm.simd/rlexe.xml

@@ -2,14 +2,26 @@
 <regress-exe>
 <test>
   <default>
-    <files>buildExtractTests.js</files>
-    <compile-flags>-wasm -wasmsimd</compile-flags>
+    <files>loadTests.js</files>
+    <compile-flags> -wasm -wasmsimd</compile-flags>
   </default>
 </test>
 <test>
   <default>
-    <files>buildExtractTests.js</files>
-    <compile-flags>-wasm -wasmsimd -maic:0</compile-flags>
+    <files>loadTests.js</files>
+    <compile-flags> -maic:0 -wasm -wasmsimd</compile-flags>
+  </default>
+</test>
+<test>
+  <default>
+    <files>storeTests.js</files>
+    <compile-flags> -wasm -wasmsimd</compile-flags>
+  </default>
+</test>
+<test>
+  <default>
+    <files>storeTests.js</files>
+    <compile-flags> -maic:0 -wasm -wasmsimd</compile-flags>
   </default>
 </test>
 </regress-exe>

+ 67 - 0
test/wasm.simd/storeTests.js

@@ -0,0 +1,67 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+
+let passed = true;
+
+function assertEquals(expected, actual) {
+    if (expected != actual) {
+        passed = false;
+        throw `Expected ${expected}, received ${actual}`;
+    }
+}
+
+let check = function(expected, funName, ...args) {
+    let fun = eval(funName);
+    var result;
+    try {
+       result = fun(...args);
+    }
+    catch (e) {
+        result = e.name;
+    }
+
+    if(result != expected)
+    {
+        passed = false;
+        print(`${funName}(${[...args]}) produced ${result}, expected ${expected}`);
+    }
+}
+
+// Initial size of the imported memory, in WebAssembly pages.
+const INITIAL_SIZE = 1;
+const module = new WebAssembly.Module(readbuffer('stores.wasm'));
+
+// The module imports its memory as "dummy"."memory", so it is created here and
+// shared with the instance; intArray is a 32-bit view over that same buffer.
+let memObj = new WebAssembly.Memory({initial:INITIAL_SIZE});
+const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports;
+let intArray = new Int32Array (memObj.buffer);
+
+let testStore = function (funcname, ...expected) {
+    
+    const index = 0;
+    
+    for (let i = 0; i < expected.length; i++) {
+        intArray[4 + i] = expected[i];
+    }
+    
+    instance[funcname](index, ...expected);
+        
+    for (let i = 0; i < expected.length; i++)
+        assertEquals(expected[i], intArray[index + i]);
+}
+
+
+// In-bounds stores with several lane patterns.
+testStore("m128_store4", 777, 888, 999, 1111);
+testStore("m128_store4", -1, 0, 0, -1);
+testStore("m128_store4", -1, -1, -1, -1);
+
+
+// Boundary checks against the end of the memory: a 16-byte store starting
+// fewer than 16 bytes before the end is expected to raise RangeError, while
+// one starting exactly 16 bytes before the end is expected to succeed and
+// return undefined (m128_store4 declares no result).
+const MEM_SIZE_IN_BYTES = 1024 * 64;
+check("RangeError", "instance.m128_store4", MEM_SIZE_IN_BYTES - 12, 777, 888, 999, 1111);
+check("RangeError", "instance.m128_store4", MEM_SIZE_IN_BYTES - 8, 777, 888, 999, 1111);
+check("RangeError", "instance.m128_store4", MEM_SIZE_IN_BYTES - 4, 777, 888, 999, 1111);
+check(undefined, "instance.m128_store4", MEM_SIZE_IN_BYTES - 16, 777, 888, 999, 1111);
+
+if (passed) {
+    print("Passed");
+}

BIN
test/wasm.simd/stores.wasm


+ 8 - 0
test/wasm.simd/stores.wast

@@ -0,0 +1,8 @@
+;; Test module for m128.store. The memory is supplied by the embedder as
+;; import "dummy"."memory".
+(module
+    (import "dummy" "memory" (memory 1))
+
+    ;; Copies the 128-bit vector at byte address 16 to the address given by
+    ;; parameter 0. Parameters 1-4 are declared but not referenced in the body.
+    (func (export "m128_store4") (param i32 i32 i32 i32 i32) (local m128) 
+        (set_local 5 (m128.load offset=0 align=4 (i32.const 16)))
+        (m128.store offset=0 align=4 (get_local 0) (get_local 5))
+    )
+)